# HG changeset patch # User markus schnalke # Date 1428938811 -7200 # Node ID 4165f1b57d1824e08babc2bc5df1b39bf0651fa3 # Parent ac52712b2b5e4a2f3aeea734fa4c17245b05adad Become SUSv3 compatible and thus remove our own regexp code The Heirloom tools can be compiled to comply with several standards. This version does not need this flexibility. We can omit the regexp code and use the system's, by using the SU3 variant of ed. This is the latest of the supported standards. diff -r ac52712b2b5e -r 4165f1b57d18 ed.c --- a/ed.c Sun Apr 12 21:45:34 2015 +0200 +++ b/ed.c Mon Apr 13 17:26:51 2015 +0200 @@ -47,15 +47,7 @@ #else #define USED #endif -#if defined (SU3) static const char sccsid[] USED = "@(#)ed_su3.sl 1.99 (gritter) 7/27/06"; -#elif defined (SUS) -static const char sccsid[] USED = "@(#)ed_sus.sl 1.99 (gritter) 7/27/06"; -#elif defined (S42) -static const char sccsid[] USED = "@(#)ed_s42.sl 1.99 (gritter) 7/27/06"; -#else /* !SU3, !SUS, !S42 */ -static const char sccsid[] USED = "@(#)ed.sl 1.99 (gritter) 7/27/06"; -#endif /* !SU3, !SUS, !S42 */ #include #include @@ -238,8 +230,6 @@ #define ERROR(c) cmplerr(c) static wint_t GETWC(char *); -#if defined (SUS) || defined (S42) || defined (SU3) - #include #define NBRA 9 @@ -254,12 +244,6 @@ static char *compile(char *, char *, const char *, int); static int step(const char *, const char *); -#else /* !SUS, !S42, !SU3 */ - -#include - -#endif /* !SUS, !S42, !SU3 */ - int main(int argc, char **argv) { @@ -267,9 +251,7 @@ void (*oldintr)(int); progname = basename(argv[0]); -#if defined (SUS) || defined (S42) || defined (SU3) setlocale(LC_COLLATE, ""); -#endif setlocale(LC_CTYPE, ""); mb_cur_max = MB_CUR_MAX; myuid = getuid(); @@ -397,19 +379,15 @@ continue; case 'c': -#if defined (SU3) if (addr1 == zero && addr1+1 <= dol) { if (addr1 == addr2) addr2++; addr1++; } -#endif /* SU3 */ delete(); append(gettty, addr1-1); -#if defined (SUS) || defined (SU3) if (dot == addr1-1 && addr1 <= dol) dot = addr1; -#endif /* SUS || SU3 */ 
continue; case 'd': @@ -466,7 +444,6 @@ case 'i': setdot(); -#if defined (SU3) if (addr1 == zero) { if (addr1 == addr2) addr2++; @@ -474,7 +451,6 @@ if (dol != zero) nonzero(); } else -#endif /* SU3 */ nonzero(); newline(); checkpoint(); @@ -1055,10 +1031,6 @@ linebuf[i++] = 0; if (linebuf[0]=='.' && linebuf[1]==0) return(EOF); -#if !defined (SUS) && !defined (SU3) - if (linebuf[0]=='\\' && linebuf[1]=='.' && linebuf[2]==0) - linebuf[0]='.', linebuf[1]=0; -#endif return(0); } @@ -1730,7 +1702,6 @@ } i = loc2 - linebuf; loc2 = j + linebuf; -#if defined (SUS) || defined (SU3) || defined (S42) if (loc1 == &linebuf[i]) { int n; wchar_t wc; @@ -1739,7 +1710,6 @@ else loc2++; } -#endif /* SUS || SU3 || S42 */ while (genbuf[j++] = linebuf[i++]) if (j >= LBSIZE) growlb("line too long"); @@ -1867,9 +1837,6 @@ { const char *msg; -#if !defined (SUS) && !defined (S42) && !defined (SU3) - expbuf[0] = 0; -#endif switch (c) { case 11: msg = "Range endpoint too large"; @@ -1987,10 +1954,7 @@ putchr('\\'); putchr('\n'); } - if (n<0 || -#if defined (SUS) || defined (S42) || defined (SU3) - c == '\\' || -#endif /* SUS || S42 || SU3 */ + if (n<0 || c == '\\' || !(mb_cur_max>1 ? 
iswprint(c) : isprint(c))) { if (n<0) n = 1; @@ -2005,9 +1969,7 @@ col++; } } -#if defined (SUS) || defined (S42) || defined (SU3) putchr('$'); -#endif putchr('\n'); } @@ -2016,24 +1978,6 @@ { int cad = 1, d; -#if !defined (SUS) && !defined (S42) && !defined (SU3) - if (c=='\t') { - c = '>'; - goto esc; - } - if (c=='\b') { - c = '<'; - esc: - putchr('-'); - putchr('\b'); - putchr(c); - } else if (c == '\n') { - putchr('\\'); - putchr('0'); - putchr('0'); - putchr('0'); - cad = 4; -#else /* !SUS, !S42, !SU3 */ if (c == '\n') c = '\0'; if (c == '\\') { @@ -2064,7 +2008,6 @@ putchr('\\'); putchr('v'); cad = 2; -#endif /* !SUS, !S42, !SU3 */ } else { putchr('\\'); putchr(((c&~077)>>6)+'0'); @@ -2281,7 +2224,6 @@ file[0] = savedfile[0] = 0; } -#if defined (SUS) || defined (S42) || defined (SU3) union ptrstore { void *vp; char bp[sizeof (void *)]; @@ -2385,7 +2327,7 @@ #ifdef REG_ANGLES reflags |= REG_ANGLES; #endif -#if defined (SU3) && defined (REG_AVOIDNULL) +#ifdef REG_AVOIDNULL reflags |= REG_AVOIDNULL; #endif if (op[0]) @@ -2453,7 +2395,6 @@ } return res == 0; } -#endif /* SUS || S42 || SU3 */ static void help(void) diff -r ac52712b2b5e -r 4165f1b57d18 makefile --- a/makefile Sun Apr 12 21:45:34 2015 +0200 +++ b/makefile Mon Apr 13 17:26:51 2015 +0200 @@ -88,15 +88,12 @@ all: ed -ed: ed.o regexpr.o sigset.o sigrelse.o +ed: ed.o sigset.o sigrelse.o $(LD) $(LDFLAGS) ed.o -o ed -ed.o: ed.c regexp.h +ed.o: ed.c $(CC) $(CFLAGS) $(CPPFLAGS) $(IWCHAR) -DSHELL='"$(SHELL)"' -I. -c ed.c -regexpr.o: regexpr.c regexpr.h regexp.h - $(CC) $(CFLAGS) $(CPPFLAGS) $(IWCHAR) -I. -c regexpr.c - sigset.o: sigset.c sigset.h $(CC) $(CFLAGS) $(CPPFLAGS) -I. 
-c sigset.c @@ -114,7 +111,7 @@ chmod 644 $(ROOT)$(MANDIR)/man1/ed.1 clean: - rm -f ed.o regexpr.o sigset.o sigrelse.o core log *~ + rm -f ed.o sigset.o sigrelse.o core log *~ mrproper: clean rm -f ed diff -r ac52712b2b5e -r 4165f1b57d18 regexp.h --- a/regexp.h Sun Apr 12 21:45:34 2015 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1211 +0,0 @@ -/* - * Simple Regular Expression functions. Derived from Unix 7th Edition, - * /usr/src/cmd/expr.y - * - * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002. - * - * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * Redistributions of source code and documentation must retain the - * above copyright notice, this list of conditions and the following - * disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed or owned by Caldera - * International, Inc. - * Neither the name of Caldera International, Inc. nor the names of - * other contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA - * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE - * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 -#define REGEXP_H_USED __attribute__ ((used)) -#elif defined __GNUC__ -#define REGEXP_H_USED __attribute__ ((unused)) -#else -#define REGEXP_H_USED -#endif -static const char regexp_h_sccsid[] REGEXP_H_USED = - "@(#)regexp.sl 1.56 (gritter) 5/29/05"; - -#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__) -#define REGEXP_H_WCHARS -#endif - -#define CBRA 2 -#define CCHR 4 -#define CDOT 8 -#define CCL 12 -/* CLNUM 14 used in sed */ -/* CEND 16 used in sed */ -#define CDOL 20 -#define CCEOF 22 -#define CKET 24 -#define CBACK 36 -#define CNCL 40 -#define CBRC 44 -#define CLET 48 -#define CCH1 52 -#define CCH2 56 -#define CCH3 60 - -#define STAR 01 -#define RNGE 03 -#define REGEXP_H_LEAST 0100 - -#ifdef REGEXP_H_WCHARS -#define CMB 0200 -#else /* !REGEXP_H_WCHARS */ -#define CMB 0 -#endif /* !REGEXP_H_WCHARS */ - -#define NBRA 9 - -#define PLACE(c) ep[c >> 3] |= bittab[c & 07] -#define ISTHERE(c) (ep[c >> 3] & bittab[c & 07]) - -#ifdef REGEXP_H_WCHARS -#define REGEXP_H_IS_THERE(ep, c) ((ep)[c >> 3] & bittab[c & 07]) -#endif - -#include -#include -#include -#ifdef REGEXP_H_WCHARS -#include -#include -#include -#endif /* REGEXP_H_WCHARS */ - -#define regexp_h_uletter(c) (isalpha(c) || (c) == '_') -#ifdef REGEXP_H_WCHARS -#define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_') - -/* - * Used to allocate memory for the multibyte star algorithm. 
- */ -#ifndef regexp_h_malloc -#define regexp_h_malloc(n) malloc(n) -#endif -#ifndef regexp_h_free -#define regexp_h_free(p) free(p) -#endif - -/* - * Can be predefined to 'inline' to inline some multibyte functions; - * may improve performance for files that contain many multibyte - * sequences. - */ -#ifndef regexp_h_inline -#define regexp_h_inline -#endif - -/* - * Mask to determine whether the first byte of a sequence possibly - * starts a multibyte character. Set to 0377 to force mbtowc() for - * any byte sequence (except 0). - */ -#ifndef REGEXP_H_MASK -#define REGEXP_H_MASK 0200 -#endif -#endif /* REGEXP_H_WCHARS */ - -/* - * For regexpr.h. - */ -#ifndef regexp_h_static -#define regexp_h_static -#endif -#ifndef REGEXP_H_STEP_INIT -#define REGEXP_H_STEP_INIT -#endif -#ifndef REGEXP_H_ADVANCE_INIT -#define REGEXP_H_ADVANCE_INIT -#endif - -char *braslist[NBRA]; -char *braelist[NBRA]; -int nbra; -char *loc1, *loc2, *locs; -int sed; -int nodelim; - -regexp_h_static int circf; -regexp_h_static int low; -regexp_h_static int size; - -regexp_h_static unsigned char bittab[] = { - 1, - 2, - 4, - 8, - 16, - 32, - 64, - 128 -}; -static int regexp_h_advance(register const char *lp, - register const char *ep); -static void regexp_h_getrnge(register const char *str, int least); - -static const char *regexp_h_bol; /* beginning of input line (for \<) */ - -#ifdef REGEXP_H_WCHARS -static int regexp_h_wchars; -static int regexp_h_mbcurmax; - -static const char *regexp_h_firstwc; /* location of first - multibyte character - on input line */ - -#define regexp_h_getwc(c) { \ - if (regexp_h_wchars) { \ - char mbbuf[MB_LEN_MAX + 1], *mbptr; \ - wchar_t wcbuf; \ - int mb, len; \ - mbptr = mbbuf; \ - do { \ - mb = GETC(); \ - *mbptr++ = mb; \ - *mbptr = '\0'; \ - } while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \ - && mb != eof && mbptr < mbbuf + MB_LEN_MAX); \ - if (len == -1) \ - ERROR(67); \ - c = wcbuf; \ - } else { \ - c = GETC(); \ - } \ -} - -#define 
regexp_h_store(wc, mb, me) { \ - int len; \ - if (wc == WEOF) \ - ERROR(67); \ - if ((len = me - mb) <= regexp_h_mbcurmax) { \ - char mt[MB_LEN_MAX]; \ - if (wctomb(mt, wc) >= len) \ - ERROR(50); \ - } \ - switch (len = wctomb(mb, wc)) { \ - case -1: \ - ERROR(67); \ - case 0: \ - mb++; \ - break; \ - default: \ - mb += len; \ - } \ -} - -static regexp_h_inline wint_t -regexp_h_fetchwc(const char **mb, int islp) -{ - wchar_t wc; - int len; - - if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) { - (*mb)++; - return WEOF; - } - if (islp && regexp_h_firstwc == NULL) - regexp_h_firstwc = *mb; - /*if (len == 0) { - (*mb)++; - return L'\0'; - } handled in singlebyte code */ - *mb += len; - return wc; -} - -#define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \ - (*(mb)++&0377): \ - regexp_h_fetchwc(&(mb), islp)) - -static regexp_h_inline wint_t -regexp_h_showwc(const char *mb) -{ - wchar_t wc; - - if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0) - return WEOF; - return wc; -} - -#define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \ - regexp_h_showwc(mb)) - -/* - * Return the character immediately preceding mb. Since no byte is - * required to be the first byte of a character, the longest multibyte - * character ending at &[mb-1] is searched. - */ -static regexp_h_inline wint_t -regexp_h_previous(const char *mb) -{ - const char *p = mb; - wchar_t wc, lastwc = WEOF; - int len, max = 0; - - if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc) - return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF); - while (p-- > regexp_h_bol) { - mbtowc(NULL, NULL, 0); - if ((len = mbtowc(&wc, p, mb - p)) >= 0) { - if (len < max || len < mb - p) - break; - max = len; - lastwc = wc; - } else if (len < 0 && max > 0) - break; - } - return lastwc; -} - -#define regexp_h_cclass(set, c, af) \ - ((c) == 0 || (c) == WEOF ? 0 : ( \ - ((c) > 0177) ? \ - regexp_h_cclass_wc(set, c, af) : ( \ - REGEXP_H_IS_THERE((set)+1, (c)) ? 
(af) : !(af) \ - ) \ - ) \ - ) - -static regexp_h_inline int -regexp_h_cclass_wc(const char *set, register wint_t c, int af) -{ - register wint_t wc, wl = WEOF; - const char *end; - - end = &set[18] + set[0] - 1; - set += 17; - while (set < end) { - wc = regexp_h_fetch(set, 0); -#ifdef REGEXP_H_VI_BACKSLASH - if (wc == '\\' && set < end && - (*set == ']' || *set == '-' || - *set == '^' || *set == '\\')) { - wc = regexp_h_fetch(set, 0); - } else -#endif /* REGEXP_H_VI_BACKSLASH */ - if (wc == '-' && wl != WEOF && set < end) { - wc = regexp_h_fetch(set, 0); -#ifdef REGEXP_H_VI_BACKSLASH - if (wc == '\\' && set < end && - (*set == ']' || *set == '-' || - *set == '^' || *set == '\\')) { - wc = regexp_h_fetch(set, 0); - } -#endif /* REGEXP_H_VI_BACKSLASH */ - if (c > wl && c < wc) - return af; - } - if (c == wc) - return af; - wl = wc; - } - return !af; -} -#else /* !REGEXP_H_WCHARS */ -#define regexp_h_wchars 0 -#define regexp_h_getwc(c) { c = GETC(); } -#endif /* !REGEXP_H_WCHARS */ - -regexp_h_static char * -compile(char *instring, char *ep, const char *endbuf, int seof) -{ - INIT /* Dependent declarations and initializations */ - register int c; - register int eof = seof; - char *lastep = instring; - int cclcnt; - char bracket[NBRA], *bracketp; - int closed; - char neg; - int lc; - int i, cflg; - -#ifdef REGEXP_H_WCHARS - char *eq; - regexp_h_mbcurmax = MB_CUR_MAX; - regexp_h_wchars = regexp_h_mbcurmax > 1 ? 
CMB : 0; -#endif - lastep = 0; - bracketp = bracket; - if((c = GETC()) == eof || c == '\n') { - if (c == '\n') { - UNGETC(c); - nodelim = 1; - } - if(*ep == 0 && !sed) - ERROR(41); - if (bracketp > bracket) - ERROR(42); - RETURN(ep); - } - circf = closed = nbra = 0; - if (c == '^') - circf++; - else - UNGETC(c); - for (;;) { - if (ep >= endbuf) - ERROR(50); - regexp_h_getwc(c); - if(c != '*' && ((c != '\\') || (PEEKC() != '{'))) - lastep = ep; - if (c == eof) { - *ep++ = CCEOF; - if (bracketp > bracket) - ERROR(42); - RETURN(ep); - } - switch (c) { - - case '.': - *ep++ = CDOT|regexp_h_wchars; - continue; - - case '\n': - if (sed == 0) { - UNGETC(c); - *ep++ = CCEOF; - nodelim = 1; - RETURN(ep); - } - ERROR(36); - case '*': - if (lastep==0 || *lastep==CBRA || *lastep==CKET || - *lastep==(CBRC|regexp_h_wchars) || - *lastep==(CLET|regexp_h_wchars)) - goto defchar; - *lastep |= STAR; - continue; - - case '$': - if(PEEKC() != eof) - goto defchar; - *ep++ = CDOL; - continue; - - case '[': -#ifdef REGEXP_H_WCHARS - if (regexp_h_wchars == 0) { -#endif - if(&ep[33] >= endbuf) - ERROR(50); - - *ep++ = CCL; - lc = 0; - for(i = 0; i < 32; i++) - ep[i] = 0; - - neg = 0; - if((c = GETC()) == '^') { - neg = 1; - c = GETC(); - } - - do { - c &= 0377; - if(c == '\0' || c == '\n') - ERROR(49); -#ifdef REGEXP_H_VI_BACKSLASH - if(c == '\\' && ((c = PEEKC()) == ']' || - c == '-' || c == '^' || - c == '\\')) { - c = GETC(); - c &= 0377; - } else -#endif /* REGEXP_H_VI_BACKSLASH */ - if(c == '-' && lc != 0) { - if ((c = GETC()) == ']') { - PLACE('-'); - break; - } -#ifdef REGEXP_H_VI_BACKSLASH - if(c == '\\' && - ((c = PEEKC()) == ']' || - c == '-' || - c == '^' || - c == '\\')) - c = GETC(); -#endif /* REGEXP_H_VI_BACKSLASH */ - c &= 0377; - while(lc < c) { - PLACE(lc); - lc++; - } - } - lc = c; - PLACE(c); - } while((c = GETC()) != ']'); - if(neg) { - for(cclcnt = 0; cclcnt < 32; cclcnt++) - ep[cclcnt] ^= 0377; - ep[0] &= 0376; - } - - ep += 32; -#ifdef REGEXP_H_WCHARS - } else { - if 
(&ep[18] >= endbuf) - ERROR(50); - *ep++ = CCL|CMB; - *ep++ = 0; - lc = 0; - for (i = 0; i < 16; i++) - ep[i] = 0; - eq = &ep[16]; - regexp_h_getwc(c); - if (c == L'^') { - regexp_h_getwc(c); - ep[-2] = CNCL|CMB; - } - do { - if (c == '\0' || c == '\n') - ERROR(49); -#ifdef REGEXP_H_VI_BACKSLASH - if(c == '\\' && ((c = PEEKC()) == ']' || - c == '-' || c == '^' || - c == '\\')) { - regexp_h_store(c, eq, endbuf); - regexp_h_getwc(c); - } else -#endif /* REGEXP_H_VI_BACKSLASH */ - if (c == '-' && lc != 0 && lc <= 0177) { - regexp_h_store(c, eq, endbuf); - regexp_h_getwc(c); - if (c == ']') { - PLACE('-'); - break; - } -#ifdef REGEXP_H_VI_BACKSLASH - if(c == '\\' && - ((c = PEEKC()) == ']' || - c == '-' || - c == '^' || - c == '\\')) { - regexp_h_store(c, eq, - endbuf); - regexp_h_getwc(c); - } -#endif /* REGEXP_H_VI_BACKSLASH */ - while (lc < (c & 0177)) { - PLACE(lc); - lc++; - } - } - lc = c; - if (c <= 0177) - PLACE(c); - regexp_h_store(c, eq, endbuf); - regexp_h_getwc(c); - } while (c != L']'); - if ((i = eq - &ep[16]) > 255) - ERROR(50); - lastep[1] = i; - ep = eq; - } -#endif /* REGEXP_H_WCHARS */ - - continue; - - case '\\': - regexp_h_getwc(c); - switch(c) { - - case '(': - if(nbra >= NBRA) - ERROR(43); - *bracketp++ = nbra; - *ep++ = CBRA; - *ep++ = nbra++; - continue; - - case ')': - if(bracketp <= bracket) - ERROR(42); - *ep++ = CKET; - *ep++ = *--bracketp; - closed++; - continue; - - case '<': - *ep++ = CBRC|regexp_h_wchars; - continue; - - case '>': - *ep++ = CLET|regexp_h_wchars; - continue; - - case '{': - if(lastep == (char *) (0)) - goto defchar; - *lastep |= RNGE; - cflg = 0; - nlim: - c = GETC(); - i = 0; - do { - if ('0' <= c && c <= '9') - i = 10 * i + c - '0'; - else - ERROR(16); - } while(((c = GETC()) != '\\') && (c != ',')); - if (i > 255) - ERROR(11); - *ep++ = i; - if (c == ',') { - if(cflg++) - ERROR(44); - if((c = GETC()) == '\\') { - *ep++ = (char)255; - *lastep |= REGEXP_H_LEAST; - } else { - UNGETC(c); - goto nlim; /* get 2'nd number */ 
- } - } - if(GETC() != '}') - ERROR(45); - if(!cflg) /* one number */ - *ep++ = i; - else if((ep[-1] & 0377) < (ep[-2] & 0377)) - ERROR(46); - continue; - - case '\n': - ERROR(36); - - case 'n': - c = '\n'; - goto defchar; - - default: - if(c >= '1' && c <= '9') { - if((c -= '1') >= closed) - ERROR(25); - *ep++ = CBACK; - *ep++ = c; - continue; - } - } - /* Drop through to default to use \ to turn off special chars */ - - defchar: - default: - lastep = ep; -#ifdef REGEXP_H_WCHARS - if (regexp_h_wchars == 0) { -#endif - *ep++ = CCHR; - *ep++ = c; -#ifdef REGEXP_H_WCHARS - } else { - char mbbuf[MB_LEN_MAX]; - - switch (wctomb(mbbuf, c)) { - case 1: *ep++ = CCH1; - break; - case 2: *ep++ = CCH2; - break; - case 3: *ep++ = CCH3; - break; - default: - *ep++ = CCHR|CMB; - } - regexp_h_store(c, ep, endbuf); - } -#endif /* REGEXP_H_WCHARS */ - } - } -} - -int -step(const char *p1, const char *p2) -{ - register int c; -#ifdef REGEXP_H_WCHARS - register int d; -#endif /* REGEXP_H_WCHARS */ - - REGEXP_H_STEP_INIT /* get circf */ - regexp_h_bol = p1; -#ifdef REGEXP_H_WCHARS - regexp_h_firstwc = NULL; -#endif /* REGEXP_H_WCHARS */ - if (circf) { - loc1 = (char *)p1; - return(regexp_h_advance(p1, p2)); - } - /* fast check for first character */ - if (*p2==CCHR) { - c = p2[1] & 0377; - do { - if ((*p1 & 0377) != c) - continue; - if (regexp_h_advance(p1, p2)) { - loc1 = (char *)p1; - return(1); - } - } while (*p1++); - return(0); - } -#ifdef REGEXP_H_WCHARS - else if (*p2==CCH1) { - do { - if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) { - loc1 = (char *)p1; - return(1); - } - c = regexp_h_fetch(p1, 1); - } while (c); - return(0); - } else if (*p2==CCH2) { - do { - if (p1[0] == p2[1] && p1[1] == p2[2] && - regexp_h_advance(p1, p2)) { - loc1 = (char *)p1; - return(1); - } - c = regexp_h_fetch(p1, 1); - } while (c); - return(0); - } else if (*p2==CCH3) { - do { - if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&& - regexp_h_advance(p1, p2)) { - loc1 = (char *)p1; - return(1); 
- } - c = regexp_h_fetch(p1, 1); - } while (c); - return(0); - } else if ((*p2&0377)==(CCHR|CMB)) { - d = regexp_h_fetch(p2, 0); - do { - c = regexp_h_fetch(p1, 1); - if (c == d && regexp_h_advance(p1, p2)) { - loc1 = (char *)p1; - return(1); - } - } while(c); - return(0); - } - /* regular algorithm */ - if (regexp_h_wchars) - do { - if (regexp_h_advance(p1, p2)) { - loc1 = (char *)p1; - return(1); - } - c = regexp_h_fetch(p1, 1); - } while (c); - else -#endif /* REGEXP_H_WCHARS */ - do { - if (regexp_h_advance(p1, p2)) { - loc1 = (char *)p1; - return(1); - } - } while (*p1++); - return(0); -} - -#ifdef REGEXP_H_WCHARS -/* - * It is painfully slow to read character-wise backwards in a - * multibyte string (see regexp_h_previous() above). For the star - * algorithm, we therefore keep track of every character as it is - * read in forward direction. - * - * Don't use alloca() for stack blocks since there is no measurable - * speedup and huge amounts of memory are used up for long input - * lines. - */ -#ifndef REGEXP_H_STAKBLOK -#define REGEXP_H_STAKBLOK 1000 -#endif - -struct regexp_h_stack { - struct regexp_h_stack *s_nxt; - struct regexp_h_stack *s_prv; - const char *s_ptr[REGEXP_H_STAKBLOK]; -}; - -#define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? 
\ - regexp_h_pushwc(sb, sp, sc, lp) : (void)0) - -static regexp_h_inline void -regexp_h_pushwc(struct regexp_h_stack **sb, - struct regexp_h_stack **sp, - const char ***sc, const char *lp) -{ - if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc) - return; - if (*sb == NULL) { - if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL) - return; - (*sb)->s_nxt = (*sb)->s_prv = NULL; - *sp = *sb; - *sc = &(*sb)->s_ptr[0]; - } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) { - if ((*sp)->s_nxt == NULL) { - struct regexp_h_stack *bq; - - if ((bq = regexp_h_malloc(sizeof *bq)) == NULL) - return; - bq->s_nxt = NULL; - bq->s_prv = *sp; - (*sp)->s_nxt = bq; - *sp = bq; - } else - *sp = (*sp)->s_nxt; - *sc = &(*sp)->s_ptr[0]; - } - *(*sc)++ = lp; -} - -static regexp_h_inline const char * -regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp, - const char ***sc, const char *lp) -{ - if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc) - return &lp[-1]; - if (*sp == NULL) - return regexp_h_firstwc; - if (*sc == &(*sp)->s_ptr[0]) { - if ((*sp)->s_prv == NULL) { - regexp_h_free(*sp); - *sp = NULL; - *sb = NULL; - return regexp_h_firstwc; - } - *sp = (*sp)->s_prv; - regexp_h_free((*sp)->s_nxt); - (*sp)->s_nxt = NULL ; - *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK]; - } - return *(--(*sc)); -} - -static void -regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp) -{ - for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt) - if ((*sp)->s_prv) - regexp_h_free((*sp)->s_prv); - if (*sp) { - if ((*sp)->s_prv) - regexp_h_free((*sp)->s_prv); - regexp_h_free(*sp); - } - *sp = *sb = NULL; -} -#else /* !REGEXP_H_WCHARS */ -#define regexp_h_push(sb, sp, sc, lp) -#endif /* !REGEXP_H_WCHARS */ - -static int -regexp_h_advance(const char *lp, const char *ep) -{ - register const char *curlp; - int c, least; -#ifdef REGEXP_H_WCHARS - int d; - struct regexp_h_stack *sb = NULL, *sp = NULL; - const char **sc; -#endif /* REGEXP_H_WCHARS */ - char *bbeg; - int ct; - - 
for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) { - - case CCHR: -#ifdef REGEXP_H_WCHARS - case CCH1: -#endif - if (*ep++ == *lp++) - continue; - return(0); - -#ifdef REGEXP_H_WCHARS - case CCHR|CMB: - if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1)) - continue; - return(0); - - case CCH2: - if (ep[0] == lp[0] && ep[1] == lp[1]) { - ep += 2, lp += 2; - continue; - } - return(0); - - case CCH3: - if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) { - ep += 3, lp += 3; - continue; - } - return(0); -#endif /* REGEXP_H_WCHARS */ - - case CDOT: - if (*lp++) - continue; - return(0); -#ifdef REGEXP_H_WCHARS - case CDOT|CMB: - if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF) - continue; - return(0); -#endif /* REGEXP_H_WCHARS */ - - case CDOL: - if (*lp==0) - continue; - return(0); - - case CCEOF: - loc2 = (char *)lp; - return(1); - - case CCL: - c = *lp++ & 0377; - if(ISTHERE(c)) { - ep += 32; - continue; - } - return(0); - -#ifdef REGEXP_H_WCHARS - case CCL|CMB: - case CNCL|CMB: - c = regexp_h_fetch(lp, 1); - if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) { - ep += (*ep & 0377) + 17; - continue; - } - return 0; -#endif /* REGEXP_H_WCHARS */ - - case CBRA: - braslist[*ep++ & 0377] = (char *)lp; - continue; - - case CKET: - braelist[*ep++ & 0377] = (char *)lp; - continue; - - case CBRC: - if (lp == regexp_h_bol && locs == NULL) - continue; - if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377)) - && !regexp_h_uletter(lp[-1] & 0377) - && !isdigit(lp[-1] & 0377)) - continue; - return(0); - -#ifdef REGEXP_H_WCHARS - case CBRC|CMB: - c = regexp_h_show(lp); - d = regexp_h_previous(lp); - if ((iswdigit(c) || regexp_h_wuletter(c)) - && !regexp_h_wuletter(d) - && !iswdigit(d)) - continue; - return(0); -#endif /* REGEXP_H_WCHARS */ - - case CLET: - if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377)) - continue; - return(0); - -#ifdef REGEXP_H_WCHARS - case CLET|CMB: - c = regexp_h_show(lp); - if (!regexp_h_wuletter(c) && 
!iswdigit(c)) - continue; - return(0); -#endif /* REGEXP_H_WCHARS */ - - case CCHR|RNGE: - c = *ep++; - regexp_h_getrnge(ep, least); - while(low--) - if(*lp++ != c) - return(0); - curlp = lp; - while(size--) { - regexp_h_push(&sb, &sp, &sc, lp); - if(*lp++ != c) - break; - } - if(size < 0) { - regexp_h_push(&sb, &sp, &sc, lp); - lp++; - } - ep += 2; - goto star; - -#ifdef REGEXP_H_WCHARS - case CCHR|RNGE|CMB: - case CCH1|RNGE: - case CCH2|RNGE: - case CCH3|RNGE: - c = regexp_h_fetch(ep, 0); - regexp_h_getrnge(ep, least); - while (low--) - if (regexp_h_fetch(lp, 1) != c) - return 0; - curlp = lp; - while (size--) { - regexp_h_push(&sb, &sp, &sc, lp); - if (regexp_h_fetch(lp, 1) != c) - break; - } - if(size < 0) { - regexp_h_push(&sb, &sp, &sc, lp); - regexp_h_fetch(lp, 1); - } - ep += 2; - goto star; -#endif /* REGEXP_H_WCHARS */ - - case CDOT|RNGE: - regexp_h_getrnge(ep, least); - while(low--) - if(*lp++ == '\0') - return(0); - curlp = lp; - while(size--) { - regexp_h_push(&sb, &sp, &sc, lp); - if(*lp++ == '\0') - break; - } - if(size < 0) { - regexp_h_push(&sb, &sp, &sc, lp); - lp++; - } - ep += 2; - goto star; - -#ifdef REGEXP_H_WCHARS - case CDOT|RNGE|CMB: - regexp_h_getrnge(ep, least); - while (low--) - if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) - return 0; - curlp = lp; - while (size--) { - regexp_h_push(&sb, &sp, &sc, lp); - if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) - break; - } - if (size < 0) { - regexp_h_push(&sb, &sp, &sc, lp); - regexp_h_fetch(lp, 1); - } - ep += 2; - goto star; -#endif /* REGEXP_H_WCHARS */ - - case CCL|RNGE: - regexp_h_getrnge(ep + 32, least); - while(low--) { - c = *lp++ & 0377; - if(!ISTHERE(c)) - return(0); - } - curlp = lp; - while(size--) { - regexp_h_push(&sb, &sp, &sc, lp); - c = *lp++ & 0377; - if(!ISTHERE(c)) - break; - } - if(size < 0) { - regexp_h_push(&sb, &sp, &sc, lp); - lp++; - } - ep += 34; /* 32 + 2 */ - goto star; - -#ifdef REGEXP_H_WCHARS - case CCL|RNGE|CMB: - case CNCL|RNGE|CMB: - 
regexp_h_getrnge(ep + (*ep & 0377) + 17, least); - while (low--) { - c = regexp_h_fetch(lp, 1); - if (!regexp_h_cclass(ep, c, - (ep[-1] & 0377 & ~REGEXP_H_LEAST) - == (CCL|RNGE|CMB))) - return 0; - } - curlp = lp; - while (size--) { - regexp_h_push(&sb, &sp, &sc, lp); - c = regexp_h_fetch(lp, 1); - if (!regexp_h_cclass(ep, c, - (ep[-1] & 0377 & ~REGEXP_H_LEAST) - == (CCL|RNGE|CMB))) - break; - } - if (size < 0) { - regexp_h_push(&sb, &sp, &sc, lp); - regexp_h_fetch(lp, 1); - } - ep += (*ep & 0377) + 19; - goto star; -#endif /* REGEXP_H_WCHARS */ - - case CBACK: - bbeg = braslist[*ep & 0377]; - ct = braelist[*ep++ & 0377] - bbeg; - - if(strncmp(bbeg, lp, ct) == 0) { - lp += ct; - continue; - } - return(0); - - case CBACK|STAR: - bbeg = braslist[*ep & 0377]; - ct = braelist[*ep++ & 0377] - bbeg; - curlp = lp; - while(strncmp(bbeg, lp, ct) == 0) - lp += ct; - - while(lp >= curlp) { - if(regexp_h_advance(lp, ep)) return(1); - lp -= ct; - } - return(0); - - - case CDOT|STAR: - curlp = lp; - do - regexp_h_push(&sb, &sp, &sc, lp); - while (*lp++); - goto star; - -#ifdef REGEXP_H_WCHARS - case CDOT|STAR|CMB: - curlp = lp; - do - regexp_h_push(&sb, &sp, &sc, lp); - while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF); - goto star; -#endif /* REGEXP_H_WCHARS */ - - case CCHR|STAR: - curlp = lp; - do - regexp_h_push(&sb, &sp, &sc, lp); - while (*lp++ == *ep); - ep++; - goto star; - -#ifdef REGEXP_H_WCHARS - case CCHR|STAR|CMB: - case CCH1|STAR: - case CCH2|STAR: - case CCH3|STAR: - curlp = lp; - d = regexp_h_fetch(ep, 0); - do - regexp_h_push(&sb, &sp, &sc, lp); - while (regexp_h_fetch(lp, 1) == d); - goto star; -#endif /* REGEXP_H_WCHARS */ - - case CCL|STAR: - curlp = lp; - do { - regexp_h_push(&sb, &sp, &sc, lp); - c = *lp++ & 0377; - } while(ISTHERE(c)); - ep += 32; - goto star; - -#ifdef REGEXP_H_WCHARS - case CCL|STAR|CMB: - case CNCL|STAR|CMB: - curlp = lp; - do { - regexp_h_push(&sb, &sp, &sc, lp); - c = regexp_h_fetch(lp, 1); - } while (regexp_h_cclass(ep, c, 
(ep[-1] & 0377) - == (CCL|STAR|CMB))); - ep += (*ep & 0377) + 17; - goto star; -#endif /* REGEXP_H_WCHARS */ - - star: -#ifdef REGEXP_H_WCHARS - if (regexp_h_wchars == 0) { -#endif - do { - if(--lp == locs) - break; - if (regexp_h_advance(lp, ep)) - return(1); - } while (lp > curlp); -#ifdef REGEXP_H_WCHARS - } else { - do { - lp = regexp_h_pop(&sb, &sp, &sc, lp); - if (lp <= locs) - break; - if (regexp_h_advance(lp, ep)) { - regexp_h_zerostak(&sb, &sp); - return(1); - } - } while (lp > curlp); - regexp_h_zerostak(&sb, &sp); - } -#endif /* REGEXP_H_WCHARS */ - return(0); - - } -} - -static void -regexp_h_getrnge(register const char *str, int least) -{ - low = *str++ & 0377; - size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low; -} - -int -advance(const char *lp, const char *ep) -{ - REGEXP_H_ADVANCE_INIT /* skip past circf */ - regexp_h_bol = lp; -#ifdef REGEXP_H_WCHARS - regexp_h_firstwc = NULL; -#endif /* REGEXP_H_WCHARS */ - return regexp_h_advance(lp, ep); -} diff -r ac52712b2b5e -r 4165f1b57d18 regexpr.c --- a/regexpr.c Sun Apr 12 21:45:34 2015 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -/* - * Simple Regular Expression functions. Derived from Unix 7th Edition, - * /usr/src/cmd/expr.y - * - * Modified by Gunnar Ritter, Freiburg i. Br., Germany, January 2003. - * - * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * Redistributions of source code and documentation must retain the - * above copyright notice, this list of conditions and the following - * disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed or owned by Caldera - * International, Inc. - * Neither the name of Caldera International, Inc. nor the names of - * other contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA - * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE - * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Sccsid @(#)regexpr.c 1.8 (gritter) 10/13/04 */ - -#include -#include "regexpr.h" - -int regerrno, reglength; -static int circf; - -static char *regexpr_compile(char *, char *, const char *, int); - -char * -compile(const char *instring, char *ep, char *endbuf) -{ - char *cp; - int sz = 0; - - if (ep == 0) { - for (cp = (char *)instring; *cp != '\0'; cp++) - if (*cp == '[') - sz += 32; - sz += 2 * (cp - instring) + 5; - if ((ep = malloc(sz)) == 0) { - regerrno = 11; - return 0; - } - endbuf = &ep[sz]; - ep[1] = '\0'; - } - if ((cp=regexpr_compile((char *)instring, &ep[1], endbuf, '\0')) == 0) { - if (sz) - free(ep); - return 0; - } - ep[0] = circf; - reglength = cp - ep; - return sz ? 
ep : cp; -} - -#define INIT register char *sp = instring; -#define GETC() (*sp++) -#define PEEKC() (*sp) -#define UNGETC(c) (--sp) -#define RETURN(c) return (c); -#define ERROR(c) { regerrno = c; return 0; } - -#define compile(a, b, c, d) regexpr_compile(a, b, c, d) -#define regexp_h_static static -#define REGEXP_H_STEP_INIT circf = *p2++; -#define REGEXP_H_ADVANCE_INIT circf = *ep++; - -#include "regexp.h" diff -r ac52712b2b5e -r 4165f1b57d18 regexpr.h --- a/regexpr.h Sun Apr 12 21:45:34 2015 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ -/* - * Simple Regular Expression functions. Derived from Unix 7th Edition, - * /usr/src/cmd/expr.y - * - * Modified by Gunnar Ritter, Freiburg i. Br., Germany, January 2003. - * - * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * Redistributions of source code and documentation must retain the - * above copyright notice, this list of conditions and the following - * disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed or owned by Caldera - * International, Inc. - * Neither the name of Caldera International, Inc. nor the names of - * other contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA - * INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE - * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Sccsid @(#)regexpr.h 1.2 (gritter) 1/11/03 */ - -#define NBRA 9 - -extern char *braslist[NBRA]; -extern char *braelist[NBRA]; -extern int nbra; -extern int regerrno, reglength; -extern char *loc1, *loc2, *locs; -extern int sed; - -extern char *compile(const char *, char *, char *); -extern int step(const char *, const char *); -extern int advance(const char *, const char *);