comparison regexp.h @ 4:4165f1b57d18 default tip

Become SUSv3 compatible and thus remove own regexp code. The Heirloom tools can be compiled to comply with several standards. This version does not need this flexibility. We can omit the regexp code and use the system's by using the SU3 variant of ed. This is the latest of the supported standards.
author markus schnalke <meillo@marmaro.de>
date Mon, 13 Apr 2015 17:26:51 +0200
parents ac52712b2b5e
children
comparison
equal deleted inserted replaced
3:ac52712b2b5e 4:4165f1b57d18
1 /*
2 * Simple Regular Expression functions. Derived from Unix 7th Edition,
3 * /usr/src/cmd/expr.y
4 *
5 * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002.
6 *
7 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * Redistributions of source code and documentation must retain the
13 * above copyright notice, this list of conditions and the following
14 * disclaimer.
15 * Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed or owned by Caldera
21 * International, Inc.
22 * Neither the name of Caldera International, Inc. nor the names of
23 * other contributors may be used to endorse or promote products
24 * derived from this software without specific prior written permission.
25 *
26 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
27 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
28 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
31 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
35 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
36 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
37 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
/*
 * REGEXP_H_USED is the attribute attached to the identification string
 * below: "used" for GCC 3.4 and later, "unused" for any other GCC, and
 * nothing at all for non-GCC compilers.
 */
40 #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
41 #define REGEXP_H_USED __attribute__ ((used))
42 #elif defined __GNUC__
43 #define REGEXP_H_USED __attribute__ ((unused))
44 #else
45 #define REGEXP_H_USED
46 #endif
/* SCCS-style version identification string of this header. */
47 static const char regexp_h_sccsid[] REGEXP_H_USED =
48 "@(#)regexp.sl 1.56 (gritter) 5/29/05";
49
/*
 * Multibyte (wide character) support is compiled in unless this header
 * is used from vi or built against dietlibc.
 */
50 #if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
51 #define REGEXP_H_WCHARS
52 #endif
53
/*
 * Opcodes of the compiled expression, as emitted by compile() and
 * interpreted by regexp_h_advance():
 *   CBRA   start of a \( group, followed by the group number
 *   CCHR   literal character, followed by the character
 *   CDOT   '.'
 *   CCL    bracket expression, followed by the set data
 *   CDOL   '$' at the end of the pattern
 *   CCEOF  end of the compiled expression
 *   CKET   end of a \) group, followed by the group number
 *   CBACK  back-reference \1..\9, followed by the group number
 *   CNCL   negated bracket expression (multibyte form only)
 *   CBRC   \<
 *   CLET   \>
 *   CCH1, CCH2, CCH3  literal character whose multibyte form is
 *          1, 2 or 3 bytes long (multibyte mode only)
 */
54 #define CBRA 2
55 #define CCHR 4
56 #define CDOT 8
57 #define CCL 12
58 /* CLNUM 14 used in sed */
59 /* CEND 16 used in sed */
60 #define CDOL 20
61 #define CCEOF 22
62 #define CKET 24
63 #define CBACK 36
64 #define CNCL 40
65 #define CBRC 44
66 #define CLET 48
67 #define CCH1 52
68 #define CCH2 56
69 #define CCH3 60
70
/*
 * Modifier bits or'ed into the opcode of the preceding element:
 * STAR for '*', RNGE for \{m,n\} (two count bytes follow the element),
 * REGEXP_H_LEAST additionally for the open-ended form \{m,\}.
 */
71 #define STAR 01
72 #define RNGE 03
73 #define REGEXP_H_LEAST 0100
74
/*
 * CMB is or'ed into an opcode to select its multibyte variant; it is 0
 * when multibyte support is not compiled in.
 */
75 #ifdef REGEXP_H_WCHARS
76 #define CMB 0200
77 #else /* !REGEXP_H_WCHARS */
78 #define CMB 0
79 #endif /* !REGEXP_H_WCHARS */
80
/* Maximum number of \( \) groups; also the size of braslist/braelist. */
81 #define NBRA 9
82
/*
 * Bit-set helpers for bracket expressions.  A set is a byte array in
 * which character c is represented by bit (c & 07) of byte (c >> 3).
 * PLACE and ISTHERE operate on the array named `ep' in the scope of
 * the caller; REGEXP_H_IS_THERE takes the array explicitly.
 *
 * Arguments and expansions are fully parenthesized so that an argument
 * such as `x | 1' selects the intended bit instead of being torn apart
 * by operator precedence.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])

#ifdef	REGEXP_H_WCHARS
#define	REGEXP_H_IS_THERE(ep, c)	((ep)[(c) >> 3] & bittab[(c) & 07])
#endif
89
90 #include <ctype.h>
91 #include <string.h>
92 #include <limits.h>
93 #ifdef REGEXP_H_WCHARS
94 #include <stdlib.h>
95 #include <wchar.h>
96 #include <wctype.h>
97 #endif /* REGEXP_H_WCHARS */
98
/*
 * Word-constituent tests used by the \< and \> operators: a letter or
 * an underscore (digits are tested separately by the callers).
 */
99 #define regexp_h_uletter(c) (isalpha(c) || (c) == '_')
100 #ifdef REGEXP_H_WCHARS
101 #define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_')
102
103 /*
104 * Used to allocate memory for the multibyte star algorithm.
105 */
/* Both may be predefined by the including file to substitute an allocator. */
106 #ifndef regexp_h_malloc
107 #define regexp_h_malloc(n) malloc(n)
108 #endif
109 #ifndef regexp_h_free
110 #define regexp_h_free(p) free(p)
111 #endif
112
113 /*
114 * Can be predefined to 'inline' to inline some multibyte functions;
115 * may improve performance for files that contain many multibyte
116 * sequences.
117 */
118 #ifndef regexp_h_inline
119 #define regexp_h_inline
120 #endif
121
122 /*
123 * Mask to determine whether the first byte of a sequence possibly
124 * starts a multibyte character. Set to 0377 to force mbtowc() for
125 * any byte sequence (except 0).
126 */
127 #ifndef REGEXP_H_MASK
128 #define REGEXP_H_MASK 0200
129 #endif
130 #endif /* REGEXP_H_WCHARS */
131
132 /*
133 * For regexpr.h.
134 */
/*
 * regexp_h_static is prefixed to compile() and to the circf/low/size/
 * bittab definitions; REGEXP_H_STEP_INIT and REGEXP_H_ADVANCE_INIT are
 * expanded at the top of step() and advance().  All three default to
 * nothing.
 */
135 #ifndef regexp_h_static
136 #define regexp_h_static
137 #endif
138 #ifndef REGEXP_H_STEP_INIT
139 #define REGEXP_H_STEP_INIT
140 #endif
141 #ifndef REGEXP_H_ADVANCE_INIT
142 #define REGEXP_H_ADVANCE_INIT
143 #endif
144
/*
 * State shared with the including program.
 *   braslist/braelist  start/end of the text matched by each \( \)
 *                      group (set by the CBRA/CKET opcodes)
 *   nbra               number of groups opened by the last compile()
 *   loc1, loc2         start and end of the last match found by step()
 *   locs               read by the matcher: the star backtracking stops
 *                      when it reaches this position, and \< consults
 *                      it at the beginning of the line
 *   sed                non-zero changes the handling of empty patterns
 *                      and of newlines inside compile()
 *   nodelim            set by compile() when a newline ended the
 *                      pattern instead of the delimiter
 */
145 char *braslist[NBRA];
146 char *braelist[NBRA];
147 int nbra;
148 char *loc1, *loc2, *locs;
149 int sed;
150 int nodelim;
151
/* circf: pattern started with '^'.  low/size: result of regexp_h_getrnge(). */
152 regexp_h_static int circf;
153 regexp_h_static int low;
154 regexp_h_static int size;
155
/* bittab[i] is the mask with only bit i set; used by PLACE/ISTHERE. */
156 regexp_h_static unsigned char bittab[] = {
157 1,
158 2,
159 4,
160 8,
161 16,
162 32,
163 64,
164 128
165 };
/* Forward declarations of the internal matcher and range decoder. */
166 static int regexp_h_advance(register const char *lp,
167 register const char *ep);
168 static void regexp_h_getrnge(register const char *str, int least);
169
170 static const char *regexp_h_bol; /* beginning of input line (for \<) */
161 16,
162 32,
163 64,
164 128
165 };
166 static int regexp_h_advance(register const char *lp,
167 register const char *ep);
168 static void regexp_h_getrnge(register const char *str, int least);
169
170 static const char *regexp_h_bol; /* beginning of input line (for \<) */
171
172 #ifdef REGEXP_H_WCHARS
173 static int regexp_h_wchars;
174 static int regexp_h_mbcurmax;
175
176 static const char *regexp_h_firstwc; /* location of first
177 multibyte character
178 on input line */
179
180 #define regexp_h_getwc(c) { \
181 if (regexp_h_wchars) { \
182 char mbbuf[MB_LEN_MAX + 1], *mbptr; \
183 wchar_t wcbuf; \
184 int mb, len; \
185 mbptr = mbbuf; \
186 do { \
187 mb = GETC(); \
188 *mbptr++ = mb; \
189 *mbptr = '\0'; \
190 } while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
191 && mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
192 if (len == -1) \
193 ERROR(67); \
194 c = wcbuf; \
195 } else { \
196 c = GETC(); \
197 } \
198 }
199
200 #define regexp_h_store(wc, mb, me) { \
201 int len; \
202 if (wc == WEOF) \
203 ERROR(67); \
204 if ((len = me - mb) <= regexp_h_mbcurmax) { \
205 char mt[MB_LEN_MAX]; \
206 if (wctomb(mt, wc) >= len) \
207 ERROR(50); \
208 } \
209 switch (len = wctomb(mb, wc)) { \
210 case -1: \
211 ERROR(67); \
212 case 0: \
213 mb++; \
214 break; \
215 default: \
216 mb += len; \
217 } \
218 }
219
220 static regexp_h_inline wint_t
221 regexp_h_fetchwc(const char **mb, int islp)
222 {
223 wchar_t wc;
224 int len;
225
226 if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
227 (*mb)++;
228 return WEOF;
229 }
230 if (islp && regexp_h_firstwc == NULL)
231 regexp_h_firstwc = *mb;
232 /*if (len == 0) {
233 (*mb)++;
234 return L'\0';
235 } handled in singlebyte code */
236 *mb += len;
237 return wc;
238 }
239
240 #define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \
241 (*(mb)++&0377): \
242 regexp_h_fetchwc(&(mb), islp))
243
244 static regexp_h_inline wint_t
245 regexp_h_showwc(const char *mb)
246 {
247 wchar_t wc;
248
249 if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
250 return WEOF;
251 return wc;
252 }
253
254 #define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
255 regexp_h_showwc(mb))
256
257 /*
258 * Return the character immediately preceding mb. Since no byte is
259 * required to be the first byte of a character, the longest multibyte
260 * character ending at &[mb-1] is searched.
261 */
262 static regexp_h_inline wint_t
263 regexp_h_previous(const char *mb)
264 {
265 const char *p = mb;
266 wchar_t wc, lastwc = WEOF;
267 int len, max = 0;
268
269 if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
270 return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
271 while (p-- > regexp_h_bol) {
272 mbtowc(NULL, NULL, 0);
273 if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
274 if (len < max || len < mb - p)
275 break;
276 max = len;
277 lastwc = wc;
278 } else if (len < 0 && max > 0)
279 break;
280 }
281 return lastwc;
282 }
283
284 #define regexp_h_cclass(set, c, af) \
285 ((c) == 0 || (c) == WEOF ? 0 : ( \
286 ((c) > 0177) ? \
287 regexp_h_cclass_wc(set, c, af) : ( \
288 REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \
289 ) \
290 ) \
291 )
292
293 static regexp_h_inline int
294 regexp_h_cclass_wc(const char *set, register wint_t c, int af)
295 {
296 register wint_t wc, wl = WEOF;
297 const char *end;
298
299 end = &set[18] + set[0] - 1;
300 set += 17;
301 while (set < end) {
302 wc = regexp_h_fetch(set, 0);
303 #ifdef REGEXP_H_VI_BACKSLASH
304 if (wc == '\\' && set < end &&
305 (*set == ']' || *set == '-' ||
306 *set == '^' || *set == '\\')) {
307 wc = regexp_h_fetch(set, 0);
308 } else
309 #endif /* REGEXP_H_VI_BACKSLASH */
310 if (wc == '-' && wl != WEOF && set < end) {
311 wc = regexp_h_fetch(set, 0);
312 #ifdef REGEXP_H_VI_BACKSLASH
313 if (wc == '\\' && set < end &&
314 (*set == ']' || *set == '-' ||
315 *set == '^' || *set == '\\')) {
316 wc = regexp_h_fetch(set, 0);
317 }
318 #endif /* REGEXP_H_VI_BACKSLASH */
319 if (c > wl && c < wc)
320 return af;
321 }
322 if (c == wc)
323 return af;
324 wl = wc;
325 }
326 return !af;
327 }
328 #else /* !REGEXP_H_WCHARS */
329 #define regexp_h_wchars 0
330 #define regexp_h_getwc(c) { c = GETC(); }
331 #endif /* !REGEXP_H_WCHARS */
332
333 regexp_h_static char *
334 compile(char *instring, char *ep, const char *endbuf, int seof)
335 {
336 INIT /* Dependent declarations and initializations */
337 register int c;
338 register int eof = seof;
339 char *lastep = instring;
340 int cclcnt;
341 char bracket[NBRA], *bracketp;
342 int closed;
343 char neg;
344 int lc;
345 int i, cflg;
346
347 #ifdef REGEXP_H_WCHARS
348 char *eq;
349 regexp_h_mbcurmax = MB_CUR_MAX;
350 regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0;
351 #endif
352 lastep = 0;
353 bracketp = bracket;
354 if((c = GETC()) == eof || c == '\n') {
355 if (c == '\n') {
356 UNGETC(c);
357 nodelim = 1;
358 }
359 if(*ep == 0 && !sed)
360 ERROR(41);
361 if (bracketp > bracket)
362 ERROR(42);
363 RETURN(ep);
364 }
365 circf = closed = nbra = 0;
366 if (c == '^')
367 circf++;
368 else
369 UNGETC(c);
370 for (;;) {
371 if (ep >= endbuf)
372 ERROR(50);
373 regexp_h_getwc(c);
374 if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
375 lastep = ep;
376 if (c == eof) {
377 *ep++ = CCEOF;
378 if (bracketp > bracket)
379 ERROR(42);
380 RETURN(ep);
381 }
382 switch (c) {
383
384 case '.':
385 *ep++ = CDOT|regexp_h_wchars;
386 continue;
387
388 case '\n':
389 if (sed == 0) {
390 UNGETC(c);
391 *ep++ = CCEOF;
392 nodelim = 1;
393 RETURN(ep);
394 }
395 ERROR(36);
396 case '*':
397 if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
398 *lastep==(CBRC|regexp_h_wchars) ||
399 *lastep==(CLET|regexp_h_wchars))
400 goto defchar;
401 *lastep |= STAR;
402 continue;
403
404 case '$':
405 if(PEEKC() != eof)
406 goto defchar;
407 *ep++ = CDOL;
408 continue;
409
410 case '[':
411 #ifdef REGEXP_H_WCHARS
412 if (regexp_h_wchars == 0) {
413 #endif
414 if(&ep[33] >= endbuf)
415 ERROR(50);
416
417 *ep++ = CCL;
418 lc = 0;
419 for(i = 0; i < 32; i++)
420 ep[i] = 0;
421
422 neg = 0;
423 if((c = GETC()) == '^') {
424 neg = 1;
425 c = GETC();
426 }
427
428 do {
429 c &= 0377;
430 if(c == '\0' || c == '\n')
431 ERROR(49);
432 #ifdef REGEXP_H_VI_BACKSLASH
433 if(c == '\\' && ((c = PEEKC()) == ']' ||
434 c == '-' || c == '^' ||
435 c == '\\')) {
436 c = GETC();
437 c &= 0377;
438 } else
439 #endif /* REGEXP_H_VI_BACKSLASH */
440 if(c == '-' && lc != 0) {
441 if ((c = GETC()) == ']') {
442 PLACE('-');
443 break;
444 }
445 #ifdef REGEXP_H_VI_BACKSLASH
446 if(c == '\\' &&
447 ((c = PEEKC()) == ']' ||
448 c == '-' ||
449 c == '^' ||
450 c == '\\'))
451 c = GETC();
452 #endif /* REGEXP_H_VI_BACKSLASH */
453 c &= 0377;
454 while(lc < c) {
455 PLACE(lc);
456 lc++;
457 }
458 }
459 lc = c;
460 PLACE(c);
461 } while((c = GETC()) != ']');
462 if(neg) {
463 for(cclcnt = 0; cclcnt < 32; cclcnt++)
464 ep[cclcnt] ^= 0377;
465 ep[0] &= 0376;
466 }
467
468 ep += 32;
469 #ifdef REGEXP_H_WCHARS
470 } else {
471 if (&ep[18] >= endbuf)
472 ERROR(50);
473 *ep++ = CCL|CMB;
474 *ep++ = 0;
475 lc = 0;
476 for (i = 0; i < 16; i++)
477 ep[i] = 0;
478 eq = &ep[16];
479 regexp_h_getwc(c);
480 if (c == L'^') {
481 regexp_h_getwc(c);
482 ep[-2] = CNCL|CMB;
483 }
484 do {
485 if (c == '\0' || c == '\n')
486 ERROR(49);
487 #ifdef REGEXP_H_VI_BACKSLASH
488 if(c == '\\' && ((c = PEEKC()) == ']' ||
489 c == '-' || c == '^' ||
490 c == '\\')) {
491 regexp_h_store(c, eq, endbuf);
492 regexp_h_getwc(c);
493 } else
494 #endif /* REGEXP_H_VI_BACKSLASH */
495 if (c == '-' && lc != 0 && lc <= 0177) {
496 regexp_h_store(c, eq, endbuf);
497 regexp_h_getwc(c);
498 if (c == ']') {
499 PLACE('-');
500 break;
501 }
502 #ifdef REGEXP_H_VI_BACKSLASH
503 if(c == '\\' &&
504 ((c = PEEKC()) == ']' ||
505 c == '-' ||
506 c == '^' ||
507 c == '\\')) {
508 regexp_h_store(c, eq,
509 endbuf);
510 regexp_h_getwc(c);
511 }
512 #endif /* REGEXP_H_VI_BACKSLASH */
513 while (lc < (c & 0177)) {
514 PLACE(lc);
515 lc++;
516 }
517 }
518 lc = c;
519 if (c <= 0177)
520 PLACE(c);
521 regexp_h_store(c, eq, endbuf);
522 regexp_h_getwc(c);
523 } while (c != L']');
524 if ((i = eq - &ep[16]) > 255)
525 ERROR(50);
526 lastep[1] = i;
527 ep = eq;
528 }
529 #endif /* REGEXP_H_WCHARS */
530
531 continue;
532
533 case '\\':
534 regexp_h_getwc(c);
535 switch(c) {
536
537 case '(':
538 if(nbra >= NBRA)
539 ERROR(43);
540 *bracketp++ = nbra;
541 *ep++ = CBRA;
542 *ep++ = nbra++;
543 continue;
544
545 case ')':
546 if(bracketp <= bracket)
547 ERROR(42);
548 *ep++ = CKET;
549 *ep++ = *--bracketp;
550 closed++;
551 continue;
552
553 case '<':
554 *ep++ = CBRC|regexp_h_wchars;
555 continue;
556
557 case '>':
558 *ep++ = CLET|regexp_h_wchars;
559 continue;
560
561 case '{':
562 if(lastep == (char *) (0))
563 goto defchar;
564 *lastep |= RNGE;
565 cflg = 0;
566 nlim:
567 c = GETC();
568 i = 0;
569 do {
570 if ('0' <= c && c <= '9')
571 i = 10 * i + c - '0';
572 else
573 ERROR(16);
574 } while(((c = GETC()) != '\\') && (c != ','));
575 if (i > 255)
576 ERROR(11);
577 *ep++ = i;
578 if (c == ',') {
579 if(cflg++)
580 ERROR(44);
581 if((c = GETC()) == '\\') {
582 *ep++ = (char)255;
583 *lastep |= REGEXP_H_LEAST;
584 } else {
585 UNGETC(c);
586 goto nlim; /* get 2'nd number */
587 }
588 }
589 if(GETC() != '}')
590 ERROR(45);
591 if(!cflg) /* one number */
592 *ep++ = i;
593 else if((ep[-1] & 0377) < (ep[-2] & 0377))
594 ERROR(46);
595 continue;
596
597 case '\n':
598 ERROR(36);
599
600 case 'n':
601 c = '\n';
602 goto defchar;
603
604 default:
605 if(c >= '1' && c <= '9') {
606 if((c -= '1') >= closed)
607 ERROR(25);
608 *ep++ = CBACK;
609 *ep++ = c;
610 continue;
611 }
612 }
613 /* Drop through to default to use \ to turn off special chars */
614
615 defchar:
616 default:
617 lastep = ep;
618 #ifdef REGEXP_H_WCHARS
619 if (regexp_h_wchars == 0) {
620 #endif
621 *ep++ = CCHR;
622 *ep++ = c;
623 #ifdef REGEXP_H_WCHARS
624 } else {
625 char mbbuf[MB_LEN_MAX];
626
627 switch (wctomb(mbbuf, c)) {
628 case 1: *ep++ = CCH1;
629 break;
630 case 2: *ep++ = CCH2;
631 break;
632 case 3: *ep++ = CCH3;
633 break;
634 default:
635 *ep++ = CCHR|CMB;
636 }
637 regexp_h_store(c, ep, endbuf);
638 }
639 #endif /* REGEXP_H_WCHARS */
640 }
641 }
642 }
643
644 int
645 step(const char *p1, const char *p2)
646 {
647 register int c;
648 #ifdef REGEXP_H_WCHARS
649 register int d;
650 #endif /* REGEXP_H_WCHARS */
651
652 REGEXP_H_STEP_INIT /* get circf */
653 regexp_h_bol = p1;
654 #ifdef REGEXP_H_WCHARS
655 regexp_h_firstwc = NULL;
656 #endif /* REGEXP_H_WCHARS */
657 if (circf) {
658 loc1 = (char *)p1;
659 return(regexp_h_advance(p1, p2));
660 }
661 /* fast check for first character */
662 if (*p2==CCHR) {
663 c = p2[1] & 0377;
664 do {
665 if ((*p1 & 0377) != c)
666 continue;
667 if (regexp_h_advance(p1, p2)) {
668 loc1 = (char *)p1;
669 return(1);
670 }
671 } while (*p1++);
672 return(0);
673 }
674 #ifdef REGEXP_H_WCHARS
675 else if (*p2==CCH1) {
676 do {
677 if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
678 loc1 = (char *)p1;
679 return(1);
680 }
681 c = regexp_h_fetch(p1, 1);
682 } while (c);
683 return(0);
684 } else if (*p2==CCH2) {
685 do {
686 if (p1[0] == p2[1] && p1[1] == p2[2] &&
687 regexp_h_advance(p1, p2)) {
688 loc1 = (char *)p1;
689 return(1);
690 }
691 c = regexp_h_fetch(p1, 1);
692 } while (c);
693 return(0);
694 } else if (*p2==CCH3) {
695 do {
696 if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
697 regexp_h_advance(p1, p2)) {
698 loc1 = (char *)p1;
699 return(1);
700 }
701 c = regexp_h_fetch(p1, 1);
702 } while (c);
703 return(0);
704 } else if ((*p2&0377)==(CCHR|CMB)) {
705 d = regexp_h_fetch(p2, 0);
706 do {
707 c = regexp_h_fetch(p1, 1);
708 if (c == d && regexp_h_advance(p1, p2)) {
709 loc1 = (char *)p1;
710 return(1);
711 }
712 } while(c);
713 return(0);
714 }
715 /* regular algorithm */
716 if (regexp_h_wchars)
717 do {
718 if (regexp_h_advance(p1, p2)) {
719 loc1 = (char *)p1;
720 return(1);
721 }
722 c = regexp_h_fetch(p1, 1);
723 } while (c);
724 else
725 #endif /* REGEXP_H_WCHARS */
726 do {
727 if (regexp_h_advance(p1, p2)) {
728 loc1 = (char *)p1;
729 return(1);
730 }
731 } while (*p1++);
732 return(0);
733 }
734
735 #ifdef REGEXP_H_WCHARS
736 /*
737 * It is painfully slow to read character-wise backwards in a
738 * multibyte string (see regexp_h_previous() above). For the star
739 * algorithm, we therefore keep track of every character as it is
740 * read in forward direction.
741 *
742 * Don't use alloca() for stack blocks since there is no measurable
743 * speedup and huge amounts of memory are used up for long input
744 * lines.
745 */
746 #ifndef REGEXP_H_STAKBLOK
747 #define REGEXP_H_STAKBLOK 1000
748 #endif
749
750 struct regexp_h_stack {
751 struct regexp_h_stack *s_nxt;
752 struct regexp_h_stack *s_prv;
753 const char *s_ptr[REGEXP_H_STAKBLOK];
754 };
755
756 #define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? \
757 regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
758
759 static regexp_h_inline void
760 regexp_h_pushwc(struct regexp_h_stack **sb,
761 struct regexp_h_stack **sp,
762 const char ***sc, const char *lp)
763 {
764 if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
765 return;
766 if (*sb == NULL) {
767 if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
768 return;
769 (*sb)->s_nxt = (*sb)->s_prv = NULL;
770 *sp = *sb;
771 *sc = &(*sb)->s_ptr[0];
772 } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
773 if ((*sp)->s_nxt == NULL) {
774 struct regexp_h_stack *bq;
775
776 if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
777 return;
778 bq->s_nxt = NULL;
779 bq->s_prv = *sp;
780 (*sp)->s_nxt = bq;
781 *sp = bq;
782 } else
783 *sp = (*sp)->s_nxt;
784 *sc = &(*sp)->s_ptr[0];
785 }
786 *(*sc)++ = lp;
787 }
788
789 static regexp_h_inline const char *
790 regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp,
791 const char ***sc, const char *lp)
792 {
793 if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
794 return &lp[-1];
795 if (*sp == NULL)
796 return regexp_h_firstwc;
797 if (*sc == &(*sp)->s_ptr[0]) {
798 if ((*sp)->s_prv == NULL) {
799 regexp_h_free(*sp);
800 *sp = NULL;
801 *sb = NULL;
802 return regexp_h_firstwc;
803 }
804 *sp = (*sp)->s_prv;
805 regexp_h_free((*sp)->s_nxt);
806 (*sp)->s_nxt = NULL ;
807 *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
808 }
809 return *(--(*sc));
810 }
811
812 static void
813 regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
814 {
815 for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
816 if ((*sp)->s_prv)
817 regexp_h_free((*sp)->s_prv);
818 if (*sp) {
819 if ((*sp)->s_prv)
820 regexp_h_free((*sp)->s_prv);
821 regexp_h_free(*sp);
822 }
823 *sp = *sb = NULL;
824 }
825 #else /* !REGEXP_H_WCHARS */
826 #define regexp_h_push(sb, sp, sc, lp)
827 #endif /* !REGEXP_H_WCHARS */
828
829 static int
830 regexp_h_advance(const char *lp, const char *ep)
831 {
832 register const char *curlp;
833 int c, least;
834 #ifdef REGEXP_H_WCHARS
835 int d;
836 struct regexp_h_stack *sb = NULL, *sp = NULL;
837 const char **sc;
838 #endif /* REGEXP_H_WCHARS */
839 char *bbeg;
840 int ct;
841
842 for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) {
843
844 case CCHR:
845 #ifdef REGEXP_H_WCHARS
846 case CCH1:
847 #endif
848 if (*ep++ == *lp++)
849 continue;
850 return(0);
851
852 #ifdef REGEXP_H_WCHARS
853 case CCHR|CMB:
854 if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1))
855 continue;
856 return(0);
857
858 case CCH2:
859 if (ep[0] == lp[0] && ep[1] == lp[1]) {
860 ep += 2, lp += 2;
861 continue;
862 }
863 return(0);
864
865 case CCH3:
866 if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) {
867 ep += 3, lp += 3;
868 continue;
869 }
870 return(0);
871 #endif /* REGEXP_H_WCHARS */
872
873 case CDOT:
874 if (*lp++)
875 continue;
876 return(0);
877 #ifdef REGEXP_H_WCHARS
878 case CDOT|CMB:
879 if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF)
880 continue;
881 return(0);
882 #endif /* REGEXP_H_WCHARS */
883
884 case CDOL:
885 if (*lp==0)
886 continue;
887 return(0);
888
889 case CCEOF:
890 loc2 = (char *)lp;
891 return(1);
892
893 case CCL:
894 c = *lp++ & 0377;
895 if(ISTHERE(c)) {
896 ep += 32;
897 continue;
898 }
899 return(0);
900
901 #ifdef REGEXP_H_WCHARS
902 case CCL|CMB:
903 case CNCL|CMB:
904 c = regexp_h_fetch(lp, 1);
905 if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) {
906 ep += (*ep & 0377) + 17;
907 continue;
908 }
909 return 0;
910 #endif /* REGEXP_H_WCHARS */
911
912 case CBRA:
913 braslist[*ep++ & 0377] = (char *)lp;
914 continue;
915
916 case CKET:
917 braelist[*ep++ & 0377] = (char *)lp;
918 continue;
919
920 case CBRC:
921 if (lp == regexp_h_bol && locs == NULL)
922 continue;
923 if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377))
924 && !regexp_h_uletter(lp[-1] & 0377)
925 && !isdigit(lp[-1] & 0377))
926 continue;
927 return(0);
928
929 #ifdef REGEXP_H_WCHARS
930 case CBRC|CMB:
931 c = regexp_h_show(lp);
932 d = regexp_h_previous(lp);
933 if ((iswdigit(c) || regexp_h_wuletter(c))
934 && !regexp_h_wuletter(d)
935 && !iswdigit(d))
936 continue;
937 return(0);
938 #endif /* REGEXP_H_WCHARS */
939
940 case CLET:
941 if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377))
942 continue;
943 return(0);
944
945 #ifdef REGEXP_H_WCHARS
946 case CLET|CMB:
947 c = regexp_h_show(lp);
948 if (!regexp_h_wuletter(c) && !iswdigit(c))
949 continue;
950 return(0);
951 #endif /* REGEXP_H_WCHARS */
952
953 case CCHR|RNGE:
954 c = *ep++;
955 regexp_h_getrnge(ep, least);
956 while(low--)
957 if(*lp++ != c)
958 return(0);
959 curlp = lp;
960 while(size--) {
961 regexp_h_push(&sb, &sp, &sc, lp);
962 if(*lp++ != c)
963 break;
964 }
965 if(size < 0) {
966 regexp_h_push(&sb, &sp, &sc, lp);
967 lp++;
968 }
969 ep += 2;
970 goto star;
971
972 #ifdef REGEXP_H_WCHARS
973 case CCHR|RNGE|CMB:
974 case CCH1|RNGE:
975 case CCH2|RNGE:
976 case CCH3|RNGE:
977 c = regexp_h_fetch(ep, 0);
978 regexp_h_getrnge(ep, least);
979 while (low--)
980 if (regexp_h_fetch(lp, 1) != c)
981 return 0;
982 curlp = lp;
983 while (size--) {
984 regexp_h_push(&sb, &sp, &sc, lp);
985 if (regexp_h_fetch(lp, 1) != c)
986 break;
987 }
988 if(size < 0) {
989 regexp_h_push(&sb, &sp, &sc, lp);
990 regexp_h_fetch(lp, 1);
991 }
992 ep += 2;
993 goto star;
994 #endif /* REGEXP_H_WCHARS */
995
996 case CDOT|RNGE:
997 regexp_h_getrnge(ep, least);
998 while(low--)
999 if(*lp++ == '\0')
1000 return(0);
1001 curlp = lp;
1002 while(size--) {
1003 regexp_h_push(&sb, &sp, &sc, lp);
1004 if(*lp++ == '\0')
1005 break;
1006 }
1007 if(size < 0) {
1008 regexp_h_push(&sb, &sp, &sc, lp);
1009 lp++;
1010 }
1011 ep += 2;
1012 goto star;
1013
1014 #ifdef REGEXP_H_WCHARS
1015 case CDOT|RNGE|CMB:
1016 regexp_h_getrnge(ep, least);
1017 while (low--)
1018 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
1019 return 0;
1020 curlp = lp;
1021 while (size--) {
1022 regexp_h_push(&sb, &sp, &sc, lp);
1023 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
1024 break;
1025 }
1026 if (size < 0) {
1027 regexp_h_push(&sb, &sp, &sc, lp);
1028 regexp_h_fetch(lp, 1);
1029 }
1030 ep += 2;
1031 goto star;
1032 #endif /* REGEXP_H_WCHARS */
1033
1034 case CCL|RNGE:
1035 regexp_h_getrnge(ep + 32, least);
1036 while(low--) {
1037 c = *lp++ & 0377;
1038 if(!ISTHERE(c))
1039 return(0);
1040 }
1041 curlp = lp;
1042 while(size--) {
1043 regexp_h_push(&sb, &sp, &sc, lp);
1044 c = *lp++ & 0377;
1045 if(!ISTHERE(c))
1046 break;
1047 }
1048 if(size < 0) {
1049 regexp_h_push(&sb, &sp, &sc, lp);
1050 lp++;
1051 }
1052 ep += 34; /* 32 + 2 */
1053 goto star;
1054
1055 #ifdef REGEXP_H_WCHARS
1056 case CCL|RNGE|CMB:
1057 case CNCL|RNGE|CMB:
1058 regexp_h_getrnge(ep + (*ep & 0377) + 17, least);
1059 while (low--) {
1060 c = regexp_h_fetch(lp, 1);
1061 if (!regexp_h_cclass(ep, c,
1062 (ep[-1] & 0377 & ~REGEXP_H_LEAST)
1063 == (CCL|RNGE|CMB)))
1064 return 0;
1065 }
1066 curlp = lp;
1067 while (size--) {
1068 regexp_h_push(&sb, &sp, &sc, lp);
1069 c = regexp_h_fetch(lp, 1);
1070 if (!regexp_h_cclass(ep, c,
1071 (ep[-1] & 0377 & ~REGEXP_H_LEAST)
1072 == (CCL|RNGE|CMB)))
1073 break;
1074 }
1075 if (size < 0) {
1076 regexp_h_push(&sb, &sp, &sc, lp);
1077 regexp_h_fetch(lp, 1);
1078 }
1079 ep += (*ep & 0377) + 19;
1080 goto star;
1081 #endif /* REGEXP_H_WCHARS */
1082
1083 case CBACK:
1084 bbeg = braslist[*ep & 0377];
1085 ct = braelist[*ep++ & 0377] - bbeg;
1086
1087 if(strncmp(bbeg, lp, ct) == 0) {
1088 lp += ct;
1089 continue;
1090 }
1091 return(0);
1092
1093 case CBACK|STAR:
1094 bbeg = braslist[*ep & 0377];
1095 ct = braelist[*ep++ & 0377] - bbeg;
1096 curlp = lp;
1097 while(strncmp(bbeg, lp, ct) == 0)
1098 lp += ct;
1099
1100 while(lp >= curlp) {
1101 if(regexp_h_advance(lp, ep)) return(1);
1102 lp -= ct;
1103 }
1104 return(0);
1105
1106
1107 case CDOT|STAR:
1108 curlp = lp;
1109 do
1110 regexp_h_push(&sb, &sp, &sc, lp);
1111 while (*lp++);
1112 goto star;
1113
1114 #ifdef REGEXP_H_WCHARS
1115 case CDOT|STAR|CMB:
1116 curlp = lp;
1117 do
1118 regexp_h_push(&sb, &sp, &sc, lp);
1119 while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF);
1120 goto star;
1121 #endif /* REGEXP_H_WCHARS */
1122
1123 case CCHR|STAR:
1124 curlp = lp;
1125 do
1126 regexp_h_push(&sb, &sp, &sc, lp);
1127 while (*lp++ == *ep);
1128 ep++;
1129 goto star;
1130
1131 #ifdef REGEXP_H_WCHARS
1132 case CCHR|STAR|CMB:
1133 case CCH1|STAR:
1134 case CCH2|STAR:
1135 case CCH3|STAR:
1136 curlp = lp;
1137 d = regexp_h_fetch(ep, 0);
1138 do
1139 regexp_h_push(&sb, &sp, &sc, lp);
1140 while (regexp_h_fetch(lp, 1) == d);
1141 goto star;
1142 #endif /* REGEXP_H_WCHARS */
1143
1144 case CCL|STAR:
1145 curlp = lp;
1146 do {
1147 regexp_h_push(&sb, &sp, &sc, lp);
1148 c = *lp++ & 0377;
1149 } while(ISTHERE(c));
1150 ep += 32;
1151 goto star;
1152
1153 #ifdef REGEXP_H_WCHARS
1154 case CCL|STAR|CMB:
1155 case CNCL|STAR|CMB:
1156 curlp = lp;
1157 do {
1158 regexp_h_push(&sb, &sp, &sc, lp);
1159 c = regexp_h_fetch(lp, 1);
1160 } while (regexp_h_cclass(ep, c, (ep[-1] & 0377)
1161 == (CCL|STAR|CMB)));
1162 ep += (*ep & 0377) + 17;
1163 goto star;
1164 #endif /* REGEXP_H_WCHARS */
1165
1166 star:
1167 #ifdef REGEXP_H_WCHARS
1168 if (regexp_h_wchars == 0) {
1169 #endif
1170 do {
1171 if(--lp == locs)
1172 break;
1173 if (regexp_h_advance(lp, ep))
1174 return(1);
1175 } while (lp > curlp);
1176 #ifdef REGEXP_H_WCHARS
1177 } else {
1178 do {
1179 lp = regexp_h_pop(&sb, &sp, &sc, lp);
1180 if (lp <= locs)
1181 break;
1182 if (regexp_h_advance(lp, ep)) {
1183 regexp_h_zerostak(&sb, &sp);
1184 return(1);
1185 }
1186 } while (lp > curlp);
1187 regexp_h_zerostak(&sb, &sp);
1188 }
1189 #endif /* REGEXP_H_WCHARS */
1190 return(0);
1191
1192 }
1193 }
1194
1195 static void
1196 regexp_h_getrnge(register const char *str, int least)
1197 {
1198 low = *str++ & 0377;
1199 size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
1200 }
1201
1202 int
1203 advance(const char *lp, const char *ep)
1204 {
1205 REGEXP_H_ADVANCE_INIT /* skip past circf */
1206 regexp_h_bol = lp;
1207 #ifdef REGEXP_H_WCHARS
1208 regexp_h_firstwc = NULL;
1209 #endif /* REGEXP_H_WCHARS */
1210 return regexp_h_advance(lp, ep);
1211 }