heirloom-ed

view regexp.h @ 1:db609ba8ab93

Added tag 0.1 for changeset 1493bea5ac22
author markus schnalke <meillo@marmaro.de>
date Mon, 05 Sep 2011 16:36:26 +0200
parents
children
line source
1 /*
2 * Simple Regular Expression functions. Derived from Unix 7th Edition,
3 * /usr/src/cmd/expr.y
4 *
5 * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002.
6 *
7 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * Redistributions of source code and documentation must retain the
13 * above copyright notice, this list of conditions and the following
14 * disclaimer.
15 * Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed or owned by Caldera
21 * International, Inc.
22 * Neither the name of Caldera International, Inc. nor the names of
23 * other contributors may be used to endorse or promote products
24 * derived from this software without specific prior written permission.
25 *
26 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
27 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
28 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
31 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
35 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
36 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
37 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
40 #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
41 #define REGEXP_H_USED __attribute__ ((used))
42 #elif defined __GNUC__
43 #define REGEXP_H_USED __attribute__ ((unused))
44 #else
45 #define REGEXP_H_USED
46 #endif
47 static const char regexp_h_sccsid[] REGEXP_H_USED =
48 "@(#)regexp.sl 1.56 (gritter) 5/29/05";
50 #if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
51 #define REGEXP_H_WCHARS
52 #endif
54 #define CBRA 2
55 #define CCHR 4
56 #define CDOT 8
57 #define CCL 12
58 /* CLNUM 14 used in sed */
59 /* CEND 16 used in sed */
60 #define CDOL 20
61 #define CCEOF 22
62 #define CKET 24
63 #define CBACK 36
64 #define CNCL 40
65 #define CBRC 44
66 #define CLET 48
67 #define CCH1 52
68 #define CCH2 56
69 #define CCH3 60
71 #define STAR 01
72 #define RNGE 03
73 #define REGEXP_H_LEAST 0100
75 #ifdef REGEXP_H_WCHARS
76 #define CMB 0200
77 #else /* !REGEXP_H_WCHARS */
78 #define CMB 0
79 #endif /* !REGEXP_H_WCHARS */
81 #define NBRA 9
/*
 * Bit-set helpers for bracket expressions.  Character code c is mapped
 * to byte (c >> 3) and bit (c & 07) of a bitmap; bittab[] supplies the
 * single-bit masks.  PLACE and ISTHERE operate on the bitmap that the
 * identifier `ep' designates at the point of use; REGEXP_H_IS_THERE
 * takes the bitmap explicitly.
 *
 * Every use of the argument is parenthesized so that an expression
 * argument (e.g. `x | 1') selects the intended byte and bit.
 */
#define PLACE(c) (ep[(c) >> 3] |= bittab[(c) & 07])
#define ISTHERE(c) (ep[(c) >> 3] & bittab[(c) & 07])

#ifdef REGEXP_H_WCHARS
#define REGEXP_H_IS_THERE(ep, c) ((ep)[(c) >> 3] & bittab[(c) & 07])
#endif
90 #include <ctype.h>
91 #include <string.h>
92 #include <limits.h>
93 #ifdef REGEXP_H_WCHARS
94 #include <stdlib.h>
95 #include <wchar.h>
96 #include <wctype.h>
97 #endif /* REGEXP_H_WCHARS */
99 #define regexp_h_uletter(c) (isalpha(c) || (c) == '_')
100 #ifdef REGEXP_H_WCHARS
101 #define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_')
103 /*
104 * Used to allocate memory for the multibyte star algorithm.
105 */
106 #ifndef regexp_h_malloc
107 #define regexp_h_malloc(n) malloc(n)
108 #endif
109 #ifndef regexp_h_free
110 #define regexp_h_free(p) free(p)
111 #endif
113 /*
114 * Can be predefined to 'inline' to inline some multibyte functions;
115 * may improve performance for files that contain many multibyte
116 * sequences.
117 */
118 #ifndef regexp_h_inline
119 #define regexp_h_inline
120 #endif
122 /*
123 * Mask to determine whether the first byte of a sequence possibly
124 * starts a multibyte character. Set to 0377 to force mbtowc() for
125 * any byte sequence (except 0).
126 */
127 #ifndef REGEXP_H_MASK
128 #define REGEXP_H_MASK 0200
129 #endif
130 #endif /* REGEXP_H_WCHARS */
132 /*
133 * For regexpr.h.
134 */
135 #ifndef regexp_h_static
136 #define regexp_h_static
137 #endif
138 #ifndef REGEXP_H_STEP_INIT
139 #define REGEXP_H_STEP_INIT
140 #endif
141 #ifndef REGEXP_H_ADVANCE_INIT
142 #define REGEXP_H_ADVANCE_INIT
143 #endif
145 char *braslist[NBRA];
146 char *braelist[NBRA];
147 int nbra;
148 char *loc1, *loc2, *locs;
149 int sed;
150 int nodelim;
152 regexp_h_static int circf;
153 regexp_h_static int low;
154 regexp_h_static int size;
156 regexp_h_static unsigned char bittab[] = {
157 1,
158 2,
159 4,
160 8,
161 16,
162 32,
163 64,
164 128
165 };
166 static int regexp_h_advance(register const char *lp,
167 register const char *ep);
168 static void regexp_h_getrnge(register const char *str, int least);
170 static const char *regexp_h_bol; /* beginning of input line (for \<) */
172 #ifdef REGEXP_H_WCHARS
173 static int regexp_h_wchars;
174 static int regexp_h_mbcurmax;
176 static const char *regexp_h_firstwc; /* location of first
177 multibyte character
178 on input line */
/*
 * regexp_h_getwc(c): read the next pattern character into c.
 *
 * If regexp_h_wchars is zero, this is simply c = GETC().  Otherwise
 * bytes are read with GETC() one at a time and appended to a local,
 * NUL-terminated buffer until mbtowc() accepts the buffer (returns a
 * value >= 0), the byte just read equals `eof', or MB_LEN_MAX bytes
 * have been collected.  If the conversion still failed, ERROR(67) is
 * invoked; afterwards c is assigned the converted wide character.
 *
 * The expansion relies on names supplied at the point of use: the
 * GETC() and ERROR() macros and a variable named `eof'.
 */
180 #define regexp_h_getwc(c) { \
181 if (regexp_h_wchars) { \
182 char mbbuf[MB_LEN_MAX + 1], *mbptr; \
183 wchar_t wcbuf; \
184 int mb, len; \
185 mbptr = mbbuf; \
186 do { \
187 mb = GETC(); \
188 *mbptr++ = mb; \
189 *mbptr = '\0'; \
190 } while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
191 && mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
192 if (len == -1) \
193 ERROR(67); \
194 c = wcbuf; \
195 } else { \
196 c = GETC(); \
197 } \
198 }
/*
 * regexp_h_store(wc, mb, me): append wide character wc, encoded with
 * wctomb(), at mb and advance mb past it; me is the end of the
 * available space.
 *
 * - wc == WEOF invokes ERROR(67).
 * - When the remaining space (me - mb) is at most regexp_h_mbcurmax,
 *   the character is first encoded into a scratch buffer and ERROR(50)
 *   is invoked if its length is not smaller than the remaining space.
 * - wctomb() returning -1 invokes ERROR(67); a return of 0 advances mb
 *   by one byte; any other return advances mb by that many bytes.
 *
 * NOTE(review): `case -1' has no break, so if ERROR() ever returned
 * normally control would continue into `case 0' - presumably ERROR()
 * never falls through; confirm against the caller's definition.
 */
200 #define regexp_h_store(wc, mb, me) { \
201 int len; \
202 if (wc == WEOF) \
203 ERROR(67); \
204 if ((len = me - mb) <= regexp_h_mbcurmax) { \
205 char mt[MB_LEN_MAX]; \
206 if (wctomb(mt, wc) >= len) \
207 ERROR(50); \
208 } \
209 switch (len = wctomb(mb, wc)) { \
210 case -1: \
211 ERROR(67); \
212 case 0: \
213 mb++; \
214 break; \
215 default: \
216 mb += len; \
217 } \
218 }
220 static regexp_h_inline wint_t
221 regexp_h_fetchwc(const char **mb, int islp)
222 {
223 wchar_t wc;
224 int len;
226 if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
227 (*mb)++;
228 return WEOF;
229 }
230 if (islp && regexp_h_firstwc == NULL)
231 regexp_h_firstwc = *mb;
232 /*if (len == 0) {
233 (*mb)++;
234 return L'\0';
235 } handled in singlebyte code */
236 *mb += len;
237 return wc;
238 }
240 #define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \
241 (*(mb)++&0377): \
242 regexp_h_fetchwc(&(mb), islp))
244 static regexp_h_inline wint_t
245 regexp_h_showwc(const char *mb)
246 {
247 wchar_t wc;
249 if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
250 return WEOF;
251 return wc;
252 }
254 #define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
255 regexp_h_showwc(mb))
257 /*
258 * Return the character immediately preceding mb. Since no byte is
259 * required to be the first byte of a character, the longest multibyte
260 * character ending at &[mb-1] is searched.
261 */
262 static regexp_h_inline wint_t
263 regexp_h_previous(const char *mb)
264 {
265 const char *p = mb;
266 wchar_t wc, lastwc = WEOF;
267 int len, max = 0;
269 if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
270 return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
271 while (p-- > regexp_h_bol) {
272 mbtowc(NULL, NULL, 0);
273 if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
274 if (len < max || len < mb - p)
275 break;
276 max = len;
277 lastwc = wc;
278 } else if (len < 0 && max > 0)
279 break;
280 }
281 return lastwc;
282 }
284 #define regexp_h_cclass(set, c, af) \
285 ((c) == 0 || (c) == WEOF ? 0 : ( \
286 ((c) > 0177) ? \
287 regexp_h_cclass_wc(set, c, af) : ( \
288 REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \
289 ) \
290 ) \
291 )
293 static regexp_h_inline int
294 regexp_h_cclass_wc(const char *set, register wint_t c, int af)
295 {
296 register wint_t wc, wl = WEOF;
297 const char *end;
299 end = &set[18] + set[0] - 1;
300 set += 17;
301 while (set < end) {
302 wc = regexp_h_fetch(set, 0);
303 #ifdef REGEXP_H_VI_BACKSLASH
304 if (wc == '\\' && set < end &&
305 (*set == ']' || *set == '-' ||
306 *set == '^' || *set == '\\')) {
307 wc = regexp_h_fetch(set, 0);
308 } else
309 #endif /* REGEXP_H_VI_BACKSLASH */
310 if (wc == '-' && wl != WEOF && set < end) {
311 wc = regexp_h_fetch(set, 0);
312 #ifdef REGEXP_H_VI_BACKSLASH
313 if (wc == '\\' && set < end &&
314 (*set == ']' || *set == '-' ||
315 *set == '^' || *set == '\\')) {
316 wc = regexp_h_fetch(set, 0);
317 }
318 #endif /* REGEXP_H_VI_BACKSLASH */
319 if (c > wl && c < wc)
320 return af;
321 }
322 if (c == wc)
323 return af;
324 wl = wc;
325 }
326 return !af;
327 }
328 #else /* !REGEXP_H_WCHARS */
329 #define regexp_h_wchars 0
330 #define regexp_h_getwc(c) { c = GETC(); }
331 #endif /* !REGEXP_H_WCHARS */
/*
 * Compile a regular expression into the buffer starting at ep.
 *
 * The pattern text is obtained through the caller-supplied GETC(),
 * PEEKC() and UNGETC() macros; errors are reported with ERROR(n) and
 * the function leaves through RETURN(ep), where ep is the position
 * just past the compiled code.  Compilation stops at the delimiter
 * seof or, when `sed' is zero, at a newline (which is pushed back and
 * sets `nodelim').  ERROR(50) is raised when ep reaches endbuf.
 *
 * Side effects visible here: circf is set when the pattern starts with
 * '^', nbra counts the \( groups, and with REGEXP_H_WCHARS the
 * variables regexp_h_mbcurmax and regexp_h_wchars are initialised from
 * MB_CUR_MAX.
 *
 * NOTE(review): instring is only used to initialise lastep, which is
 * overwritten before use; the INIT macro may reference it - confirm
 * against the caller's definition.
 */
333 regexp_h_static char *
334 compile(char *instring, char *ep, const char *endbuf, int seof)
335 {
336 INIT /* Dependent declarations and initializations */
337 register int c;
338 register int eof = seof;
339 char *lastep = instring;
340 int cclcnt;
341 char bracket[NBRA], *bracketp;
342 int closed;
343 char neg;
344 int lc;
345 int i, cflg;
347 #ifdef REGEXP_H_WCHARS
348 char *eq;
349 regexp_h_mbcurmax = MB_CUR_MAX;
350 regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0;
351 #endif
352 lastep = 0;
353 bracketp = bracket;
/*
 * Empty pattern (delimiter or newline comes first): nothing is written
 * and ep is returned as is; ERROR(41) if the buffer starts with a zero
 * byte and `sed' is not set.
 */
354 if((c = GETC()) == eof || c == '\n') {
355 if (c == '\n') {
356 UNGETC(c);
357 nodelim = 1;
358 }
359 if(*ep == 0 && !sed)
360 ERROR(41);
361 if (bracketp > bracket)
362 ERROR(42);
363 RETURN(ep);
364 }
365 circf = closed = nbra = 0;
366 if (c == '^')
367 circf++;
368 else
369 UNGETC(c);
370 for (;;) {
371 if (ep >= endbuf)
372 ERROR(50);
373 regexp_h_getwc(c);
/*
 * lastep remembers where the most recent item starts so that a
 * following '*' or \{ can set STAR/RNGE in that item's opcode.
 */
374 if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
375 lastep = ep;
376 if (c == eof) {
377 *ep++ = CCEOF;
378 if (bracketp > bracket)
379 ERROR(42);
380 RETURN(ep);
381 }
382 switch (c) {
384 case '.':
385 *ep++ = CDOT|regexp_h_wchars;
386 continue;
388 case '\n':
389 if (sed == 0) {
390 UNGETC(c);
391 *ep++ = CCEOF;
392 nodelim = 1;
393 RETURN(ep);
394 }
395 ERROR(36);
396 case '*':
/* '*' with nothing to repeat, or after \( \) \< \>, is an ordinary character */
397 if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
398 *lastep==(CBRC|regexp_h_wchars) ||
399 *lastep==(CLET|regexp_h_wchars))
400 goto defchar;
401 *lastep |= STAR;
402 continue;
404 case '$':
/* '$' is an anchor only when it is the last pattern character */
405 if(PEEKC() != eof)
406 goto defchar;
407 *ep++ = CDOL;
408 continue;
410 case '[':
411 #ifdef REGEXP_H_WCHARS
412 if (regexp_h_wchars == 0) {
413 #endif
/* single-byte class: CCL opcode followed by a 32-byte (256-bit) bitmap */
414 if(&ep[33] >= endbuf)
415 ERROR(50);
417 *ep++ = CCL;
418 lc = 0;
419 for(i = 0; i < 32; i++)
420 ep[i] = 0;
422 neg = 0;
423 if((c = GETC()) == '^') {
424 neg = 1;
425 c = GETC();
426 }
428 do {
429 c &= 0377;
430 if(c == '\0' || c == '\n')
431 ERROR(49);
432 #ifdef REGEXP_H_VI_BACKSLASH
433 if(c == '\\' && ((c = PEEKC()) == ']' ||
434 c == '-' || c == '^' ||
435 c == '\\')) {
436 c = GETC();
437 c &= 0377;
438 } else
439 #endif /* REGEXP_H_VI_BACKSLASH */
440 if(c == '-' && lc != 0) {
441 if ((c = GETC()) == ']') {
442 PLACE('-');
443 break;
444 }
445 #ifdef REGEXP_H_VI_BACKSLASH
446 if(c == '\\' &&
447 ((c = PEEKC()) == ']' ||
448 c == '-' ||
449 c == '^' ||
450 c == '\\'))
451 c = GETC();
452 #endif /* REGEXP_H_VI_BACKSLASH */
453 c &= 0377;
454 while(lc < c) {
455 PLACE(lc);
456 lc++;
457 }
458 }
459 lc = c;
460 PLACE(c);
461 } while((c = GETC()) != ']');
/* negated class: invert the bitmap, then clear the bit for character 0 */
462 if(neg) {
463 for(cclcnt = 0; cclcnt < 32; cclcnt++)
464 ep[cclcnt] ^= 0377;
465 ep[0] &= 0376;
466 }
468 ep += 32;
469 #ifdef REGEXP_H_WCHARS
470 } else {
/*
 * multibyte class: opcode (CCL|CMB, or CNCL|CMB when negated), a
 * length byte, a 16-byte bitmap for characters up to 0177, then the
 * member characters themselves stored through eq.
 */
471 if (&ep[18] >= endbuf)
472 ERROR(50);
473 *ep++ = CCL|CMB;
474 *ep++ = 0;
475 lc = 0;
476 for (i = 0; i < 16; i++)
477 ep[i] = 0;
478 eq = &ep[16];
479 regexp_h_getwc(c);
480 if (c == L'^') {
481 regexp_h_getwc(c);
482 ep[-2] = CNCL|CMB;
483 }
484 do {
485 if (c == '\0' || c == '\n')
486 ERROR(49);
487 #ifdef REGEXP_H_VI_BACKSLASH
488 if(c == '\\' && ((c = PEEKC()) == ']' ||
489 c == '-' || c == '^' ||
490 c == '\\')) {
491 regexp_h_store(c, eq, endbuf);
492 regexp_h_getwc(c);
493 } else
494 #endif /* REGEXP_H_VI_BACKSLASH */
495 if (c == '-' && lc != 0 && lc <= 0177) {
496 regexp_h_store(c, eq, endbuf);
497 regexp_h_getwc(c);
498 if (c == ']') {
499 PLACE('-');
500 break;
501 }
502 #ifdef REGEXP_H_VI_BACKSLASH
503 if(c == '\\' &&
504 ((c = PEEKC()) == ']' ||
505 c == '-' ||
506 c == '^' ||
507 c == '\\')) {
508 regexp_h_store(c, eq,
509 endbuf);
510 regexp_h_getwc(c);
511 }
512 #endif /* REGEXP_H_VI_BACKSLASH */
513 while (lc < (c & 0177)) {
514 PLACE(lc);
515 lc++;
516 }
517 }
518 lc = c;
519 if (c <= 0177)
520 PLACE(c);
521 regexp_h_store(c, eq, endbuf);
522 regexp_h_getwc(c);
523 } while (c != L']');
/* the byte after the opcode receives the length of the member list (at most 255) */
524 if ((i = eq - &ep[16]) > 255)
525 ERROR(50);
526 lastep[1] = i;
527 ep = eq;
528 }
529 #endif /* REGEXP_H_WCHARS */
531 continue;
533 case '\\':
534 regexp_h_getwc(c);
535 switch(c) {
537 case '(':
538 if(nbra >= NBRA)
539 ERROR(43);
540 *bracketp++ = nbra;
541 *ep++ = CBRA;
542 *ep++ = nbra++;
543 continue;
545 case ')':
546 if(bracketp <= bracket)
547 ERROR(42);
548 *ep++ = CKET;
549 *ep++ = *--bracketp;
550 closed++;
551 continue;
553 case '<':
554 *ep++ = CBRC|regexp_h_wchars;
555 continue;
557 case '>':
558 *ep++ = CLET|regexp_h_wchars;
559 continue;
561 case '{':
/*
 * \{m\}, \{m,\} or \{m,n\}: two count bytes are appended after the
 * item; for \{m,\} the second byte is 255 and REGEXP_H_LEAST is set
 * in the item's opcode.
 */
562 if(lastep == (char *) (0))
563 goto defchar;
564 *lastep |= RNGE;
565 cflg = 0;
566 nlim:
567 c = GETC();
568 i = 0;
569 do {
570 if ('0' <= c && c <= '9')
571 i = 10 * i + c - '0';
572 else
573 ERROR(16);
574 } while(((c = GETC()) != '\\') && (c != ','));
575 if (i > 255)
576 ERROR(11);
577 *ep++ = i;
578 if (c == ',') {
579 if(cflg++)
580 ERROR(44);
581 if((c = GETC()) == '\\') {
582 *ep++ = (char)255;
583 *lastep |= REGEXP_H_LEAST;
584 } else {
585 UNGETC(c);
586 goto nlim; /* get 2'nd number */
587 }
588 }
589 if(GETC() != '}')
590 ERROR(45);
591 if(!cflg) /* one number */
592 *ep++ = i;
593 else if((ep[-1] & 0377) < (ep[-2] & 0377))
594 ERROR(46);
595 continue;
597 case '\n':
598 ERROR(36);
600 case 'n':
601 c = '\n';
602 goto defchar;
604 default:
/* \1 .. \9: back-reference; ERROR(25) unless that group is already closed */
605 if(c >= '1' && c <= '9') {
606 if((c -= '1') >= closed)
607 ERROR(25);
608 *ep++ = CBACK;
609 *ep++ = c;
610 continue;
611 }
612 }
613 /* Drop through to default to use \ to turn off special chars */
615 defchar:
616 default:
617 lastep = ep;
618 #ifdef REGEXP_H_WCHARS
619 if (regexp_h_wchars == 0) {
620 #endif
621 *ep++ = CCHR;
622 *ep++ = c;
623 #ifdef REGEXP_H_WCHARS
624 } else {
/* the opcode records the encoded length (1, 2 or 3 bytes); anything else uses CCHR|CMB */
625 char mbbuf[MB_LEN_MAX];
627 switch (wctomb(mbbuf, c)) {
628 case 1: *ep++ = CCH1;
629 break;
630 case 2: *ep++ = CCH2;
631 break;
632 case 3: *ep++ = CCH3;
633 break;
634 default:
635 *ep++ = CCHR|CMB;
636 }
637 regexp_h_store(c, ep, endbuf);
638 }
639 #endif /* REGEXP_H_WCHARS */
640 }
641 }
642 }
/*
 * Search the string p1 for a match of the compiled expression p2.
 *
 * Returns 1 if a match is found, with loc1 set to the position where
 * the successful regexp_h_advance() call started, and 0 otherwise.
 * regexp_h_bol is set to p1 and (with REGEXP_H_WCHARS)
 * regexp_h_firstwc is reset before matching.  When circf is set, only
 * a match starting at p1 itself is attempted.
 *
 * If the expression begins with a literal character, candidate start
 * positions are filtered on that character before the full matcher is
 * called; otherwise every position up to and including the
 * terminating NUL is tried.
 */
644 int
645 step(const char *p1, const char *p2)
646 {
647 register int c;
648 #ifdef REGEXP_H_WCHARS
649 register int d;
650 #endif /* REGEXP_H_WCHARS */
652 REGEXP_H_STEP_INIT /* get circf */
653 regexp_h_bol = p1;
654 #ifdef REGEXP_H_WCHARS
655 regexp_h_firstwc = NULL;
656 #endif /* REGEXP_H_WCHARS */
657 if (circf) {
658 loc1 = (char *)p1;
659 return(regexp_h_advance(p1, p2));
660 }
661 /* fast check for first character */
662 if (*p2==CCHR) {
663 c = p2[1] & 0377;
664 do {
665 if ((*p1 & 0377) != c)
666 continue;
667 if (regexp_h_advance(p1, p2)) {
668 loc1 = (char *)p1;
669 return(1);
670 }
671 } while (*p1++);
672 return(0);
673 }
674 #ifdef REGEXP_H_WCHARS
/*
 * Literal of 1, 2 or 3 bytes: compare the leading bytes directly and
 * step through the input one character at a time.
 */
675 else if (*p2==CCH1) {
676 do {
677 if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
678 loc1 = (char *)p1;
679 return(1);
680 }
681 c = regexp_h_fetch(p1, 1);
682 } while (c);
683 return(0);
684 } else if (*p2==CCH2) {
685 do {
686 if (p1[0] == p2[1] && p1[1] == p2[2] &&
687 regexp_h_advance(p1, p2)) {
688 loc1 = (char *)p1;
689 return(1);
690 }
691 c = regexp_h_fetch(p1, 1);
692 } while (c);
693 return(0);
694 } else if (*p2==CCH3) {
695 do {
696 if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
697 regexp_h_advance(p1, p2)) {
698 loc1 = (char *)p1;
699 return(1);
700 }
701 c = regexp_h_fetch(p1, 1);
702 } while (c);
703 return(0);
704 } else if ((*p2&0377)==(CCHR|CMB)) {
/*
 * NOTE(review): p2 still points at the opcode byte when d is fetched,
 * and p1 has already been moved past the candidate character when
 * regexp_h_advance() is called and loc1 is recorded - this differs
 * from the other branches; confirm the intended behaviour.
 */
705 d = regexp_h_fetch(p2, 0);
706 do {
707 c = regexp_h_fetch(p1, 1);
708 if (c == d && regexp_h_advance(p1, p2)) {
709 loc1 = (char *)p1;
710 return(1);
711 }
712 } while(c);
713 return(0);
714 }
715 /* regular algorithm */
716 if (regexp_h_wchars)
717 do {
718 if (regexp_h_advance(p1, p2)) {
719 loc1 = (char *)p1;
720 return(1);
721 }
722 c = regexp_h_fetch(p1, 1);
723 } while (c);
724 else
725 #endif /* REGEXP_H_WCHARS */
726 do {
727 if (regexp_h_advance(p1, p2)) {
728 loc1 = (char *)p1;
729 return(1);
730 }
731 } while (*p1++);
732 return(0);
733 }
735 #ifdef REGEXP_H_WCHARS
736 /*
737 * It is painfully slow to read character-wise backwards in a
738 * multibyte string (see regexp_h_previous() above). For the star
739 * algorithm, we therefore keep track of every character as it is
740 * read in forward direction.
741 *
742 * Don't use alloca() for stack blocks since there is no measurable
743 * speedup and huge amounts of memory are used up for long input
744 * lines.
745 */
746 #ifndef REGEXP_H_STAKBLOK
747 #define REGEXP_H_STAKBLOK 1000
748 #endif
/*
 * One block of the stack of input positions used by the multibyte
 * star algorithm.  Blocks form a doubly linked list through s_nxt and
 * s_prv; each block stores up to REGEXP_H_STAKBLOK saved positions.
 */
750 struct regexp_h_stack {
751 struct regexp_h_stack *s_nxt;
752 struct regexp_h_stack *s_prv;
753 const char *s_ptr[REGEXP_H_STAKBLOK];
754 };
756 #define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? \
757 regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
759 static regexp_h_inline void
760 regexp_h_pushwc(struct regexp_h_stack **sb,
761 struct regexp_h_stack **sp,
762 const char ***sc, const char *lp)
763 {
764 if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
765 return;
766 if (*sb == NULL) {
767 if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
768 return;
769 (*sb)->s_nxt = (*sb)->s_prv = NULL;
770 *sp = *sb;
771 *sc = &(*sb)->s_ptr[0];
772 } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
773 if ((*sp)->s_nxt == NULL) {
774 struct regexp_h_stack *bq;
776 if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
777 return;
778 bq->s_nxt = NULL;
779 bq->s_prv = *sp;
780 (*sp)->s_nxt = bq;
781 *sp = bq;
782 } else
783 *sp = (*sp)->s_nxt;
784 *sc = &(*sp)->s_ptr[0];
785 }
786 *(*sc)++ = lp;
787 }
789 static regexp_h_inline const char *
790 regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp,
791 const char ***sc, const char *lp)
792 {
793 if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
794 return &lp[-1];
795 if (*sp == NULL)
796 return regexp_h_firstwc;
797 if (*sc == &(*sp)->s_ptr[0]) {
798 if ((*sp)->s_prv == NULL) {
799 regexp_h_free(*sp);
800 *sp = NULL;
801 *sb = NULL;
802 return regexp_h_firstwc;
803 }
804 *sp = (*sp)->s_prv;
805 regexp_h_free((*sp)->s_nxt);
806 (*sp)->s_nxt = NULL ;
807 *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
808 }
809 return *(--(*sc));
810 }
812 static void
813 regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
814 {
815 for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
816 if ((*sp)->s_prv)
817 regexp_h_free((*sp)->s_prv);
818 if (*sp) {
819 if ((*sp)->s_prv)
820 regexp_h_free((*sp)->s_prv);
821 regexp_h_free(*sp);
822 }
823 *sp = *sb = NULL;
824 }
825 #else /* !REGEXP_H_WCHARS */
826 #define regexp_h_push(sb, sp, sc, lp)
827 #endif /* !REGEXP_H_WCHARS */
829 static int
830 regexp_h_advance(const char *lp, const char *ep)
831 {
832 register const char *curlp;
833 int c, least;
834 #ifdef REGEXP_H_WCHARS
835 int d;
836 struct regexp_h_stack *sb = NULL, *sp = NULL;
837 const char **sc;
838 #endif /* REGEXP_H_WCHARS */
839 char *bbeg;
840 int ct;
842 for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) {
844 case CCHR:
845 #ifdef REGEXP_H_WCHARS
846 case CCH1:
847 #endif
848 if (*ep++ == *lp++)
849 continue;
850 return(0);
852 #ifdef REGEXP_H_WCHARS
853 case CCHR|CMB:
854 if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1))
855 continue;
856 return(0);
858 case CCH2:
859 if (ep[0] == lp[0] && ep[1] == lp[1]) {
860 ep += 2, lp += 2;
861 continue;
862 }
863 return(0);
865 case CCH3:
866 if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) {
867 ep += 3, lp += 3;
868 continue;
869 }
870 return(0);
871 #endif /* REGEXP_H_WCHARS */
873 case CDOT:
874 if (*lp++)
875 continue;
876 return(0);
877 #ifdef REGEXP_H_WCHARS
878 case CDOT|CMB:
879 if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF)
880 continue;
881 return(0);
882 #endif /* REGEXP_H_WCHARS */
884 case CDOL:
885 if (*lp==0)
886 continue;
887 return(0);
889 case CCEOF:
890 loc2 = (char *)lp;
891 return(1);
893 case CCL:
894 c = *lp++ & 0377;
895 if(ISTHERE(c)) {
896 ep += 32;
897 continue;
898 }
899 return(0);
901 #ifdef REGEXP_H_WCHARS
902 case CCL|CMB:
903 case CNCL|CMB:
904 c = regexp_h_fetch(lp, 1);
905 if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) {
906 ep += (*ep & 0377) + 17;
907 continue;
908 }
909 return 0;
910 #endif /* REGEXP_H_WCHARS */
912 case CBRA:
913 braslist[*ep++ & 0377] = (char *)lp;
914 continue;
916 case CKET:
917 braelist[*ep++ & 0377] = (char *)lp;
918 continue;
920 case CBRC:
921 if (lp == regexp_h_bol && locs == NULL)
922 continue;
923 if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377))
924 && !regexp_h_uletter(lp[-1] & 0377)
925 && !isdigit(lp[-1] & 0377))
926 continue;
927 return(0);
929 #ifdef REGEXP_H_WCHARS
930 case CBRC|CMB:
931 c = regexp_h_show(lp);
932 d = regexp_h_previous(lp);
933 if ((iswdigit(c) || regexp_h_wuletter(c))
934 && !regexp_h_wuletter(d)
935 && !iswdigit(d))
936 continue;
937 return(0);
938 #endif /* REGEXP_H_WCHARS */
940 case CLET:
941 if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377))
942 continue;
943 return(0);
945 #ifdef REGEXP_H_WCHARS
946 case CLET|CMB:
947 c = regexp_h_show(lp);
948 if (!regexp_h_wuletter(c) && !iswdigit(c))
949 continue;
950 return(0);
951 #endif /* REGEXP_H_WCHARS */
953 case CCHR|RNGE:
954 c = *ep++;
955 regexp_h_getrnge(ep, least);
956 while(low--)
957 if(*lp++ != c)
958 return(0);
959 curlp = lp;
960 while(size--) {
961 regexp_h_push(&sb, &sp, &sc, lp);
962 if(*lp++ != c)
963 break;
964 }
965 if(size < 0) {
966 regexp_h_push(&sb, &sp, &sc, lp);
967 lp++;
968 }
969 ep += 2;
970 goto star;
972 #ifdef REGEXP_H_WCHARS
973 case CCHR|RNGE|CMB:
974 case CCH1|RNGE:
975 case CCH2|RNGE:
976 case CCH3|RNGE:
977 c = regexp_h_fetch(ep, 0);
978 regexp_h_getrnge(ep, least);
979 while (low--)
980 if (regexp_h_fetch(lp, 1) != c)
981 return 0;
982 curlp = lp;
983 while (size--) {
984 regexp_h_push(&sb, &sp, &sc, lp);
985 if (regexp_h_fetch(lp, 1) != c)
986 break;
987 }
988 if(size < 0) {
989 regexp_h_push(&sb, &sp, &sc, lp);
990 regexp_h_fetch(lp, 1);
991 }
992 ep += 2;
993 goto star;
994 #endif /* REGEXP_H_WCHARS */
996 case CDOT|RNGE:
997 regexp_h_getrnge(ep, least);
998 while(low--)
999 if(*lp++ == '\0')
1000 return(0);
1001 curlp = lp;
1002 while(size--) {
1003 regexp_h_push(&sb, &sp, &sc, lp);
1004 if(*lp++ == '\0')
1005 break;
1007 if(size < 0) {
1008 regexp_h_push(&sb, &sp, &sc, lp);
1009 lp++;
1011 ep += 2;
1012 goto star;
1014 #ifdef REGEXP_H_WCHARS
1015 case CDOT|RNGE|CMB:
1016 regexp_h_getrnge(ep, least);
1017 while (low--)
1018 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
1019 return 0;
1020 curlp = lp;
1021 while (size--) {
1022 regexp_h_push(&sb, &sp, &sc, lp);
1023 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
1024 break;
1026 if (size < 0) {
1027 regexp_h_push(&sb, &sp, &sc, lp);
1028 regexp_h_fetch(lp, 1);
1030 ep += 2;
1031 goto star;
1032 #endif /* REGEXP_H_WCHARS */
1034 case CCL|RNGE:
1035 regexp_h_getrnge(ep + 32, least);
1036 while(low--) {
1037 c = *lp++ & 0377;
1038 if(!ISTHERE(c))
1039 return(0);
1041 curlp = lp;
1042 while(size--) {
1043 regexp_h_push(&sb, &sp, &sc, lp);
1044 c = *lp++ & 0377;
1045 if(!ISTHERE(c))
1046 break;
1048 if(size < 0) {
1049 regexp_h_push(&sb, &sp, &sc, lp);
1050 lp++;
1052 ep += 34; /* 32 + 2 */
1053 goto star;
1055 #ifdef REGEXP_H_WCHARS
1056 case CCL|RNGE|CMB:
1057 case CNCL|RNGE|CMB:
1058 regexp_h_getrnge(ep + (*ep & 0377) + 17, least);
1059 while (low--) {
1060 c = regexp_h_fetch(lp, 1);
1061 if (!regexp_h_cclass(ep, c,
1062 (ep[-1] & 0377 & ~REGEXP_H_LEAST)
1063 == (CCL|RNGE|CMB)))
1064 return 0;
1066 curlp = lp;
1067 while (size--) {
1068 regexp_h_push(&sb, &sp, &sc, lp);
1069 c = regexp_h_fetch(lp, 1);
1070 if (!regexp_h_cclass(ep, c,
1071 (ep[-1] & 0377 & ~REGEXP_H_LEAST)
1072 == (CCL|RNGE|CMB)))
1073 break;
1075 if (size < 0) {
1076 regexp_h_push(&sb, &sp, &sc, lp);
1077 regexp_h_fetch(lp, 1);
1079 ep += (*ep & 0377) + 19;
1080 goto star;
1081 #endif /* REGEXP_H_WCHARS */
1083 case CBACK:
1084 bbeg = braslist[*ep & 0377];
1085 ct = braelist[*ep++ & 0377] - bbeg;
1087 if(strncmp(bbeg, lp, ct) == 0) {
1088 lp += ct;
1089 continue;
1091 return(0);
1093 case CBACK|STAR:
1094 bbeg = braslist[*ep & 0377];
1095 ct = braelist[*ep++ & 0377] - bbeg;
1096 curlp = lp;
1097 while(strncmp(bbeg, lp, ct) == 0)
1098 lp += ct;
1100 while(lp >= curlp) {
1101 if(regexp_h_advance(lp, ep)) return(1);
1102 lp -= ct;
1104 return(0);
1107 case CDOT|STAR:
1108 curlp = lp;
1109 do
1110 regexp_h_push(&sb, &sp, &sc, lp);
1111 while (*lp++);
1112 goto star;
1114 #ifdef REGEXP_H_WCHARS
1115 case CDOT|STAR|CMB:
1116 curlp = lp;
1117 do
1118 regexp_h_push(&sb, &sp, &sc, lp);
1119 while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF);
1120 goto star;
1121 #endif /* REGEXP_H_WCHARS */
1123 case CCHR|STAR:
1124 curlp = lp;
1125 do
1126 regexp_h_push(&sb, &sp, &sc, lp);
1127 while (*lp++ == *ep);
1128 ep++;
1129 goto star;
1131 #ifdef REGEXP_H_WCHARS
1132 case CCHR|STAR|CMB:
1133 case CCH1|STAR:
1134 case CCH2|STAR:
1135 case CCH3|STAR:
1136 curlp = lp;
1137 d = regexp_h_fetch(ep, 0);
1138 do
1139 regexp_h_push(&sb, &sp, &sc, lp);
1140 while (regexp_h_fetch(lp, 1) == d);
1141 goto star;
1142 #endif /* REGEXP_H_WCHARS */
1144 case CCL|STAR:
1145 curlp = lp;
1146 do {
1147 regexp_h_push(&sb, &sp, &sc, lp);
1148 c = *lp++ & 0377;
1149 } while(ISTHERE(c));
1150 ep += 32;
1151 goto star;
1153 #ifdef REGEXP_H_WCHARS
1154 case CCL|STAR|CMB:
1155 case CNCL|STAR|CMB:
1156 curlp = lp;
1157 do {
1158 regexp_h_push(&sb, &sp, &sc, lp);
1159 c = regexp_h_fetch(lp, 1);
1160 } while (regexp_h_cclass(ep, c, (ep[-1] & 0377)
1161 == (CCL|STAR|CMB)));
1162 ep += (*ep & 0377) + 17;
1163 goto star;
1164 #endif /* REGEXP_H_WCHARS */
1166 star:
1167 #ifdef REGEXP_H_WCHARS
1168 if (regexp_h_wchars == 0) {
1169 #endif
1170 do {
1171 if(--lp == locs)
1172 break;
1173 if (regexp_h_advance(lp, ep))
1174 return(1);
1175 } while (lp > curlp);
1176 #ifdef REGEXP_H_WCHARS
1177 } else {
1178 do {
1179 lp = regexp_h_pop(&sb, &sp, &sc, lp);
1180 if (lp <= locs)
1181 break;
1182 if (regexp_h_advance(lp, ep)) {
1183 regexp_h_zerostak(&sb, &sp);
1184 return(1);
1186 } while (lp > curlp);
1187 regexp_h_zerostak(&sb, &sp);
1189 #endif /* REGEXP_H_WCHARS */
1190 return(0);
1195 static void
1196 regexp_h_getrnge(register const char *str, int least)
1198 low = *str++ & 0377;
1199 size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
1202 int
1203 advance(const char *lp, const char *ep)
1205 REGEXP_H_ADVANCE_INIT /* skip past circf */
1206 regexp_h_bol = lp;
1207 #ifdef REGEXP_H_WCHARS
1208 regexp_h_firstwc = NULL;
1209 #endif /* REGEXP_H_WCHARS */
1210 return regexp_h_advance(lp, ep);