Mercurial > heirloom-ed
comparison regexp.h @ 0:1493bea5ac22 0.1
Initial version of the standalone heirloom-ed
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Mon, 05 Sep 2011 16:31:35 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1493bea5ac22 |
---|---|
1 /* | |
2 * Simple Regular Expression functions. Derived from Unix 7th Edition, | |
3 * /usr/src/cmd/expr.y | |
4 * | |
5 * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002. | |
6 * | |
7 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. | |
8 * | |
9 * Redistribution and use in source and binary forms, with or without | |
10 * modification, are permitted provided that the following conditions | |
11 * are met: | |
12 * Redistributions of source code and documentation must retain the | |
13 * above copyright notice, this list of conditions and the following | |
14 * disclaimer. | |
15 * Redistributions in binary form must reproduce the above copyright | |
16 * notice, this list of conditions and the following disclaimer in the | |
17 * documentation and/or other materials provided with the distribution. | |
18 * All advertising materials mentioning features or use of this software | |
19 * must display the following acknowledgement: | |
20 * This product includes software developed or owned by Caldera | |
21 * International, Inc. | |
22 * Neither the name of Caldera International, Inc. nor the names of | |
23 * other contributors may be used to endorse or promote products | |
24 * derived from this software without specific prior written permission. | |
25 * | |
26 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA | |
27 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR | |
28 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
30 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE | |
31 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | |
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
35 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | |
36 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | |
37 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
38 */ | |
39 | |
/*
 * SCCS identification string.  Under GCC it is tagged with the `used'
 * attribute (3.4 and later) or the `unused' attribute (older releases);
 * other compilers get no attribute at all.
 */
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define	REGEXP_H_USED	__attribute__ ((used))
#elif defined __GNUC__
#define	REGEXP_H_USED	__attribute__ ((unused))
#else
#define	REGEXP_H_USED
#endif
static const char regexp_h_sccsid[] REGEXP_H_USED =
	"@(#)regexp.sl 1.56 (gritter) 5/29/05";
49 | |
/*
 * Wide/multibyte character support is compiled in unless the header is
 * used from vi or built against dietlibc.
 */
#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
#define	REGEXP_H_WCHARS
#endif

/*
 * Opcodes of the compiled expression.  The two low bits are left free
 * so that STAR or RNGE can be or'ed into an opcode.
 */
#define	CBRA	2	/* \( : start of a group */
#define	CCHR	4	/* literal character */
#define	CDOT	8	/* . */
#define	CCL	12	/* [...] bracket expression */
/*	CLNUM	14	used in sed */
/*	CEND	16	used in sed */
#define	CDOL	20	/* $ at the end of the expression */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* \) : end of a group */
#define	CBACK	36	/* \1 ... \9 back-reference */
#define	CNCL	40	/* [^...] in multibyte mode */
#define	CBRC	44	/* \< */
#define	CLET	48	/* \> */
#define	CCH1	52	/* multibyte mode: literal stored in one byte */
#define	CCH2	56	/* multibyte mode: literal stored in two bytes */
#define	CCH3	60	/* multibyte mode: literal stored in three bytes */

/* Modifiers or'ed into the preceding opcode. */
#define	STAR	01	/* followed by * */
#define	RNGE	03	/* followed by \{m,n\} */
#define	REGEXP_H_LEAST	0100	/* \{m,\} : no upper bound */

/* Flag or'ed into an opcode that operates on multibyte characters. */
#ifdef	REGEXP_H_WCHARS
#define	CMB	0200
#else	/* !REGEXP_H_WCHARS */
#define	CMB	0
#endif	/* !REGEXP_H_WCHARS */

/* Maximum number of \( \) groups. */
#define	NBRA	9

/*
 * Set / test the bit for character c in the bitmap that `ep' points to.
 * The argument is evaluated twice; it is parenthesized so that any
 * expression may be passed.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])

#ifdef	REGEXP_H_WCHARS
/* Same test as ISTHERE, with the bitmap passed explicitly. */
#define	REGEXP_H_IS_THERE(ep, c)	((ep)[(c) >> 3] & bittab[(c) & 07])
#endif
89 | |
90 #include <ctype.h> | |
91 #include <string.h> | |
92 #include <limits.h> | |
93 #ifdef REGEXP_H_WCHARS | |
94 #include <stdlib.h> | |
95 #include <wchar.h> | |
96 #include <wctype.h> | |
97 #endif /* REGEXP_H_WCHARS */ | |
98 | |
/* True for a letter or an underscore (single-byte variant). */
#define	regexp_h_uletter(c)	(isalpha(c) || (c) == '_')
#ifdef	REGEXP_H_WCHARS
/* True for a letter or an underscore (wide character variant). */
#define	regexp_h_wuletter(c)	(iswalpha(c) || (c) == L'_')

/*
 * Used to allocate memory for the multibyte star algorithm.
 * Both hooks may be predefined by the including file.
 */
#ifndef	regexp_h_malloc
#define	regexp_h_malloc(n)	malloc(n)
#endif
#ifndef	regexp_h_free
#define	regexp_h_free(p)	free(p)
#endif

/*
 * Can be predefined to 'inline' to inline some multibyte functions;
 * may improve performance for files that contain many multibyte
 * sequences.
 */
#ifndef	regexp_h_inline
#define	regexp_h_inline
#endif

/*
 * Mask to determine whether the first byte of a sequence possibly
 * starts a multibyte character. Set to 0377 to force mbtowc() for
 * any byte sequence (except 0).
 */
#ifndef	REGEXP_H_MASK
#define	REGEXP_H_MASK	0200
#endif
#endif	/* REGEXP_H_WCHARS */

/*
 * For regexpr.h: storage class of compile() and of the matcher state,
 * and code run at the start of step() and advance().  All three are
 * empty unless predefined.
 */
#ifndef	regexp_h_static
#define	regexp_h_static
#endif
#ifndef	REGEXP_H_STEP_INIT
#define	REGEXP_H_STEP_INIT
#endif
#ifndef	REGEXP_H_ADVANCE_INIT
#define	REGEXP_H_ADVANCE_INIT
#endif
144 | |
145 char *braslist[NBRA]; | |
146 char *braelist[NBRA]; | |
147 int nbra; | |
148 char *loc1, *loc2, *locs; | |
149 int sed; | |
150 int nodelim; | |
151 | |
152 regexp_h_static int circf; | |
153 regexp_h_static int low; | |
154 regexp_h_static int size; | |
155 | |
156 regexp_h_static unsigned char bittab[] = { | |
157 1, | |
158 2, | |
159 4, | |
160 8, | |
161 16, | |
162 32, | |
163 64, | |
164 128 | |
165 }; | |
166 static int regexp_h_advance(register const char *lp, | |
167 register const char *ep); | |
168 static void regexp_h_getrnge(register const char *str, int least); | |
169 | |
170 static const char *regexp_h_bol; /* beginning of input line (for \<) */ | |
171 | |
172 #ifdef REGEXP_H_WCHARS | |
static int	regexp_h_wchars;	/* CMB in a multibyte locale, else 0 */
static int	regexp_h_mbcurmax;	/* MB_CUR_MAX as sampled by compile() */

static const char	*regexp_h_firstwc;	/* location of first
						   multibyte character
						   on input line */

/*
 * Read the next pattern character into c.  In multibyte mode, bytes are
 * taken from GETC() one at a time until mbtowc() accepts the sequence,
 * the delimiter `eof' has been read, or MB_LEN_MAX bytes are collected;
 * ERROR(67) is raised if no valid character results.  GETC, ERROR and
 * `eof' come from the expansion context.
 */
#define	regexp_h_getwc(c)	{ \
	if (regexp_h_wchars) { \
		char mbbuf[MB_LEN_MAX + 1], *mbptr; \
		wchar_t wcbuf; \
		int mb, len; \
		mbptr = mbbuf; \
		do { \
			mb = GETC(); \
			*mbptr++ = mb; \
			*mbptr = '\0'; \
		} while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
			&& mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
		if (len == -1) \
			ERROR(67); \
		(c) = wcbuf; \
	} else { \
		(c) = GETC(); \
	} \
}

/*
 * Append wide character wc in multibyte form at mb and advance mb past
 * it; me is the end of the output buffer.  ERROR(67) is raised for an
 * unconvertible character, ERROR(50) if the sequence would not fit.
 * A wctomb() result of 0 still advances mb by one byte.
 */
#define	regexp_h_store(wc, mb, me)	{ \
	int len; \
	if ((wc) == WEOF) \
		ERROR(67); \
	if ((len = (me) - (mb)) <= regexp_h_mbcurmax) { \
		char mt[MB_LEN_MAX]; \
		if (wctomb(mt, (wc)) >= len) \
			ERROR(50); \
	} \
	switch (len = wctomb((mb), (wc))) { \
	case -1: \
		ERROR(67); \
	case 0: \
		(mb)++; \
		break; \
	default: \
		(mb) += len; \
	} \
}
219 | |
220 static regexp_h_inline wint_t | |
221 regexp_h_fetchwc(const char **mb, int islp) | |
222 { | |
223 wchar_t wc; | |
224 int len; | |
225 | |
226 if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) { | |
227 (*mb)++; | |
228 return WEOF; | |
229 } | |
230 if (islp && regexp_h_firstwc == NULL) | |
231 regexp_h_firstwc = *mb; | |
232 /*if (len == 0) { | |
233 (*mb)++; | |
234 return L'\0'; | |
235 } handled in singlebyte code */ | |
236 *mb += len; | |
237 return wc; | |
238 } | |
239 | |
240 #define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \ | |
241 (*(mb)++&0377): \ | |
242 regexp_h_fetchwc(&(mb), islp)) | |
243 | |
244 static regexp_h_inline wint_t | |
245 regexp_h_showwc(const char *mb) | |
246 { | |
247 wchar_t wc; | |
248 | |
249 if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0) | |
250 return WEOF; | |
251 return wc; | |
252 } | |
253 | |
254 #define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \ | |
255 regexp_h_showwc(mb)) | |
256 | |
257 /* | |
258 * Return the character immediately preceding mb. Since no byte is | |
259 * required to be the first byte of a character, the longest multibyte | |
260 * character ending at &[mb-1] is searched. | |
261 */ | |
262 static regexp_h_inline wint_t | |
263 regexp_h_previous(const char *mb) | |
264 { | |
265 const char *p = mb; | |
266 wchar_t wc, lastwc = WEOF; | |
267 int len, max = 0; | |
268 | |
269 if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc) | |
270 return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF); | |
271 while (p-- > regexp_h_bol) { | |
272 mbtowc(NULL, NULL, 0); | |
273 if ((len = mbtowc(&wc, p, mb - p)) >= 0) { | |
274 if (len < max || len < mb - p) | |
275 break; | |
276 max = len; | |
277 lastwc = wc; | |
278 } else if (len < 0 && max > 0) | |
279 break; | |
280 } | |
281 return lastwc; | |
282 } | |
283 | |
284 #define regexp_h_cclass(set, c, af) \ | |
285 ((c) == 0 || (c) == WEOF ? 0 : ( \ | |
286 ((c) > 0177) ? \ | |
287 regexp_h_cclass_wc(set, c, af) : ( \ | |
288 REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \ | |
289 ) \ | |
290 ) \ | |
291 ) | |
292 | |
293 static regexp_h_inline int | |
294 regexp_h_cclass_wc(const char *set, register wint_t c, int af) | |
295 { | |
296 register wint_t wc, wl = WEOF; | |
297 const char *end; | |
298 | |
299 end = &set[18] + set[0] - 1; | |
300 set += 17; | |
301 while (set < end) { | |
302 wc = regexp_h_fetch(set, 0); | |
303 #ifdef REGEXP_H_VI_BACKSLASH | |
304 if (wc == '\\' && set < end && | |
305 (*set == ']' || *set == '-' || | |
306 *set == '^' || *set == '\\')) { | |
307 wc = regexp_h_fetch(set, 0); | |
308 } else | |
309 #endif /* REGEXP_H_VI_BACKSLASH */ | |
310 if (wc == '-' && wl != WEOF && set < end) { | |
311 wc = regexp_h_fetch(set, 0); | |
312 #ifdef REGEXP_H_VI_BACKSLASH | |
313 if (wc == '\\' && set < end && | |
314 (*set == ']' || *set == '-' || | |
315 *set == '^' || *set == '\\')) { | |
316 wc = regexp_h_fetch(set, 0); | |
317 } | |
318 #endif /* REGEXP_H_VI_BACKSLASH */ | |
319 if (c > wl && c < wc) | |
320 return af; | |
321 } | |
322 if (c == wc) | |
323 return af; | |
324 wl = wc; | |
325 } | |
326 return !af; | |
327 } | |
328 #else /* !REGEXP_H_WCHARS */ | |
/* Single-byte build: never in multibyte mode, one byte per character. */
#define	regexp_h_wchars	0
#define	regexp_h_getwc(c)	{ (c) = GETC(); }
331 #endif /* !REGEXP_H_WCHARS */ | |
332 | |
333 regexp_h_static char * | |
334 compile(char *instring, char *ep, const char *endbuf, int seof) | |
335 { | |
336 INIT /* Dependent declarations and initializations */ | |
337 register int c; | |
338 register int eof = seof; | |
339 char *lastep = instring; | |
340 int cclcnt; | |
341 char bracket[NBRA], *bracketp; | |
342 int closed; | |
343 char neg; | |
344 int lc; | |
345 int i, cflg; | |
346 | |
347 #ifdef REGEXP_H_WCHARS | |
348 char *eq; | |
349 regexp_h_mbcurmax = MB_CUR_MAX; | |
350 regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0; | |
351 #endif | |
352 lastep = 0; | |
353 bracketp = bracket; | |
354 if((c = GETC()) == eof || c == '\n') { | |
355 if (c == '\n') { | |
356 UNGETC(c); | |
357 nodelim = 1; | |
358 } | |
359 if(*ep == 0 && !sed) | |
360 ERROR(41); | |
361 if (bracketp > bracket) | |
362 ERROR(42); | |
363 RETURN(ep); | |
364 } | |
365 circf = closed = nbra = 0; | |
366 if (c == '^') | |
367 circf++; | |
368 else | |
369 UNGETC(c); | |
370 for (;;) { | |
371 if (ep >= endbuf) | |
372 ERROR(50); | |
373 regexp_h_getwc(c); | |
374 if(c != '*' && ((c != '\\') || (PEEKC() != '{'))) | |
375 lastep = ep; | |
376 if (c == eof) { | |
377 *ep++ = CCEOF; | |
378 if (bracketp > bracket) | |
379 ERROR(42); | |
380 RETURN(ep); | |
381 } | |
382 switch (c) { | |
383 | |
384 case '.': | |
385 *ep++ = CDOT|regexp_h_wchars; | |
386 continue; | |
387 | |
388 case '\n': | |
389 if (sed == 0) { | |
390 UNGETC(c); | |
391 *ep++ = CCEOF; | |
392 nodelim = 1; | |
393 RETURN(ep); | |
394 } | |
395 ERROR(36); | |
396 case '*': | |
397 if (lastep==0 || *lastep==CBRA || *lastep==CKET || | |
398 *lastep==(CBRC|regexp_h_wchars) || | |
399 *lastep==(CLET|regexp_h_wchars)) | |
400 goto defchar; | |
401 *lastep |= STAR; | |
402 continue; | |
403 | |
404 case '$': | |
405 if(PEEKC() != eof) | |
406 goto defchar; | |
407 *ep++ = CDOL; | |
408 continue; | |
409 | |
410 case '[': | |
411 #ifdef REGEXP_H_WCHARS | |
412 if (regexp_h_wchars == 0) { | |
413 #endif | |
414 if(&ep[33] >= endbuf) | |
415 ERROR(50); | |
416 | |
417 *ep++ = CCL; | |
418 lc = 0; | |
419 for(i = 0; i < 32; i++) | |
420 ep[i] = 0; | |
421 | |
422 neg = 0; | |
423 if((c = GETC()) == '^') { | |
424 neg = 1; | |
425 c = GETC(); | |
426 } | |
427 | |
428 do { | |
429 c &= 0377; | |
430 if(c == '\0' || c == '\n') | |
431 ERROR(49); | |
432 #ifdef REGEXP_H_VI_BACKSLASH | |
433 if(c == '\\' && ((c = PEEKC()) == ']' || | |
434 c == '-' || c == '^' || | |
435 c == '\\')) { | |
436 c = GETC(); | |
437 c &= 0377; | |
438 } else | |
439 #endif /* REGEXP_H_VI_BACKSLASH */ | |
440 if(c == '-' && lc != 0) { | |
441 if ((c = GETC()) == ']') { | |
442 PLACE('-'); | |
443 break; | |
444 } | |
445 #ifdef REGEXP_H_VI_BACKSLASH | |
446 if(c == '\\' && | |
447 ((c = PEEKC()) == ']' || | |
448 c == '-' || | |
449 c == '^' || | |
450 c == '\\')) | |
451 c = GETC(); | |
452 #endif /* REGEXP_H_VI_BACKSLASH */ | |
453 c &= 0377; | |
454 while(lc < c) { | |
455 PLACE(lc); | |
456 lc++; | |
457 } | |
458 } | |
459 lc = c; | |
460 PLACE(c); | |
461 } while((c = GETC()) != ']'); | |
462 if(neg) { | |
463 for(cclcnt = 0; cclcnt < 32; cclcnt++) | |
464 ep[cclcnt] ^= 0377; | |
465 ep[0] &= 0376; | |
466 } | |
467 | |
468 ep += 32; | |
469 #ifdef REGEXP_H_WCHARS | |
470 } else { | |
471 if (&ep[18] >= endbuf) | |
472 ERROR(50); | |
473 *ep++ = CCL|CMB; | |
474 *ep++ = 0; | |
475 lc = 0; | |
476 for (i = 0; i < 16; i++) | |
477 ep[i] = 0; | |
478 eq = &ep[16]; | |
479 regexp_h_getwc(c); | |
480 if (c == L'^') { | |
481 regexp_h_getwc(c); | |
482 ep[-2] = CNCL|CMB; | |
483 } | |
484 do { | |
485 if (c == '\0' || c == '\n') | |
486 ERROR(49); | |
487 #ifdef REGEXP_H_VI_BACKSLASH | |
488 if(c == '\\' && ((c = PEEKC()) == ']' || | |
489 c == '-' || c == '^' || | |
490 c == '\\')) { | |
491 regexp_h_store(c, eq, endbuf); | |
492 regexp_h_getwc(c); | |
493 } else | |
494 #endif /* REGEXP_H_VI_BACKSLASH */ | |
495 if (c == '-' && lc != 0 && lc <= 0177) { | |
496 regexp_h_store(c, eq, endbuf); | |
497 regexp_h_getwc(c); | |
498 if (c == ']') { | |
499 PLACE('-'); | |
500 break; | |
501 } | |
502 #ifdef REGEXP_H_VI_BACKSLASH | |
503 if(c == '\\' && | |
504 ((c = PEEKC()) == ']' || | |
505 c == '-' || | |
506 c == '^' || | |
507 c == '\\')) { | |
508 regexp_h_store(c, eq, | |
509 endbuf); | |
510 regexp_h_getwc(c); | |
511 } | |
512 #endif /* REGEXP_H_VI_BACKSLASH */ | |
513 while (lc < (c & 0177)) { | |
514 PLACE(lc); | |
515 lc++; | |
516 } | |
517 } | |
518 lc = c; | |
519 if (c <= 0177) | |
520 PLACE(c); | |
521 regexp_h_store(c, eq, endbuf); | |
522 regexp_h_getwc(c); | |
523 } while (c != L']'); | |
524 if ((i = eq - &ep[16]) > 255) | |
525 ERROR(50); | |
526 lastep[1] = i; | |
527 ep = eq; | |
528 } | |
529 #endif /* REGEXP_H_WCHARS */ | |
530 | |
531 continue; | |
532 | |
533 case '\\': | |
534 regexp_h_getwc(c); | |
535 switch(c) { | |
536 | |
537 case '(': | |
538 if(nbra >= NBRA) | |
539 ERROR(43); | |
540 *bracketp++ = nbra; | |
541 *ep++ = CBRA; | |
542 *ep++ = nbra++; | |
543 continue; | |
544 | |
545 case ')': | |
546 if(bracketp <= bracket) | |
547 ERROR(42); | |
548 *ep++ = CKET; | |
549 *ep++ = *--bracketp; | |
550 closed++; | |
551 continue; | |
552 | |
553 case '<': | |
554 *ep++ = CBRC|regexp_h_wchars; | |
555 continue; | |
556 | |
557 case '>': | |
558 *ep++ = CLET|regexp_h_wchars; | |
559 continue; | |
560 | |
561 case '{': | |
562 if(lastep == (char *) (0)) | |
563 goto defchar; | |
564 *lastep |= RNGE; | |
565 cflg = 0; | |
566 nlim: | |
567 c = GETC(); | |
568 i = 0; | |
569 do { | |
570 if ('0' <= c && c <= '9') | |
571 i = 10 * i + c - '0'; | |
572 else | |
573 ERROR(16); | |
574 } while(((c = GETC()) != '\\') && (c != ',')); | |
575 if (i > 255) | |
576 ERROR(11); | |
577 *ep++ = i; | |
578 if (c == ',') { | |
579 if(cflg++) | |
580 ERROR(44); | |
581 if((c = GETC()) == '\\') { | |
582 *ep++ = (char)255; | |
583 *lastep |= REGEXP_H_LEAST; | |
584 } else { | |
585 UNGETC(c); | |
586 goto nlim; /* get 2'nd number */ | |
587 } | |
588 } | |
589 if(GETC() != '}') | |
590 ERROR(45); | |
591 if(!cflg) /* one number */ | |
592 *ep++ = i; | |
593 else if((ep[-1] & 0377) < (ep[-2] & 0377)) | |
594 ERROR(46); | |
595 continue; | |
596 | |
597 case '\n': | |
598 ERROR(36); | |
599 | |
600 case 'n': | |
601 c = '\n'; | |
602 goto defchar; | |
603 | |
604 default: | |
605 if(c >= '1' && c <= '9') { | |
606 if((c -= '1') >= closed) | |
607 ERROR(25); | |
608 *ep++ = CBACK; | |
609 *ep++ = c; | |
610 continue; | |
611 } | |
612 } | |
613 /* Drop through to default to use \ to turn off special chars */ | |
614 | |
615 defchar: | |
616 default: | |
617 lastep = ep; | |
618 #ifdef REGEXP_H_WCHARS | |
619 if (regexp_h_wchars == 0) { | |
620 #endif | |
621 *ep++ = CCHR; | |
622 *ep++ = c; | |
623 #ifdef REGEXP_H_WCHARS | |
624 } else { | |
625 char mbbuf[MB_LEN_MAX]; | |
626 | |
627 switch (wctomb(mbbuf, c)) { | |
628 case 1: *ep++ = CCH1; | |
629 break; | |
630 case 2: *ep++ = CCH2; | |
631 break; | |
632 case 3: *ep++ = CCH3; | |
633 break; | |
634 default: | |
635 *ep++ = CCHR|CMB; | |
636 } | |
637 regexp_h_store(c, ep, endbuf); | |
638 } | |
639 #endif /* REGEXP_H_WCHARS */ | |
640 } | |
641 } | |
642 } | |
643 | |
644 int | |
645 step(const char *p1, const char *p2) | |
646 { | |
647 register int c; | |
648 #ifdef REGEXP_H_WCHARS | |
649 register int d; | |
650 #endif /* REGEXP_H_WCHARS */ | |
651 | |
652 REGEXP_H_STEP_INIT /* get circf */ | |
653 regexp_h_bol = p1; | |
654 #ifdef REGEXP_H_WCHARS | |
655 regexp_h_firstwc = NULL; | |
656 #endif /* REGEXP_H_WCHARS */ | |
657 if (circf) { | |
658 loc1 = (char *)p1; | |
659 return(regexp_h_advance(p1, p2)); | |
660 } | |
661 /* fast check for first character */ | |
662 if (*p2==CCHR) { | |
663 c = p2[1] & 0377; | |
664 do { | |
665 if ((*p1 & 0377) != c) | |
666 continue; | |
667 if (regexp_h_advance(p1, p2)) { | |
668 loc1 = (char *)p1; | |
669 return(1); | |
670 } | |
671 } while (*p1++); | |
672 return(0); | |
673 } | |
674 #ifdef REGEXP_H_WCHARS | |
675 else if (*p2==CCH1) { | |
676 do { | |
677 if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) { | |
678 loc1 = (char *)p1; | |
679 return(1); | |
680 } | |
681 c = regexp_h_fetch(p1, 1); | |
682 } while (c); | |
683 return(0); | |
684 } else if (*p2==CCH2) { | |
685 do { | |
686 if (p1[0] == p2[1] && p1[1] == p2[2] && | |
687 regexp_h_advance(p1, p2)) { | |
688 loc1 = (char *)p1; | |
689 return(1); | |
690 } | |
691 c = regexp_h_fetch(p1, 1); | |
692 } while (c); | |
693 return(0); | |
694 } else if (*p2==CCH3) { | |
695 do { | |
696 if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&& | |
697 regexp_h_advance(p1, p2)) { | |
698 loc1 = (char *)p1; | |
699 return(1); | |
700 } | |
701 c = regexp_h_fetch(p1, 1); | |
702 } while (c); | |
703 return(0); | |
704 } else if ((*p2&0377)==(CCHR|CMB)) { | |
705 d = regexp_h_fetch(p2, 0); | |
706 do { | |
707 c = regexp_h_fetch(p1, 1); | |
708 if (c == d && regexp_h_advance(p1, p2)) { | |
709 loc1 = (char *)p1; | |
710 return(1); | |
711 } | |
712 } while(c); | |
713 return(0); | |
714 } | |
715 /* regular algorithm */ | |
716 if (regexp_h_wchars) | |
717 do { | |
718 if (regexp_h_advance(p1, p2)) { | |
719 loc1 = (char *)p1; | |
720 return(1); | |
721 } | |
722 c = regexp_h_fetch(p1, 1); | |
723 } while (c); | |
724 else | |
725 #endif /* REGEXP_H_WCHARS */ | |
726 do { | |
727 if (regexp_h_advance(p1, p2)) { | |
728 loc1 = (char *)p1; | |
729 return(1); | |
730 } | |
731 } while (*p1++); | |
732 return(0); | |
733 } | |
734 | |
735 #ifdef REGEXP_H_WCHARS | |
/*
 * It is painfully slow to read character-wise backwards in a
 * multibyte string (see regexp_h_previous() above). For the star
 * algorithm, we therefore keep track of every character as it is
 * read in forward direction.
 *
 * Don't use alloca() for stack blocks since there is no measurable
 * speedup and huge amounts of memory are used up for long input
 * lines.
 */
#ifndef	REGEXP_H_STAKBLOK
#define	REGEXP_H_STAKBLOK	1000	/* positions per stack block */
#endif

/* One block of the doubly linked stack of character positions. */
struct	regexp_h_stack {
	struct regexp_h_stack	*s_nxt;
	struct regexp_h_stack	*s_prv;
	const char	*s_ptr[REGEXP_H_STAKBLOK];
};

/* Record position lp; does nothing unless in multibyte mode. */
#define	regexp_h_push(sb, sp, sc, lp)	(regexp_h_wchars ? \
			regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
758 | |
759 static regexp_h_inline void | |
760 regexp_h_pushwc(struct regexp_h_stack **sb, | |
761 struct regexp_h_stack **sp, | |
762 const char ***sc, const char *lp) | |
763 { | |
764 if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc) | |
765 return; | |
766 if (*sb == NULL) { | |
767 if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL) | |
768 return; | |
769 (*sb)->s_nxt = (*sb)->s_prv = NULL; | |
770 *sp = *sb; | |
771 *sc = &(*sb)->s_ptr[0]; | |
772 } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) { | |
773 if ((*sp)->s_nxt == NULL) { | |
774 struct regexp_h_stack *bq; | |
775 | |
776 if ((bq = regexp_h_malloc(sizeof *bq)) == NULL) | |
777 return; | |
778 bq->s_nxt = NULL; | |
779 bq->s_prv = *sp; | |
780 (*sp)->s_nxt = bq; | |
781 *sp = bq; | |
782 } else | |
783 *sp = (*sp)->s_nxt; | |
784 *sc = &(*sp)->s_ptr[0]; | |
785 } | |
786 *(*sc)++ = lp; | |
787 } | |
788 | |
789 static regexp_h_inline const char * | |
790 regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp, | |
791 const char ***sc, const char *lp) | |
792 { | |
793 if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc) | |
794 return &lp[-1]; | |
795 if (*sp == NULL) | |
796 return regexp_h_firstwc; | |
797 if (*sc == &(*sp)->s_ptr[0]) { | |
798 if ((*sp)->s_prv == NULL) { | |
799 regexp_h_free(*sp); | |
800 *sp = NULL; | |
801 *sb = NULL; | |
802 return regexp_h_firstwc; | |
803 } | |
804 *sp = (*sp)->s_prv; | |
805 regexp_h_free((*sp)->s_nxt); | |
806 (*sp)->s_nxt = NULL ; | |
807 *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK]; | |
808 } | |
809 return *(--(*sc)); | |
810 } | |
811 | |
812 static void | |
813 regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp) | |
814 { | |
815 for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt) | |
816 if ((*sp)->s_prv) | |
817 regexp_h_free((*sp)->s_prv); | |
818 if (*sp) { | |
819 if ((*sp)->s_prv) | |
820 regexp_h_free((*sp)->s_prv); | |
821 regexp_h_free(*sp); | |
822 } | |
823 *sp = *sb = NULL; | |
824 } | |
825 #else /* !REGEXP_H_WCHARS */ | |
#define	regexp_h_push(sb, sp, sc, lp)	/* single-byte build: no position stack */
827 #endif /* !REGEXP_H_WCHARS */ | |
828 | |
829 static int | |
830 regexp_h_advance(const char *lp, const char *ep) | |
831 { | |
832 register const char *curlp; | |
833 int c, least; | |
834 #ifdef REGEXP_H_WCHARS | |
835 int d; | |
836 struct regexp_h_stack *sb = NULL, *sp = NULL; | |
837 const char **sc; | |
838 #endif /* REGEXP_H_WCHARS */ | |
839 char *bbeg; | |
840 int ct; | |
841 | |
842 for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) { | |
843 | |
844 case CCHR: | |
845 #ifdef REGEXP_H_WCHARS | |
846 case CCH1: | |
847 #endif | |
848 if (*ep++ == *lp++) | |
849 continue; | |
850 return(0); | |
851 | |
852 #ifdef REGEXP_H_WCHARS | |
853 case CCHR|CMB: | |
854 if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1)) | |
855 continue; | |
856 return(0); | |
857 | |
858 case CCH2: | |
859 if (ep[0] == lp[0] && ep[1] == lp[1]) { | |
860 ep += 2, lp += 2; | |
861 continue; | |
862 } | |
863 return(0); | |
864 | |
865 case CCH3: | |
866 if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) { | |
867 ep += 3, lp += 3; | |
868 continue; | |
869 } | |
870 return(0); | |
871 #endif /* REGEXP_H_WCHARS */ | |
872 | |
873 case CDOT: | |
874 if (*lp++) | |
875 continue; | |
876 return(0); | |
877 #ifdef REGEXP_H_WCHARS | |
878 case CDOT|CMB: | |
879 if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF) | |
880 continue; | |
881 return(0); | |
882 #endif /* REGEXP_H_WCHARS */ | |
883 | |
884 case CDOL: | |
885 if (*lp==0) | |
886 continue; | |
887 return(0); | |
888 | |
889 case CCEOF: | |
890 loc2 = (char *)lp; | |
891 return(1); | |
892 | |
893 case CCL: | |
894 c = *lp++ & 0377; | |
895 if(ISTHERE(c)) { | |
896 ep += 32; | |
897 continue; | |
898 } | |
899 return(0); | |
900 | |
901 #ifdef REGEXP_H_WCHARS | |
902 case CCL|CMB: | |
903 case CNCL|CMB: | |
904 c = regexp_h_fetch(lp, 1); | |
905 if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) { | |
906 ep += (*ep & 0377) + 17; | |
907 continue; | |
908 } | |
909 return 0; | |
910 #endif /* REGEXP_H_WCHARS */ | |
911 | |
912 case CBRA: | |
913 braslist[*ep++ & 0377] = (char *)lp; | |
914 continue; | |
915 | |
916 case CKET: | |
917 braelist[*ep++ & 0377] = (char *)lp; | |
918 continue; | |
919 | |
920 case CBRC: | |
921 if (lp == regexp_h_bol && locs == NULL) | |
922 continue; | |
923 if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377)) | |
924 && !regexp_h_uletter(lp[-1] & 0377) | |
925 && !isdigit(lp[-1] & 0377)) | |
926 continue; | |
927 return(0); | |
928 | |
929 #ifdef REGEXP_H_WCHARS | |
930 case CBRC|CMB: | |
931 c = regexp_h_show(lp); | |
932 d = regexp_h_previous(lp); | |
933 if ((iswdigit(c) || regexp_h_wuletter(c)) | |
934 && !regexp_h_wuletter(d) | |
935 && !iswdigit(d)) | |
936 continue; | |
937 return(0); | |
938 #endif /* REGEXP_H_WCHARS */ | |
939 | |
940 case CLET: | |
941 if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377)) | |
942 continue; | |
943 return(0); | |
944 | |
945 #ifdef REGEXP_H_WCHARS | |
946 case CLET|CMB: | |
947 c = regexp_h_show(lp); | |
948 if (!regexp_h_wuletter(c) && !iswdigit(c)) | |
949 continue; | |
950 return(0); | |
951 #endif /* REGEXP_H_WCHARS */ | |
952 | |
953 case CCHR|RNGE: | |
954 c = *ep++; | |
955 regexp_h_getrnge(ep, least); | |
956 while(low--) | |
957 if(*lp++ != c) | |
958 return(0); | |
959 curlp = lp; | |
960 while(size--) { | |
961 regexp_h_push(&sb, &sp, &sc, lp); | |
962 if(*lp++ != c) | |
963 break; | |
964 } | |
965 if(size < 0) { | |
966 regexp_h_push(&sb, &sp, &sc, lp); | |
967 lp++; | |
968 } | |
969 ep += 2; | |
970 goto star; | |
971 | |
972 #ifdef REGEXP_H_WCHARS | |
973 case CCHR|RNGE|CMB: | |
974 case CCH1|RNGE: | |
975 case CCH2|RNGE: | |
976 case CCH3|RNGE: | |
977 c = regexp_h_fetch(ep, 0); | |
978 regexp_h_getrnge(ep, least); | |
979 while (low--) | |
980 if (regexp_h_fetch(lp, 1) != c) | |
981 return 0; | |
982 curlp = lp; | |
983 while (size--) { | |
984 regexp_h_push(&sb, &sp, &sc, lp); | |
985 if (regexp_h_fetch(lp, 1) != c) | |
986 break; | |
987 } | |
988 if(size < 0) { | |
989 regexp_h_push(&sb, &sp, &sc, lp); | |
990 regexp_h_fetch(lp, 1); | |
991 } | |
992 ep += 2; | |
993 goto star; | |
994 #endif /* REGEXP_H_WCHARS */ | |
995 | |
996 case CDOT|RNGE: | |
997 regexp_h_getrnge(ep, least); | |
998 while(low--) | |
999 if(*lp++ == '\0') | |
1000 return(0); | |
1001 curlp = lp; | |
1002 while(size--) { | |
1003 regexp_h_push(&sb, &sp, &sc, lp); | |
1004 if(*lp++ == '\0') | |
1005 break; | |
1006 } | |
1007 if(size < 0) { | |
1008 regexp_h_push(&sb, &sp, &sc, lp); | |
1009 lp++; | |
1010 } | |
1011 ep += 2; | |
1012 goto star; | |
1013 | |
1014 #ifdef REGEXP_H_WCHARS | |
1015 case CDOT|RNGE|CMB: | |
1016 regexp_h_getrnge(ep, least); | |
1017 while (low--) | |
1018 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) | |
1019 return 0; | |
1020 curlp = lp; | |
1021 while (size--) { | |
1022 regexp_h_push(&sb, &sp, &sc, lp); | |
1023 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) | |
1024 break; | |
1025 } | |
1026 if (size < 0) { | |
1027 regexp_h_push(&sb, &sp, &sc, lp); | |
1028 regexp_h_fetch(lp, 1); | |
1029 } | |
1030 ep += 2; | |
1031 goto star; | |
1032 #endif /* REGEXP_H_WCHARS */ | |
1033 | |
1034 case CCL|RNGE: | |
1035 regexp_h_getrnge(ep + 32, least); | |
1036 while(low--) { | |
1037 c = *lp++ & 0377; | |
1038 if(!ISTHERE(c)) | |
1039 return(0); | |
1040 } | |
1041 curlp = lp; | |
1042 while(size--) { | |
1043 regexp_h_push(&sb, &sp, &sc, lp); | |
1044 c = *lp++ & 0377; | |
1045 if(!ISTHERE(c)) | |
1046 break; | |
1047 } | |
1048 if(size < 0) { | |
1049 regexp_h_push(&sb, &sp, &sc, lp); | |
1050 lp++; | |
1051 } | |
1052 ep += 34; /* 32 + 2 */ | |
1053 goto star; | |
1054 | |
1055 #ifdef REGEXP_H_WCHARS | |
1056 case CCL|RNGE|CMB: | |
1057 case CNCL|RNGE|CMB: | |
1058 regexp_h_getrnge(ep + (*ep & 0377) + 17, least); | |
1059 while (low--) { | |
1060 c = regexp_h_fetch(lp, 1); | |
1061 if (!regexp_h_cclass(ep, c, | |
1062 (ep[-1] & 0377 & ~REGEXP_H_LEAST) | |
1063 == (CCL|RNGE|CMB))) | |
1064 return 0; | |
1065 } | |
1066 curlp = lp; | |
1067 while (size--) { | |
1068 regexp_h_push(&sb, &sp, &sc, lp); | |
1069 c = regexp_h_fetch(lp, 1); | |
1070 if (!regexp_h_cclass(ep, c, | |
1071 (ep[-1] & 0377 & ~REGEXP_H_LEAST) | |
1072 == (CCL|RNGE|CMB))) | |
1073 break; | |
1074 } | |
1075 if (size < 0) { | |
1076 regexp_h_push(&sb, &sp, &sc, lp); | |
1077 regexp_h_fetch(lp, 1); | |
1078 } | |
1079 ep += (*ep & 0377) + 19; | |
1080 goto star; | |
1081 #endif /* REGEXP_H_WCHARS */ | |
1082 | |
1083 case CBACK: | |
1084 bbeg = braslist[*ep & 0377]; | |
1085 ct = braelist[*ep++ & 0377] - bbeg; | |
1086 | |
1087 if(strncmp(bbeg, lp, ct) == 0) { | |
1088 lp += ct; | |
1089 continue; | |
1090 } | |
1091 return(0); | |
1092 | |
1093 case CBACK|STAR: | |
1094 bbeg = braslist[*ep & 0377]; | |
1095 ct = braelist[*ep++ & 0377] - bbeg; | |
1096 curlp = lp; | |
1097 while(strncmp(bbeg, lp, ct) == 0) | |
1098 lp += ct; | |
1099 | |
1100 while(lp >= curlp) { | |
1101 if(regexp_h_advance(lp, ep)) return(1); | |
1102 lp -= ct; | |
1103 } | |
1104 return(0); | |
1105 | |
1106 | |
1107 case CDOT|STAR: | |
1108 curlp = lp; | |
1109 do | |
1110 regexp_h_push(&sb, &sp, &sc, lp); | |
1111 while (*lp++); | |
1112 goto star; | |
1113 | |
1114 #ifdef REGEXP_H_WCHARS | |
1115 case CDOT|STAR|CMB: | |
1116 curlp = lp; | |
1117 do | |
1118 regexp_h_push(&sb, &sp, &sc, lp); | |
1119 while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF); | |
1120 goto star; | |
1121 #endif /* REGEXP_H_WCHARS */ | |
1122 | |
1123 case CCHR|STAR: | |
1124 curlp = lp; | |
1125 do | |
1126 regexp_h_push(&sb, &sp, &sc, lp); | |
1127 while (*lp++ == *ep); | |
1128 ep++; | |
1129 goto star; | |
1130 | |
1131 #ifdef REGEXP_H_WCHARS | |
1132 case CCHR|STAR|CMB: | |
1133 case CCH1|STAR: | |
1134 case CCH2|STAR: | |
1135 case CCH3|STAR: | |
1136 curlp = lp; | |
1137 d = regexp_h_fetch(ep, 0); | |
1138 do | |
1139 regexp_h_push(&sb, &sp, &sc, lp); | |
1140 while (regexp_h_fetch(lp, 1) == d); | |
1141 goto star; | |
1142 #endif /* REGEXP_H_WCHARS */ | |
1143 | |
1144 case CCL|STAR: | |
1145 curlp = lp; | |
1146 do { | |
1147 regexp_h_push(&sb, &sp, &sc, lp); | |
1148 c = *lp++ & 0377; | |
1149 } while(ISTHERE(c)); | |
1150 ep += 32; | |
1151 goto star; | |
1152 | |
1153 #ifdef REGEXP_H_WCHARS | |
1154 case CCL|STAR|CMB: | |
1155 case CNCL|STAR|CMB: | |
1156 curlp = lp; | |
1157 do { | |
1158 regexp_h_push(&sb, &sp, &sc, lp); | |
1159 c = regexp_h_fetch(lp, 1); | |
1160 } while (regexp_h_cclass(ep, c, (ep[-1] & 0377) | |
1161 == (CCL|STAR|CMB))); | |
1162 ep += (*ep & 0377) + 17; | |
1163 goto star; | |
1164 #endif /* REGEXP_H_WCHARS */ | |
1165 | |
1166 star: | |
1167 #ifdef REGEXP_H_WCHARS | |
1168 if (regexp_h_wchars == 0) { | |
1169 #endif | |
1170 do { | |
1171 if(--lp == locs) | |
1172 break; | |
1173 if (regexp_h_advance(lp, ep)) | |
1174 return(1); | |
1175 } while (lp > curlp); | |
1176 #ifdef REGEXP_H_WCHARS | |
1177 } else { | |
1178 do { | |
1179 lp = regexp_h_pop(&sb, &sp, &sc, lp); | |
1180 if (lp <= locs) | |
1181 break; | |
1182 if (regexp_h_advance(lp, ep)) { | |
1183 regexp_h_zerostak(&sb, &sp); | |
1184 return(1); | |
1185 } | |
1186 } while (lp > curlp); | |
1187 regexp_h_zerostak(&sb, &sp); | |
1188 } | |
1189 #endif /* REGEXP_H_WCHARS */ | |
1190 return(0); | |
1191 | |
1192 } | |
1193 } | |
1194 | |
1195 static void | |
1196 regexp_h_getrnge(register const char *str, int least) | |
1197 { | |
1198 low = *str++ & 0377; | |
1199 size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low; | |
1200 } | |
1201 | |
1202 int | |
1203 advance(const char *lp, const char *ep) | |
1204 { | |
1205 REGEXP_H_ADVANCE_INIT /* skip past circf */ | |
1206 regexp_h_bol = lp; | |
1207 #ifdef REGEXP_H_WCHARS | |
1208 regexp_h_firstwc = NULL; | |
1209 #endif /* REGEXP_H_WCHARS */ | |
1210 return regexp_h_advance(lp, ep); | |
1211 } |