Mercurial > heirloom-ed
comparison regexp.h @ 4:4165f1b57d18 default tip
Become SUSv3 compatible and thus remove own regexp code
The Heirloom tools can be compiled to comply with several standards.
This version does not need this flexibility. We can omit the
regexp code and use the system's, by using the SU3 variant of ed.
This is the latest of the supported standards.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Mon, 13 Apr 2015 17:26:51 +0200 |
parents | ac52712b2b5e |
children |
comparison
equal
deleted
inserted
replaced
3:ac52712b2b5e | 4:4165f1b57d18 |
---|---|
1 /* | |
2 * Simple Regular Expression functions. Derived from Unix 7th Edition, | |
3 * /usr/src/cmd/expr.y | |
4 * | |
5 * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002. | |
6 * | |
7 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. | |
8 * | |
9 * Redistribution and use in source and binary forms, with or without | |
10 * modification, are permitted provided that the following conditions | |
11 * are met: | |
12 * Redistributions of source code and documentation must retain the | |
13 * above copyright notice, this list of conditions and the following | |
14 * disclaimer. | |
15 * Redistributions in binary form must reproduce the above copyright | |
16 * notice, this list of conditions and the following disclaimer in the | |
17 * documentation and/or other materials provided with the distribution. | |
18 * All advertising materials mentioning features or use of this software | |
19 * must display the following acknowledgement: | |
20 * This product includes software developed or owned by Caldera | |
21 * International, Inc. | |
22 * Neither the name of Caldera International, Inc. nor the names of | |
23 * other contributors may be used to endorse or promote products | |
24 * derived from this software without specific prior written permission. | |
25 * | |
26 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA | |
27 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR | |
28 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
30 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE | |
31 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | |
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
35 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | |
36 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | |
37 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
38 */ | |
39 | |
40 #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 | |
41 #define REGEXP_H_USED __attribute__ ((used)) | |
42 #elif defined __GNUC__ | |
43 #define REGEXP_H_USED __attribute__ ((unused)) | |
44 #else | |
45 #define REGEXP_H_USED | |
46 #endif | |
47 static const char regexp_h_sccsid[] REGEXP_H_USED = | |
48 "@(#)regexp.sl 1.56 (gritter) 5/29/05"; | |
49 | |
50 #if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__) | |
51 #define REGEXP_H_WCHARS | |
52 #endif | |
53 | |
54 #define CBRA 2 | |
55 #define CCHR 4 | |
56 #define CDOT 8 | |
57 #define CCL 12 | |
58 /* CLNUM 14 used in sed */ | |
59 /* CEND 16 used in sed */ | |
60 #define CDOL 20 | |
61 #define CCEOF 22 | |
62 #define CKET 24 | |
63 #define CBACK 36 | |
64 #define CNCL 40 | |
65 #define CBRC 44 | |
66 #define CLET 48 | |
67 #define CCH1 52 | |
68 #define CCH2 56 | |
69 #define CCH3 60 | |
70 | |
71 #define STAR 01 | |
72 #define RNGE 03 | |
73 #define REGEXP_H_LEAST 0100 | |
74 | |
75 #ifdef REGEXP_H_WCHARS | |
76 #define CMB 0200 | |
77 #else /* !REGEXP_H_WCHARS */ | |
78 #define CMB 0 | |
79 #endif /* !REGEXP_H_WCHARS */ | |
80 | |
81 #define NBRA 9 | |
82 | |
83 #define PLACE(c) ep[c >> 3] |= bittab[c & 07] | |
84 #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07]) | |
85 | |
86 #ifdef REGEXP_H_WCHARS | |
87 #define REGEXP_H_IS_THERE(ep, c) ((ep)[c >> 3] & bittab[c & 07]) | |
88 #endif | |
89 | |
90 #include <ctype.h> | |
91 #include <string.h> | |
92 #include <limits.h> | |
93 #ifdef REGEXP_H_WCHARS | |
94 #include <stdlib.h> | |
95 #include <wchar.h> | |
96 #include <wctype.h> | |
97 #endif /* REGEXP_H_WCHARS */ | |
98 | |
99 #define regexp_h_uletter(c) (isalpha(c) || (c) == '_') | |
100 #ifdef REGEXP_H_WCHARS | |
101 #define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_') | |
102 | |
103 /* | |
104 * Used to allocate memory for the multibyte star algorithm. | |
105 */ | |
106 #ifndef regexp_h_malloc | |
107 #define regexp_h_malloc(n) malloc(n) | |
108 #endif | |
109 #ifndef regexp_h_free | |
110 #define regexp_h_free(p) free(p) | |
111 #endif | |
112 | |
113 /* | |
114 * Can be predefined to 'inline' to inline some multibyte functions; | |
115 * may improve performance for files that contain many multibyte | |
116 * sequences. | |
117 */ | |
118 #ifndef regexp_h_inline | |
119 #define regexp_h_inline | |
120 #endif | |
121 | |
122 /* | |
123 * Mask to determine whether the first byte of a sequence possibly | |
124 * starts a multibyte character. Set to 0377 to force mbtowc() for | |
125 * any byte sequence (except 0). | |
126 */ | |
127 #ifndef REGEXP_H_MASK | |
128 #define REGEXP_H_MASK 0200 | |
129 #endif | |
130 #endif /* REGEXP_H_WCHARS */ | |
131 | |
132 /* | |
133 * For regexpr.h. | |
134 */ | |
135 #ifndef regexp_h_static | |
136 #define regexp_h_static | |
137 #endif | |
138 #ifndef REGEXP_H_STEP_INIT | |
139 #define REGEXP_H_STEP_INIT | |
140 #endif | |
141 #ifndef REGEXP_H_ADVANCE_INIT | |
142 #define REGEXP_H_ADVANCE_INIT | |
143 #endif | |
144 | |
145 char *braslist[NBRA]; | |
146 char *braelist[NBRA]; | |
147 int nbra; | |
148 char *loc1, *loc2, *locs; | |
149 int sed; | |
150 int nodelim; | |
151 | |
152 regexp_h_static int circf; | |
153 regexp_h_static int low; | |
154 regexp_h_static int size; | |
155 | |
156 regexp_h_static unsigned char bittab[] = { | |
157 1, | |
158 2, | |
159 4, | |
160 8, | |
161 16, | |
162 32, | |
163 64, | |
164 128 | |
165 }; | |
166 static int regexp_h_advance(register const char *lp, | |
167 register const char *ep); | |
168 static void regexp_h_getrnge(register const char *str, int least); | |
169 | |
170 static const char *regexp_h_bol; /* beginning of input line (for \<) */ | |
171 | |
172 #ifdef REGEXP_H_WCHARS | |
173 static int regexp_h_wchars; | |
174 static int regexp_h_mbcurmax; | |
175 | |
176 static const char *regexp_h_firstwc; /* location of first | |
177 multibyte character | |
178 on input line */ | |
179 | |
180 #define regexp_h_getwc(c) { \ | |
181 if (regexp_h_wchars) { \ | |
182 char mbbuf[MB_LEN_MAX + 1], *mbptr; \ | |
183 wchar_t wcbuf; \ | |
184 int mb, len; \ | |
185 mbptr = mbbuf; \ | |
186 do { \ | |
187 mb = GETC(); \ | |
188 *mbptr++ = mb; \ | |
189 *mbptr = '\0'; \ | |
190 } while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \ | |
191 && mb != eof && mbptr < mbbuf + MB_LEN_MAX); \ | |
192 if (len == -1) \ | |
193 ERROR(67); \ | |
194 c = wcbuf; \ | |
195 } else { \ | |
196 c = GETC(); \ | |
197 } \ | |
198 } | |
199 | |
200 #define regexp_h_store(wc, mb, me) { \ | |
201 int len; \ | |
202 if (wc == WEOF) \ | |
203 ERROR(67); \ | |
204 if ((len = me - mb) <= regexp_h_mbcurmax) { \ | |
205 char mt[MB_LEN_MAX]; \ | |
206 if (wctomb(mt, wc) >= len) \ | |
207 ERROR(50); \ | |
208 } \ | |
209 switch (len = wctomb(mb, wc)) { \ | |
210 case -1: \ | |
211 ERROR(67); \ | |
212 case 0: \ | |
213 mb++; \ | |
214 break; \ | |
215 default: \ | |
216 mb += len; \ | |
217 } \ | |
218 } | |
219 | |
220 static regexp_h_inline wint_t | |
221 regexp_h_fetchwc(const char **mb, int islp) | |
222 { | |
223 wchar_t wc; | |
224 int len; | |
225 | |
226 if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) { | |
227 (*mb)++; | |
228 return WEOF; | |
229 } | |
230 if (islp && regexp_h_firstwc == NULL) | |
231 regexp_h_firstwc = *mb; | |
232 /*if (len == 0) { | |
233 (*mb)++; | |
234 return L'\0'; | |
235 } handled in singlebyte code */ | |
236 *mb += len; | |
237 return wc; | |
238 } | |
239 | |
240 #define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \ | |
241 (*(mb)++&0377): \ | |
242 regexp_h_fetchwc(&(mb), islp)) | |
243 | |
244 static regexp_h_inline wint_t | |
245 regexp_h_showwc(const char *mb) | |
246 { | |
247 wchar_t wc; | |
248 | |
249 if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0) | |
250 return WEOF; | |
251 return wc; | |
252 } | |
253 | |
254 #define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \ | |
255 regexp_h_showwc(mb)) | |
256 | |
257 /* | |
258 * Return the character immediately preceding mb. Since no byte is | |
259 * required to be the first byte of a character, the longest multibyte | |
260 * character ending at &[mb-1] is searched. | |
261 */ | |
262 static regexp_h_inline wint_t | |
263 regexp_h_previous(const char *mb) | |
264 { | |
265 const char *p = mb; | |
266 wchar_t wc, lastwc = WEOF; | |
267 int len, max = 0; | |
268 | |
269 if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc) | |
270 return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF); | |
271 while (p-- > regexp_h_bol) { | |
272 mbtowc(NULL, NULL, 0); | |
273 if ((len = mbtowc(&wc, p, mb - p)) >= 0) { | |
274 if (len < max || len < mb - p) | |
275 break; | |
276 max = len; | |
277 lastwc = wc; | |
278 } else if (len < 0 && max > 0) | |
279 break; | |
280 } | |
281 return lastwc; | |
282 } | |
283 | |
284 #define regexp_h_cclass(set, c, af) \ | |
285 ((c) == 0 || (c) == WEOF ? 0 : ( \ | |
286 ((c) > 0177) ? \ | |
287 regexp_h_cclass_wc(set, c, af) : ( \ | |
288 REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \ | |
289 ) \ | |
290 ) \ | |
291 ) | |
292 | |
293 static regexp_h_inline int | |
294 regexp_h_cclass_wc(const char *set, register wint_t c, int af) | |
295 { | |
296 register wint_t wc, wl = WEOF; | |
297 const char *end; | |
298 | |
299 end = &set[18] + set[0] - 1; | |
300 set += 17; | |
301 while (set < end) { | |
302 wc = regexp_h_fetch(set, 0); | |
303 #ifdef REGEXP_H_VI_BACKSLASH | |
304 if (wc == '\\' && set < end && | |
305 (*set == ']' || *set == '-' || | |
306 *set == '^' || *set == '\\')) { | |
307 wc = regexp_h_fetch(set, 0); | |
308 } else | |
309 #endif /* REGEXP_H_VI_BACKSLASH */ | |
310 if (wc == '-' && wl != WEOF && set < end) { | |
311 wc = regexp_h_fetch(set, 0); | |
312 #ifdef REGEXP_H_VI_BACKSLASH | |
313 if (wc == '\\' && set < end && | |
314 (*set == ']' || *set == '-' || | |
315 *set == '^' || *set == '\\')) { | |
316 wc = regexp_h_fetch(set, 0); | |
317 } | |
318 #endif /* REGEXP_H_VI_BACKSLASH */ | |
319 if (c > wl && c < wc) | |
320 return af; | |
321 } | |
322 if (c == wc) | |
323 return af; | |
324 wl = wc; | |
325 } | |
326 return !af; | |
327 } | |
328 #else /* !REGEXP_H_WCHARS */ | |
329 #define regexp_h_wchars 0 | |
330 #define regexp_h_getwc(c) { c = GETC(); } | |
331 #endif /* !REGEXP_H_WCHARS */ | |
332 | |
/*
 * Compile a regular expression into the buffer starting at ep.
 *
 * The expression text is read through GETC()/PEEKC()/UNGETC(), failures
 * are reported through ERROR(n) and the function leaves through
 * RETURN(ep); none of these macros (nor INIT) is defined in this part of
 * the file, so they are presumably supplied by the including program.
 *
 * instring - only used as the (immediately overwritten) initial value of
 *            lastep here; INIT may use it as well.
 * ep       - start of the output buffer for the compiled form.
 * endbuf   - end of the output buffer; ERROR(50) is raised when reached.
 * seof     - the character that terminates the expression.
 *
 * Side effects visible here: sets circf, nbra and nodelim, and in
 * REGEXP_H_WCHARS builds regexp_h_mbcurmax and regexp_h_wchars.
 */
333 regexp_h_static char * | |
334 compile(char *instring, char *ep, const char *endbuf, int seof) | |
335 { | |
336 INIT /* Dependent declarations and initializations */ | |
337 register int c; | |
338 register int eof = seof; | |
/* lastep: start of the most recent item, the target of '*' and \{ \};
 * the initializer below is replaced by 0 before the first use */
339 char *lastep = instring; | |
340 int cclcnt; | |
341 char bracket[NBRA], *bracketp; | |
342 int closed; | |
343 char neg; | |
344 int lc; | |
345 int i, cflg; | |
346 | |
347 #ifdef REGEXP_H_WCHARS | |
348 char *eq; | |
349 regexp_h_mbcurmax = MB_CUR_MAX; | |
350 regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0; | |
351 #endif | |
352 lastep = 0; | |
353 bracketp = bracket; | |
/* empty expression: keep whatever is already compiled in ep */
354 if((c = GETC()) == eof || c == '\n') { | |
355 if (c == '\n') { | |
356 UNGETC(c); | |
357 nodelim = 1; | |
358 } | |
359 if(*ep == 0 && !sed) | |
360 ERROR(41); | |
361 if (bracketp > bracket) | |
362 ERROR(42); | |
363 RETURN(ep); | |
364 } | |
365 circf = closed = nbra = 0; | |
366 if (c == '^') | |
367 circf++; | |
368 else | |
369 UNGETC(c); | |
370 for (;;) { | |
371 if (ep >= endbuf) | |
372 ERROR(50); | |
373 regexp_h_getwc(c); | |
374 if(c != '*' && ((c != '\\') || (PEEKC() != '{'))) | |
375 lastep = ep; | |
376 if (c == eof) { | |
377 *ep++ = CCEOF; | |
378 if (bracketp > bracket) | |
379 ERROR(42); | |
380 RETURN(ep); | |
381 } | |
382 switch (c) { | |
383 | |
384 case '.': | |
385 *ep++ = CDOT|regexp_h_wchars; | |
386 continue; | |
387 | |
388 case '\n': | |
389 if (sed == 0) { | |
390 UNGETC(c); | |
391 *ep++ = CCEOF; | |
392 nodelim = 1; | |
393 RETURN(ep); | |
394 } | |
395 ERROR(36); | |
396 case '*': | |
397 if (lastep==0 || *lastep==CBRA || *lastep==CKET || | |
398 *lastep==(CBRC|regexp_h_wchars) || | |
399 *lastep==(CLET|regexp_h_wchars)) | |
400 goto defchar; | |
401 *lastep |= STAR; | |
402 continue; | |
403 | |
404 case '$': | |
405 if(PEEKC() != eof) | |
406 goto defchar; | |
407 *ep++ = CDOL; | |
408 continue; | |
409 | |
/*
 * Bracket expression. Single-byte layout: CCL followed by a 32-byte
 * bitmap (inverted for a negated class). Multibyte layout: CCL|CMB or
 * CNCL|CMB, one length byte, a 16-byte bitmap for characters <= 0177,
 * then every member stored as a multibyte sequence.
 */
410 case '[': | |
411 #ifdef REGEXP_H_WCHARS | |
412 if (regexp_h_wchars == 0) { | |
413 #endif | |
414 if(&ep[33] >= endbuf) | |
415 ERROR(50); | |
416 | |
417 *ep++ = CCL; | |
418 lc = 0; | |
419 for(i = 0; i < 32; i++) | |
420 ep[i] = 0; | |
421 | |
422 neg = 0; | |
423 if((c = GETC()) == '^') { | |
424 neg = 1; | |
425 c = GETC(); | |
426 } | |
427 | |
428 do { | |
429 c &= 0377; | |
430 if(c == '\0' || c == '\n') | |
431 ERROR(49); | |
432 #ifdef REGEXP_H_VI_BACKSLASH | |
433 if(c == '\\' && ((c = PEEKC()) == ']' || | |
434 c == '-' || c == '^' || | |
435 c == '\\')) { | |
436 c = GETC(); | |
437 c &= 0377; | |
438 } else | |
439 #endif /* REGEXP_H_VI_BACKSLASH */ | |
440 if(c == '-' && lc != 0) { | |
441 if ((c = GETC()) == ']') { | |
442 PLACE('-'); | |
443 break; | |
444 } | |
445 #ifdef REGEXP_H_VI_BACKSLASH | |
446 if(c == '\\' && | |
447 ((c = PEEKC()) == ']' || | |
448 c == '-' || | |
449 c == '^' || | |
450 c == '\\')) | |
451 c = GETC(); | |
452 #endif /* REGEXP_H_VI_BACKSLASH */ | |
453 c &= 0377; | |
454 while(lc < c) { | |
455 PLACE(lc); | |
456 lc++; | |
457 } | |
458 } | |
459 lc = c; | |
460 PLACE(c); | |
461 } while((c = GETC()) != ']'); | |
462 if(neg) { | |
463 for(cclcnt = 0; cclcnt < 32; cclcnt++) | |
464 ep[cclcnt] ^= 0377; | |
465 ep[0] &= 0376; | |
466 } | |
467 | |
468 ep += 32; | |
469 #ifdef REGEXP_H_WCHARS | |
470 } else { | |
471 if (&ep[18] >= endbuf) | |
472 ERROR(50); | |
473 *ep++ = CCL|CMB; | |
474 *ep++ = 0; | |
475 lc = 0; | |
476 for (i = 0; i < 16; i++) | |
477 ep[i] = 0; | |
478 eq = &ep[16]; | |
479 regexp_h_getwc(c); | |
480 if (c == L'^') { | |
481 regexp_h_getwc(c); | |
482 ep[-2] = CNCL|CMB; | |
483 } | |
484 do { | |
485 if (c == '\0' || c == '\n') | |
486 ERROR(49); | |
487 #ifdef REGEXP_H_VI_BACKSLASH | |
488 if(c == '\\' && ((c = PEEKC()) == ']' || | |
489 c == '-' || c == '^' || | |
490 c == '\\')) { | |
491 regexp_h_store(c, eq, endbuf); | |
492 regexp_h_getwc(c); | |
493 } else | |
494 #endif /* REGEXP_H_VI_BACKSLASH */ | |
495 if (c == '-' && lc != 0 && lc <= 0177) { | |
496 regexp_h_store(c, eq, endbuf); | |
497 regexp_h_getwc(c); | |
498 if (c == ']') { | |
499 PLACE('-'); | |
500 break; | |
501 } | |
502 #ifdef REGEXP_H_VI_BACKSLASH | |
503 if(c == '\\' && | |
504 ((c = PEEKC()) == ']' || | |
505 c == '-' || | |
506 c == '^' || | |
507 c == '\\')) { | |
508 regexp_h_store(c, eq, | |
509 endbuf); | |
510 regexp_h_getwc(c); | |
511 } | |
512 #endif /* REGEXP_H_VI_BACKSLASH */ | |
513 while (lc < (c & 0177)) { | |
514 PLACE(lc); | |
515 lc++; | |
516 } | |
517 } | |
518 lc = c; | |
519 if (c <= 0177) | |
520 PLACE(c); | |
521 regexp_h_store(c, eq, endbuf); | |
522 regexp_h_getwc(c); | |
523 } while (c != L']'); | |
/* the length byte holds the size of the multibyte member data */
524 if ((i = eq - &ep[16]) > 255) | |
525 ERROR(50); | |
526 lastep[1] = i; | |
527 ep = eq; | |
528 } | |
529 #endif /* REGEXP_H_WCHARS */ | |
530 | |
531 continue; | |
532 | |
533 case '\\': | |
534 regexp_h_getwc(c); | |
535 switch(c) { | |
536 | |
537 case '(': | |
538 if(nbra >= NBRA) | |
539 ERROR(43); | |
540 *bracketp++ = nbra; | |
541 *ep++ = CBRA; | |
542 *ep++ = nbra++; | |
543 continue; | |
544 | |
545 case ')': | |
546 if(bracketp <= bracket) | |
547 ERROR(42); | |
548 *ep++ = CKET; | |
549 *ep++ = *--bracketp; | |
550 closed++; | |
551 continue; | |
552 | |
553 case '<': | |
554 *ep++ = CBRC|regexp_h_wchars; | |
555 continue; | |
556 | |
557 case '>': | |
558 *ep++ = CLET|regexp_h_wchars; | |
559 continue; | |
560 | |
/*
 * Interval \{m\}, \{m,\} or \{m,n\}: RNGE is set on the previous item
 * and two bytes are appended. \{m\} stores m twice; \{m,\} stores m
 * and 255 and additionally sets REGEXP_H_LEAST on the item.
 */
561 case '{': | |
562 if(lastep == (char *) (0)) | |
563 goto defchar; | |
564 *lastep |= RNGE; | |
565 cflg = 0; | |
566 nlim: | |
567 c = GETC(); | |
568 i = 0; | |
569 do { | |
570 if ('0' <= c && c <= '9') | |
571 i = 10 * i + c - '0'; | |
572 else | |
573 ERROR(16); | |
574 } while(((c = GETC()) != '\\') && (c != ',')); | |
575 if (i > 255) | |
576 ERROR(11); | |
577 *ep++ = i; | |
578 if (c == ',') { | |
579 if(cflg++) | |
580 ERROR(44); | |
581 if((c = GETC()) == '\\') { | |
582 *ep++ = (char)255; | |
583 *lastep |= REGEXP_H_LEAST; | |
584 } else { | |
585 UNGETC(c); | |
586 goto nlim; /* get 2'nd number */ | |
587 } | |
588 } | |
589 if(GETC() != '}') | |
590 ERROR(45); | |
591 if(!cflg) /* one number */ | |
592 *ep++ = i; | |
593 else if((ep[-1] & 0377) < (ep[-2] & 0377)) | |
594 ERROR(46); | |
595 continue; | |
596 | |
597 case '\n': | |
598 ERROR(36); | |
599 | |
600 case 'n': | |
601 c = '\n'; | |
602 goto defchar; | |
603 | |
604 default: | |
/* \1 .. \9: back-reference, only to a group that is already closed */
605 if(c >= '1' && c <= '9') { | |
606 if((c -= '1') >= closed) | |
607 ERROR(25); | |
608 *ep++ = CBACK; | |
609 *ep++ = c; | |
610 continue; | |
611 } | |
612 } | |
613 /* Drop through to default to use \ to turn off special chars */ | |
614 | |
615 defchar: | |
616 default: | |
617 lastep = ep; | |
618 #ifdef REGEXP_H_WCHARS | |
619 if (regexp_h_wchars == 0) { | |
620 #endif | |
621 *ep++ = CCHR; | |
622 *ep++ = c; | |
623 #ifdef REGEXP_H_WCHARS | |
624 } else { | |
625 char mbbuf[MB_LEN_MAX]; | |
626 | |
/* pick the opcode by encoded length; other lengths use CCHR|CMB */
627 switch (wctomb(mbbuf, c)) { | |
628 case 1: *ep++ = CCH1; | |
629 break; | |
630 case 2: *ep++ = CCH2; | |
631 break; | |
632 case 3: *ep++ = CCH3; | |
633 break; | |
634 default: | |
635 *ep++ = CCHR|CMB; | |
636 } | |
637 regexp_h_store(c, ep, endbuf); | |
638 } | |
639 #endif /* REGEXP_H_WCHARS */ | |
640 } | |
641 } | |
642 } | |
643 | |
644 int | |
645 step(const char *p1, const char *p2) | |
646 { | |
647 register int c; | |
648 #ifdef REGEXP_H_WCHARS | |
649 register int d; | |
650 #endif /* REGEXP_H_WCHARS */ | |
651 | |
652 REGEXP_H_STEP_INIT /* get circf */ | |
653 regexp_h_bol = p1; | |
654 #ifdef REGEXP_H_WCHARS | |
655 regexp_h_firstwc = NULL; | |
656 #endif /* REGEXP_H_WCHARS */ | |
657 if (circf) { | |
658 loc1 = (char *)p1; | |
659 return(regexp_h_advance(p1, p2)); | |
660 } | |
661 /* fast check for first character */ | |
662 if (*p2==CCHR) { | |
663 c = p2[1] & 0377; | |
664 do { | |
665 if ((*p1 & 0377) != c) | |
666 continue; | |
667 if (regexp_h_advance(p1, p2)) { | |
668 loc1 = (char *)p1; | |
669 return(1); | |
670 } | |
671 } while (*p1++); | |
672 return(0); | |
673 } | |
674 #ifdef REGEXP_H_WCHARS | |
675 else if (*p2==CCH1) { | |
676 do { | |
677 if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) { | |
678 loc1 = (char *)p1; | |
679 return(1); | |
680 } | |
681 c = regexp_h_fetch(p1, 1); | |
682 } while (c); | |
683 return(0); | |
684 } else if (*p2==CCH2) { | |
685 do { | |
686 if (p1[0] == p2[1] && p1[1] == p2[2] && | |
687 regexp_h_advance(p1, p2)) { | |
688 loc1 = (char *)p1; | |
689 return(1); | |
690 } | |
691 c = regexp_h_fetch(p1, 1); | |
692 } while (c); | |
693 return(0); | |
694 } else if (*p2==CCH3) { | |
695 do { | |
696 if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&& | |
697 regexp_h_advance(p1, p2)) { | |
698 loc1 = (char *)p1; | |
699 return(1); | |
700 } | |
701 c = regexp_h_fetch(p1, 1); | |
702 } while (c); | |
703 return(0); | |
704 } else if ((*p2&0377)==(CCHR|CMB)) { | |
705 d = regexp_h_fetch(p2, 0); | |
706 do { | |
707 c = regexp_h_fetch(p1, 1); | |
708 if (c == d && regexp_h_advance(p1, p2)) { | |
709 loc1 = (char *)p1; | |
710 return(1); | |
711 } | |
712 } while(c); | |
713 return(0); | |
714 } | |
715 /* regular algorithm */ | |
716 if (regexp_h_wchars) | |
717 do { | |
718 if (regexp_h_advance(p1, p2)) { | |
719 loc1 = (char *)p1; | |
720 return(1); | |
721 } | |
722 c = regexp_h_fetch(p1, 1); | |
723 } while (c); | |
724 else | |
725 #endif /* REGEXP_H_WCHARS */ | |
726 do { | |
727 if (regexp_h_advance(p1, p2)) { | |
728 loc1 = (char *)p1; | |
729 return(1); | |
730 } | |
731 } while (*p1++); | |
732 return(0); | |
733 } | |
734 | |
735 #ifdef REGEXP_H_WCHARS | |
736 /* | |
737 * It is painfully slow to read character-wise backwards in a | |
738 * multibyte string (see regexp_h_previous() above). For the star | |
739 * algorithm, we therefore keep track of every character as it is | |
740 * read in forward direction. | |
741 * | |
742 * Don't use alloca() for stack blocks since there is no measurable | |
743 * speedup and huge amounts of memory are used up for long input | |
744 * lines. | |
745 */ | |
746 #ifndef REGEXP_H_STAKBLOK | |
747 #define REGEXP_H_STAKBLOK 1000 | |
748 #endif | |
749 | |
/*
 * One block of the stack of line positions kept for the multibyte
 * star algorithm. Blocks form a doubly linked list.
 */
750 struct regexp_h_stack { | |
/* following block, or NULL */
751 struct regexp_h_stack *s_nxt; | |
/* preceding block, or NULL for the first block */
752 struct regexp_h_stack *s_prv; | |
/* the saved line positions held by this block */
753 const char *s_ptr[REGEXP_H_STAKBLOK]; | |
754 }; | |
755 | |
756 #define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? \ | |
757 regexp_h_pushwc(sb, sp, sc, lp) : (void)0) | |
758 | |
759 static regexp_h_inline void | |
760 regexp_h_pushwc(struct regexp_h_stack **sb, | |
761 struct regexp_h_stack **sp, | |
762 const char ***sc, const char *lp) | |
763 { | |
764 if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc) | |
765 return; | |
766 if (*sb == NULL) { | |
767 if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL) | |
768 return; | |
769 (*sb)->s_nxt = (*sb)->s_prv = NULL; | |
770 *sp = *sb; | |
771 *sc = &(*sb)->s_ptr[0]; | |
772 } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) { | |
773 if ((*sp)->s_nxt == NULL) { | |
774 struct regexp_h_stack *bq; | |
775 | |
776 if ((bq = regexp_h_malloc(sizeof *bq)) == NULL) | |
777 return; | |
778 bq->s_nxt = NULL; | |
779 bq->s_prv = *sp; | |
780 (*sp)->s_nxt = bq; | |
781 *sp = bq; | |
782 } else | |
783 *sp = (*sp)->s_nxt; | |
784 *sc = &(*sp)->s_ptr[0]; | |
785 } | |
786 *(*sc)++ = lp; | |
787 } | |
788 | |
789 static regexp_h_inline const char * | |
790 regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp, | |
791 const char ***sc, const char *lp) | |
792 { | |
793 if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc) | |
794 return &lp[-1]; | |
795 if (*sp == NULL) | |
796 return regexp_h_firstwc; | |
797 if (*sc == &(*sp)->s_ptr[0]) { | |
798 if ((*sp)->s_prv == NULL) { | |
799 regexp_h_free(*sp); | |
800 *sp = NULL; | |
801 *sb = NULL; | |
802 return regexp_h_firstwc; | |
803 } | |
804 *sp = (*sp)->s_prv; | |
805 regexp_h_free((*sp)->s_nxt); | |
806 (*sp)->s_nxt = NULL ; | |
807 *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK]; | |
808 } | |
809 return *(--(*sc)); | |
810 } | |
811 | |
812 static void | |
813 regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp) | |
814 { | |
815 for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt) | |
816 if ((*sp)->s_prv) | |
817 regexp_h_free((*sp)->s_prv); | |
818 if (*sp) { | |
819 if ((*sp)->s_prv) | |
820 regexp_h_free((*sp)->s_prv); | |
821 regexp_h_free(*sp); | |
822 } | |
823 *sp = *sb = NULL; | |
824 } | |
825 #else /* !REGEXP_H_WCHARS */ | |
826 #define regexp_h_push(sb, sp, sc, lp) | |
827 #endif /* !REGEXP_H_WCHARS */ | |
828 | |
/*
 * Try to match the compiled expression ep against the text at lp.
 *
 * Returns 1 when the end of the expression (CCEOF) is reached, after
 * storing the end of the match in loc2; returns 0 on a mismatch.
 * \( and \) record their positions in braslist[] and braelist[].
 *
 * Starred and \{ \} items first consume as much input as they can and
 * then jump to the label star, which backs off one character at a time
 * and calls this function recursively for the rest of the expression.
 * In multibyte mode the positions to back off to are taken from the
 * stack filled by regexp_h_push().
 */
829 static int | |
830 regexp_h_advance(const char *lp, const char *ep) | |
831 { | |
832 register const char *curlp; | |
833 int c, least; | |
834 #ifdef REGEXP_H_WCHARS | |
835 int d; | |
836 struct regexp_h_stack *sb = NULL, *sp = NULL; | |
837 const char **sc; | |
838 #endif /* REGEXP_H_WCHARS */ | |
839 char *bbeg; | |
840 int ct; | |
841 | |
/* least keeps the raw opcode; the switch ignores its REGEXP_H_LEAST bit */
842 for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) { | |
843 | |
844 case CCHR: | |
845 #ifdef REGEXP_H_WCHARS | |
846 case CCH1: | |
847 #endif | |
848 if (*ep++ == *lp++) | |
849 continue; | |
850 return(0); | |
851 | |
852 #ifdef REGEXP_H_WCHARS | |
853 case CCHR|CMB: | |
854 if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1)) | |
855 continue; | |
856 return(0); | |
857 | |
858 case CCH2: | |
859 if (ep[0] == lp[0] && ep[1] == lp[1]) { | |
860 ep += 2, lp += 2; | |
861 continue; | |
862 } | |
863 return(0); | |
864 | |
865 case CCH3: | |
866 if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) { | |
867 ep += 3, lp += 3; | |
868 continue; | |
869 } | |
870 return(0); | |
871 #endif /* REGEXP_H_WCHARS */ | |
872 | |
873 case CDOT: | |
874 if (*lp++) | |
875 continue; | |
876 return(0); | |
877 #ifdef REGEXP_H_WCHARS | |
878 case CDOT|CMB: | |
879 if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF) | |
880 continue; | |
881 return(0); | |
882 #endif /* REGEXP_H_WCHARS */ | |
883 | |
884 case CDOL: | |
885 if (*lp==0) | |
886 continue; | |
887 return(0); | |
888 | |
889 case CCEOF: | |
890 loc2 = (char *)lp; | |
891 return(1); | |
892 | |
893 case CCL: | |
894 c = *lp++ & 0377; | |
895 if(ISTHERE(c)) { | |
896 ep += 32; | |
897 continue; | |
898 } | |
899 return(0); | |
900 | |
901 #ifdef REGEXP_H_WCHARS | |
902 case CCL|CMB: | |
903 case CNCL|CMB: | |
904 c = regexp_h_fetch(lp, 1); | |
905 if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) { | |
906 ep += (*ep & 0377) + 17; | |
907 continue; | |
908 } | |
909 return 0; | |
910 #endif /* REGEXP_H_WCHARS */ | |
911 | |
912 case CBRA: | |
913 braslist[*ep++ & 0377] = (char *)lp; | |
914 continue; | |
915 | |
916 case CKET: | |
917 braelist[*ep++ & 0377] = (char *)lp; | |
918 continue; | |
919 | |
/*
 * \< : at the start of the line (when locs is NULL), or where a digit,
 * letter or '_' follows something that is none of these.
 * NOTE(review): when lp == regexp_h_bol but locs != NULL, the test
 * below reads lp[-1], one byte before the recorded line start - confirm
 * that callers guarantee this byte exists.
 */
920 case CBRC: | |
921 if (lp == regexp_h_bol && locs == NULL) | |
922 continue; | |
923 if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377)) | |
924 && !regexp_h_uletter(lp[-1] & 0377) | |
925 && !isdigit(lp[-1] & 0377)) | |
926 continue; | |
927 return(0); | |
928 | |
929 #ifdef REGEXP_H_WCHARS | |
930 case CBRC|CMB: | |
931 c = regexp_h_show(lp); | |
932 d = regexp_h_previous(lp); | |
933 if ((iswdigit(c) || regexp_h_wuletter(c)) | |
934 && !regexp_h_wuletter(d) | |
935 && !iswdigit(d)) | |
936 continue; | |
937 return(0); | |
938 #endif /* REGEXP_H_WCHARS */ | |
939 | |
/* \> : the next character is neither a digit, a letter nor '_' */
940 case CLET: | |
941 if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377)) | |
942 continue; | |
943 return(0); | |
944 | |
945 #ifdef REGEXP_H_WCHARS | |
946 case CLET|CMB: | |
947 c = regexp_h_show(lp); | |
948 if (!regexp_h_wuletter(c) && !iswdigit(c)) | |
949 continue; | |
950 return(0); | |
951 #endif /* REGEXP_H_WCHARS */ | |
952 | |
/*
 * Interval forms: regexp_h_getrnge() sets low (mandatory repetitions)
 * and size (additional optional ones). The mandatory part must match,
 * the optional part is consumed greedily, then star backs off.
 */
953 case CCHR|RNGE: | |
954 c = *ep++; | |
955 regexp_h_getrnge(ep, least); | |
956 while(low--) | |
957 if(*lp++ != c) | |
958 return(0); | |
959 curlp = lp; | |
960 while(size--) { | |
961 regexp_h_push(&sb, &sp, &sc, lp); | |
962 if(*lp++ != c) | |
963 break; | |
964 } | |
965 if(size < 0) { | |
966 regexp_h_push(&sb, &sp, &sc, lp); | |
967 lp++; | |
968 } | |
969 ep += 2; | |
970 goto star; | |
971 | |
972 #ifdef REGEXP_H_WCHARS | |
973 case CCHR|RNGE|CMB: | |
974 case CCH1|RNGE: | |
975 case CCH2|RNGE: | |
976 case CCH3|RNGE: | |
977 c = regexp_h_fetch(ep, 0); | |
978 regexp_h_getrnge(ep, least); | |
979 while (low--) | |
980 if (regexp_h_fetch(lp, 1) != c) | |
981 return 0; | |
982 curlp = lp; | |
983 while (size--) { | |
984 regexp_h_push(&sb, &sp, &sc, lp); | |
985 if (regexp_h_fetch(lp, 1) != c) | |
986 break; | |
987 } | |
988 if(size < 0) { | |
989 regexp_h_push(&sb, &sp, &sc, lp); | |
990 regexp_h_fetch(lp, 1); | |
991 } | |
992 ep += 2; | |
993 goto star; | |
994 #endif /* REGEXP_H_WCHARS */ | |
995 | |
996 case CDOT|RNGE: | |
997 regexp_h_getrnge(ep, least); | |
998 while(low--) | |
999 if(*lp++ == '\0') | |
1000 return(0); | |
1001 curlp = lp; | |
1002 while(size--) { | |
1003 regexp_h_push(&sb, &sp, &sc, lp); | |
1004 if(*lp++ == '\0') | |
1005 break; | |
1006 } | |
1007 if(size < 0) { | |
1008 regexp_h_push(&sb, &sp, &sc, lp); | |
1009 lp++; | |
1010 } | |
1011 ep += 2; | |
1012 goto star; | |
1013 | |
1014 #ifdef REGEXP_H_WCHARS | |
1015 case CDOT|RNGE|CMB: | |
1016 regexp_h_getrnge(ep, least); | |
1017 while (low--) | |
1018 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) | |
1019 return 0; | |
1020 curlp = lp; | |
1021 while (size--) { | |
1022 regexp_h_push(&sb, &sp, &sc, lp); | |
1023 if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) | |
1024 break; | |
1025 } | |
1026 if (size < 0) { | |
1027 regexp_h_push(&sb, &sp, &sc, lp); | |
1028 regexp_h_fetch(lp, 1); | |
1029 } | |
1030 ep += 2; | |
1031 goto star; | |
1032 #endif /* REGEXP_H_WCHARS */ | |
1033 | |
1034 case CCL|RNGE: | |
1035 regexp_h_getrnge(ep + 32, least); | |
1036 while(low--) { | |
1037 c = *lp++ & 0377; | |
1038 if(!ISTHERE(c)) | |
1039 return(0); | |
1040 } | |
1041 curlp = lp; | |
1042 while(size--) { | |
1043 regexp_h_push(&sb, &sp, &sc, lp); | |
1044 c = *lp++ & 0377; | |
1045 if(!ISTHERE(c)) | |
1046 break; | |
1047 } | |
1048 if(size < 0) { | |
1049 regexp_h_push(&sb, &sp, &sc, lp); | |
1050 lp++; | |
1051 } | |
1052 ep += 34; /* 32 + 2 */ | |
1053 goto star; | |
1054 | |
1055 #ifdef REGEXP_H_WCHARS | |
1056 case CCL|RNGE|CMB: | |
1057 case CNCL|RNGE|CMB: | |
1058 regexp_h_getrnge(ep + (*ep & 0377) + 17, least); | |
1059 while (low--) { | |
1060 c = regexp_h_fetch(lp, 1); | |
1061 if (!regexp_h_cclass(ep, c, | |
1062 (ep[-1] & 0377 & ~REGEXP_H_LEAST) | |
1063 == (CCL|RNGE|CMB))) | |
1064 return 0; | |
1065 } | |
1066 curlp = lp; | |
1067 while (size--) { | |
1068 regexp_h_push(&sb, &sp, &sc, lp); | |
1069 c = regexp_h_fetch(lp, 1); | |
1070 if (!regexp_h_cclass(ep, c, | |
1071 (ep[-1] & 0377 & ~REGEXP_H_LEAST) | |
1072 == (CCL|RNGE|CMB))) | |
1073 break; | |
1074 } | |
1075 if (size < 0) { | |
1076 regexp_h_push(&sb, &sp, &sc, lp); | |
1077 regexp_h_fetch(lp, 1); | |
1078 } | |
1079 ep += (*ep & 0377) + 19; | |
1080 goto star; | |
1081 #endif /* REGEXP_H_WCHARS */ | |
1082 | |
/* \n back-reference: the text must repeat what group n matched */
1083 case CBACK: | |
1084 bbeg = braslist[*ep & 0377]; | |
1085 ct = braelist[*ep++ & 0377] - bbeg; | |
1086 | |
1087 if(strncmp(bbeg, lp, ct) == 0) { | |
1088 lp += ct; | |
1089 continue; | |
1090 } | |
1091 return(0); | |
1092 | |
/*
 * NOTE(review): if the referenced group matched the empty string, ct
 * is 0; strncmp() with a length of 0 always returns 0 and lp += ct
 * makes no progress, so the first loop below would not terminate.
 */
1093 case CBACK|STAR: | |
1094 bbeg = braslist[*ep & 0377]; | |
1095 ct = braelist[*ep++ & 0377] - bbeg; | |
1096 curlp = lp; | |
1097 while(strncmp(bbeg, lp, ct) == 0) | |
1098 lp += ct; | |
1099 | |
1100 while(lp >= curlp) { | |
1101 if(regexp_h_advance(lp, ep)) return(1); | |
1102 lp -= ct; | |
1103 } | |
1104 return(0); | |
1105 | |
1106 | |
1107 case CDOT|STAR: | |
1108 curlp = lp; | |
1109 do | |
1110 regexp_h_push(&sb, &sp, &sc, lp); | |
1111 while (*lp++); | |
1112 goto star; | |
1113 | |
1114 #ifdef REGEXP_H_WCHARS | |
1115 case CDOT|STAR|CMB: | |
1116 curlp = lp; | |
1117 do | |
1118 regexp_h_push(&sb, &sp, &sc, lp); | |
1119 while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF); | |
1120 goto star; | |
1121 #endif /* REGEXP_H_WCHARS */ | |
1122 | |
1123 case CCHR|STAR: | |
1124 curlp = lp; | |
1125 do | |
1126 regexp_h_push(&sb, &sp, &sc, lp); | |
1127 while (*lp++ == *ep); | |
1128 ep++; | |
1129 goto star; | |
1130 | |
1131 #ifdef REGEXP_H_WCHARS | |
1132 case CCHR|STAR|CMB: | |
1133 case CCH1|STAR: | |
1134 case CCH2|STAR: | |
1135 case CCH3|STAR: | |
1136 curlp = lp; | |
1137 d = regexp_h_fetch(ep, 0); | |
1138 do | |
1139 regexp_h_push(&sb, &sp, &sc, lp); | |
1140 while (regexp_h_fetch(lp, 1) == d); | |
1141 goto star; | |
1142 #endif /* REGEXP_H_WCHARS */ | |
1143 | |
1144 case CCL|STAR: | |
1145 curlp = lp; | |
1146 do { | |
1147 regexp_h_push(&sb, &sp, &sc, lp); | |
1148 c = *lp++ & 0377; | |
1149 } while(ISTHERE(c)); | |
1150 ep += 32; | |
1151 goto star; | |
1152 | |
1153 #ifdef REGEXP_H_WCHARS | |
1154 case CCL|STAR|CMB: | |
1155 case CNCL|STAR|CMB: | |
1156 curlp = lp; | |
1157 do { | |
1158 regexp_h_push(&sb, &sp, &sc, lp); | |
1159 c = regexp_h_fetch(lp, 1); | |
1160 } while (regexp_h_cclass(ep, c, (ep[-1] & 0377) | |
1161 == (CCL|STAR|CMB))); | |
1162 ep += (*ep & 0377) + 17; | |
1163 goto star; | |
1164 #endif /* REGEXP_H_WCHARS */ | |
1165 | |
/*
 * Backtracking: lp is one character past the longest run, curlp marks
 * where the run started. Give back one character per iteration and try
 * to match the remainder; stop when the position reaches locs.
 */
1166 star: | |
1167 #ifdef REGEXP_H_WCHARS | |
1168 if (regexp_h_wchars == 0) { | |
1169 #endif | |
1170 do { | |
1171 if(--lp == locs) | |
1172 break; | |
1173 if (regexp_h_advance(lp, ep)) | |
1174 return(1); | |
1175 } while (lp > curlp); | |
1176 #ifdef REGEXP_H_WCHARS | |
1177 } else { | |
1178 do { | |
1179 lp = regexp_h_pop(&sb, &sp, &sc, lp); | |
1180 if (lp <= locs) | |
1181 break; | |
1182 if (regexp_h_advance(lp, ep)) { | |
1183 regexp_h_zerostak(&sb, &sp); | |
1184 return(1); | |
1185 } | |
1186 } while (lp > curlp); | |
1187 regexp_h_zerostak(&sb, &sp); | |
1188 } | |
1189 #endif /* REGEXP_H_WCHARS */ | |
1190 return(0); | |
1191 | |
1192 } | |
1193 } | |
1194 | |
1195 static void | |
1196 regexp_h_getrnge(register const char *str, int least) | |
1197 { | |
1198 low = *str++ & 0377; | |
1199 size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low; | |
1200 } | |
1201 | |
1202 int | |
1203 advance(const char *lp, const char *ep) | |
1204 { | |
1205 REGEXP_H_ADVANCE_INIT /* skip past circf */ | |
1206 regexp_h_bol = lp; | |
1207 #ifdef REGEXP_H_WCHARS | |
1208 regexp_h_firstwc = NULL; | |
1209 #endif /* REGEXP_H_WCHARS */ | |
1210 return regexp_h_advance(lp, ep); | |
1211 } |