Bug Summary

File:regex/regcomp.c
Location:line 1585, column 7
Description:Value stored to 'status' is never read

Annotated Source Code

1/*
2 regcomp.c - TRE POSIX compatible regex compilation functions.
3
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30*/
31
32#include <string.h>
33#include <stdlib.h>
34#include <regex.h>
35#include <limits.h>
36#include <stdint.h>
37#include <ctype.h>
38
39#include "tre.h"
40
41#include <assert.h>
42
43/***********************************************************************
44 from tre-compile.h
45***********************************************************************/
46
47typedef struct {
48 int position;
49 int code_min;
50 int code_max;
51 int *tags;
52 int assertions;
53 tre_ctype_t class;
54 tre_ctype_t *neg_classes;
55 int backref;
56} tre_pos_and_tags_t;
57
58
59/***********************************************************************
60 from tre-ast.c and tre-ast.h
61***********************************************************************/
62
63/* The different AST node types. */
64typedef enum {
65 LITERAL,
66 CATENATION,
67 ITERATION,
68 UNION
69} tre_ast_type_t;
70
71/* Special subtypes of TRE_LITERAL. */
72#define EMPTY-1 -1 /* Empty leaf (denotes empty string). */
73#define ASSERTION-2 -2 /* Assertion leaf. */
74#define TAG-3 -3 /* Tag leaf. */
75#define BACKREF-4 -4 /* Back reference leaf. */
76
77#define IS_SPECIAL(x)((x)->code_min < 0) ((x)->code_min < 0)
78#define IS_EMPTY(x)((x)->code_min == -1) ((x)->code_min == EMPTY-1)
79#define IS_ASSERTION(x)((x)->code_min == -2) ((x)->code_min == ASSERTION-2)
80#define IS_TAG(x)((x)->code_min == -3) ((x)->code_min == TAG-3)
81#define IS_BACKREF(x)((x)->code_min == -4) ((x)->code_min == BACKREF-4)
82
83
84/* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
86typedef struct {
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
89 int nullable;
90 int submatch_id;
91 int num_submatches;
92 int num_tags;
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
95} tre_ast_node_t;
96
97
98/* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
100 character. */
101typedef struct {
102 long code_min;
103 long code_max;
104 int position;
105 tre_ctype_t class;
106 tre_ctype_t *neg_classes;
107} tre_literal_t;
108
109/* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
113typedef struct {
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
116} tre_catenation_t;
117
118/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
119 operators. */
120typedef struct {
121 /* Subexpression to match. */
122 tre_ast_node_t *arg;
123 /* Minimum number of consecutive matches. */
124 int min;
125 /* Maximum number of consecutive matches. */
126 int max;
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
131} tre_iteration_t;
132
133/* An "union" node. These are created for the "|" operator. */
134typedef struct {
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
137} tre_union_t;
138
139
140static tre_ast_node_t *
141tre_ast_new_node(tre_mem_t mem, int type, void *obj)
142{
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *node);
144 if (!node || !obj)
145 return 0;
146 node->obj = obj;
147 node->type = type;
148 node->nullable = -1;
149 node->submatch_id = -1;
150 return node;
151}
152
153static tre_ast_node_t *
154tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
155{
156 tre_ast_node_t *node;
157 tre_literal_t *lit;
158
159 lit = tre_mem_calloc(mem, sizeof *lit)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
161 if (!node)
162 return 0;
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
166 return node;
167}
168
169static tre_ast_node_t *
170tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
171{
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
174
175 iter = tre_mem_calloc(mem, sizeof *iter)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
177 if (!node)
178 return 0;
179 iter->arg = arg;
180 iter->min = min;
181 iter->max = max;
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
184 return node;
185}
186
187static tre_ast_node_t *
188tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
189{
190 tre_ast_node_t *node;
191 tre_union_t *un;
192
193 if (!left)
194 return right;
195 un = tre_mem_calloc(mem, sizeof *un)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
197 if (!node || !right)
198 return 0;
199 un->left = left;
200 un->right = right;
201 node->num_submatches = left->num_submatches + right->num_submatches;
202 return node;
203}
204
205static tre_ast_node_t *
206tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
207{
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
210
211 if (!left)
212 return right;
213 cat = tre_mem_calloc(mem, sizeof *cat)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
215 if (!node)
216 return 0;
217 cat->left = left;
218 cat->right = right;
219 node->num_submatches = left->num_submatches + right->num_submatches;
220 return node;
221}
222
223
224/***********************************************************************
225 from tre-stack.c and tre-stack.h
226***********************************************************************/
227
228typedef struct tre_stack_rec tre_stack_t;
229
230/* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
234static tre_stack_t *
235tre_stack_new(int size, int max_size, int increment);
236
237/* Frees the stack object. */
238static void
239tre_stack_destroy(tre_stack_t *s);
240
241/* Returns the current number of objects in the stack. */
242static int
243tre_stack_num_objects(tre_stack_t *s);
244
245/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249#define declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type
value)
\
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
251
252declare_pushf(voidptr, void *)static reg_errcode_t tre_stack_push_voidptr(tre_stack_t *s, void
* value)
;
253declare_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value
)
;
254
255/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
257 empty. */
258#define declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
260
261declare_popf(voidptr, void *)static void * tre_stack_pop_voidptr(tre_stack_t *s);
262declare_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s);
263
264/* Just to save some typing. */
265#define STACK_PUSH(s, typetag, value)do { status = tre_stack_push_typetag(s, value); } while ( 0) \
266 do \
267 { \
268 status = tre_stack_push_ ## typetag(s, value); \
269 } \
270 while (/*CONSTCOND*/0)
271
272#define STACK_PUSHX(s, typetag, value){ status = tre_stack_push_typetag(s, value); if (status != 0)
break; }
\
273 { \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK0) \
276 break; \
277 }
278
279#define STACK_PUSHR(s, typetag, value){ reg_errcode_t _status; _status = tre_stack_push_typetag(s, value
); if (_status != 0) return _status; }
\
280 { \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK0) \
284 return _status; \
285 }
286
287union tre_stack_item {
288 void *voidptr_value;
289 int int_value;
290};
291
292struct tre_stack_rec {
293 int size;
294 int max_size;
295 int increment;
296 int ptr;
297 union tre_stack_item *stack;
298};
299
300
301static tre_stack_t *
302tre_stack_new(int size, int max_size, int increment)
303{
304 tre_stack_t *s;
305
306 s = xmallocmalloc(sizeof(*s));
307 if (s != NULL((void*)0))
308 {
309 s->stack = xmallocmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL((void*)0))
311 {
312 xfreefree(s);
313 return NULL((void*)0);
314 }
315 s->size = size;
316 s->max_size = max_size;
317 s->increment = increment;
318 s->ptr = 0;
319 }
320 return s;
321}
322
323static void
324tre_stack_destroy(tre_stack_t *s)
325{
326 xfreefree(s->stack);
327 xfreefree(s);
328}
329
330static int
331tre_stack_num_objects(tre_stack_t *s)
332{
333 return s->ptr;
334}
335
336static reg_errcode_t
337tre_stack_push(tre_stack_t *s, union tre_stack_item value)
338{
339 if (s->ptr < s->size)
340 {
341 s->stack[s->ptr] = value;
342 s->ptr++;
343 }
344 else
345 {
346 if (s->size >= s->max_size)
347 {
348 return REG_ESPACE12;
349 }
350 else
351 {
352 union tre_stack_item *new_buffer;
353 int new_size;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xreallocrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL((void*)0))
359 {
360 return REG_ESPACE12;
361 }
362 assert(new_size > s->size)(void)0;
363 s->size = new_size;
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
366 }
367 }
368 return REG_OK0;
369}
370
371#define define_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type
value) { union tre_stack_item item; item.typetag_value = value
; return tre_stack_push(s, item); }
\
372 declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type
value)
{ \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
376}
377
378define_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value
) { union tre_stack_item item; item.int_value = value; return
tre_stack_push(s, item); }
379define_pushf(voidptr, void *)static reg_errcode_t tre_stack_push_voidptr(tre_stack_t *s, void
* value) { union tre_stack_item item; item.voidptr_value = value
; return tre_stack_push(s, item); }
380
381#define define_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { return s->
stack[--s->ptr].typetag_value; }
\
382 declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { \
383 return s->stack[--s->ptr].typetag ## _value; \
384 }
385
386define_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s) { return s->stack
[--s->ptr].int_value; }
387define_popf(voidptr, void *)static void * tre_stack_pop_voidptr(tre_stack_t *s) { return s
->stack[--s->ptr].voidptr_value; }
388
389
390/***********************************************************************
391 from tre-parse.c and tre-parse.h
392***********************************************************************/
393
394/* Parse context. */
395typedef struct {
396 /* Memory allocator. The AST is allocated using this. */
397 tre_mem_t mem;
398 /* Stack used for keeping track of regexp syntax. */
399 tre_stack_t *stack;
400 /* The parsed node after a parse function returns. */
401 tre_ast_node_t *n;
402 /* Position in the regexp pattern after a parse function returns. */
403 const char *s;
404 /* The first character of the regexp. */
405 const char *re;
406 /* Current submatch ID. */
407 int submatch_id;
408 /* Current position (number of literal). */
409 int position;
410 /* The highest back reference or -1 if none seen so far. */
411 int max_backref;
412 /* Compilation flags. */
413 int cflags;
414} tre_parse_ctx_t;
415
416/* Some macros for expanding \w, \s, etc. */
417static const struct {
418 char c;
419 const char *expansion;
420} tre_macros[] = {
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
425 { 0, 0 }
426};
427
428/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431static const char *tre_expand_macro(const char *s)
432{
433 int i;
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
436}
437
438static int
439tre_compare_lit(const void *a, const void *b)
440{
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
445}
446
447struct literals {
448 tre_mem_t mem;
449 tre_literal_t **a;
450 int len;
451 int cap;
452};
453
454static tre_literal_t *tre_new_lit(struct literals *p)
455{
456 tre_literal_t **a;
457 if (p->len >= p->cap) {
458 if (p->cap >= 1<<15)
459 return 0;
460 p->cap *= 2;
461 a = xreallocrealloc(p->a, p->cap * sizeof *p->a);
462 if (!a)
463 return 0;
464 p->a = a;
465 }
466 a = p->a + p->len++;
467 *a = tre_mem_calloc(p->mem, sizeof **a)__tre_mem_alloc_impl(p->mem, 0, ((void*)0), 1, sizeof **a);
468 return *a;
469}
470
471static int add_icase_literals(struct literals *ls, int min, int max)
472{
473 tre_literal_t *lit;
474 int b, e, c;
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_isloweriswlower(c)) {
480 b = e = tre_touppertowupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_touppertowupper(c) != e) break;
483 } else if (tre_isupperiswupper(c)) {
484 b = e = tre_tolowertowlower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolowertowlower(c) != e) break;
487 } else {
488 c++;
489 continue;
490 }
491 lit = tre_new_lit(ls);
492 if (!lit)
493 return -1;
494 lit->code_min = b;
495 lit->code_max = e-1;
496 lit->position = -1;
497 }
498 return 0;
499}
500
501
502/* Maximum number of character classes in a negated bracket expression. */
503#define MAX_NEG_CLASSES64 64
504
505struct neg {
506 int negate;
507 int len;
508 tre_ctype_t a[MAX_NEG_CLASSES64];
509};
510
511// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
512
513/*
514bracket grammar:
515Bracket = '[' List ']' | '[^' List ']'
516List = Term | List Term
517Term = Char | Range | Chclass | Eqclass
518Range = Char '-' Char | Char '-' '-'
519Char = Coll | coll_single
520Meta = ']' | '-'
521Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523Chclass = '[:' class ':]'
524
525coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
529*/
530
531static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
532{
533 const char *start = s;
534 tre_ctype_t class;
535 int min, max;
536 wchar_t wc;
537 int len;
538
539 for (;;) {
540 class = 0;
541 len = mbtowc(&wc, s, -1);
542 if (len <= 0)
543 return *s ? REG_BADPAT2 : REG_EBRACK7;
544 if (*s == ']' && s != start) {
545 ctx->s = s+1;
546 return REG_OK0;
547 }
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
551 return REG_ERANGE11;
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
554 return REG_ECOLLATE3;
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX14+1];
557 s += 2;
558 for (len=0; len < CHARCLASS_NAME_MAX14 && s[len]; len++) {
559 if (s[len] == ':') {
560 memcpy(tmp, s, len);
561 tmp[len] = 0;
562 class = tre_ctypewctype(tmp);
563 break;
564 }
565 }
566 if (!class || s[len+1] != ']')
567 return REG_ECTYPE4;
568 min = 0;
569 max = TRE_CHAR_MAX0x10ffff;
570 s += len+2;
571 } else {
572 min = max = wc;
573 s += len;
574 if (*s == '-' && s[1] != ']') {
575 s++;
576 len = mbtowc(&wc, s, -1);
577 max = wc;
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
581 return REG_ERANGE11;
582 s += len;
583 }
584 }
585
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES64)
588 return REG_ESPACE12;
589 neg->a[neg->len++] = class;
590 } else {
591 tre_literal_t *lit = tre_new_lit(ls);
592 if (!lit)
593 return REG_ESPACE12;
594 lit->code_min = min;
595 lit->code_max = max;
596 lit->class = class;
597 lit->position = -1;
598
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE2 && !class)
607 if (add_icase_literals(ls, min, max))
608 return REG_ESPACE12;
609 }
610 }
611}
612
613static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
614{
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
617 tre_ctype_t *nc = 0;
618 tre_literal_t *lit;
619 struct literals ls;
620 struct neg neg;
621 reg_errcode_t err;
622
623 ls.mem = ctx->mem;
624 ls.len = 0;
625 ls.cap = 32;
626 ls.a = xmallocmalloc(ls.cap * sizeof *ls.a);
627 if (!ls.a)
628 return REG_ESPACE12;
629 neg.len = 0;
630 neg.negate = *s == '^';
631 if (neg.negate)
632 s++;
633
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
635 if (err != REG_OK0)
636 goto parse_bracket_done;
637
638 if (neg.negate) {
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
643 if (!lit) {
644 err = REG_ESPACE12;
645 goto parse_bracket_done;
646 }
647 lit->code_min = TRE_CHAR_MAX0x10ffff+1;
648 lit->code_max = TRE_CHAR_MAX0x10ffff+1;
649 lit->position = -1;
650 /* negated classes */
651 if (neg.len) {
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a)__tre_mem_alloc_impl(ctx->mem, 0, ((void*)0), 0, (neg.len+
1)*sizeof *neg.a)
;
653 if (!nc) {
654 err = REG_ESPACE12;
655 goto parse_bracket_done;
656 }
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
658 nc[neg.len] = 0;
659 }
660 }
661
662 /* Build a union of the items in the array, negated if necessary. */
663 negmax = negmin = 0;
664 for (i = 0; i < ls.len; i++) {
665 lit = ls.a[i];
666 min = lit->code_min;
667 max = lit->code_max;
668 if (neg.negate) {
669 if (min <= negmin) {
670 /* Overlap. */
671 negmin = MAX(max + 1, negmin)(((max + 1) >= (negmin)) ? (max + 1) : (negmin));
672 continue;
673 }
674 negmax = min - 1;
675 lit->code_min = negmin;
676 lit->code_max = negmax;
677 negmin = max + 1;
678 }
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
683 if (!node) {
684 err = REG_ESPACE12;
685 break;
686 }
687 }
688
689parse_bracket_done:
690 xfreefree(ls.a);
691 ctx->position++;
692 ctx->n = node;
693 return err;
694}
695
696static const char *parse_dup_count(const char *s, int *n)
697{
698 *n = -1;
699 if (!isdigit(*s)(0 ? isdigit(*s) : ((unsigned)(*s)-'0') < 10))
700 return s;
701 *n = 0;
702 for (;;) {
703 *n = 10 * *n + (*s - '0');
704 s++;
705 if (!isdigit(*s)(0 ? isdigit(*s) : ((unsigned)(*s)-'0') < 10) || *n > RE_DUP_MAX255)
706 break;
707 }
708 return s;
709}
710
711static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
712{
713 int min, max;
714
715 s = parse_dup_count(s, &min);
716 if (*s == ',')
717 s = parse_dup_count(s+1, &max);
718 else
719 max = min;
720
721 if (
722 (max < min && max >= 0) ||
723 max > RE_DUP_MAX255 ||
724 min > RE_DUP_MAX255 ||
725 min < 0 ||
726 (!(ctx->cflags & REG_EXTENDED1) && *s++ != '\\') ||
727 *s++ != '}'
728 )
729 return REG_BADBR10;
730
731 if (min == 0 && max == 0)
732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
733 else
734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
735 if (!ctx->n)
736 return REG_ESPACE12;
737 ctx->s = s;
738 return REG_OK0;
739}
740
741static int hexval(unsigned c)
742{
743 if (c-'0'<10) return c-'0';
744 c |= 32;
745 if (c-'a'<6) return c-'a'+10;
746 return -1;
747}
748
749static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
750{
751 if (node->submatch_id >= 0) {
752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
753 if (!n)
754 return REG_ESPACE12;
755 n = tre_ast_new_catenation(ctx->mem, n, node);
756 if (!n)
757 return REG_ESPACE12;
758 n->num_submatches = node->num_submatches;
759 node = n;
760 }
761 node->submatch_id = subid;
762 node->num_submatches++;
763 ctx->n = node;
764 return REG_OK0;
765}
766
767/*
768BRE grammar:
769Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
770Branch = Atom | Branch Atom
771Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
772Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
773
774(leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
775
776ERE grammar:
777Regex = Branch | Regex '|' Branch
778Branch = Atom | Branch Atom
779Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
780Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
781
782(a*+?, ^*, $+, \X, {, (|a) are unspecified)
783*/
784
785static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
786{
787 int len, ere = ctx->cflags & REG_EXTENDED1;
788 const char *p;
789 tre_ast_node_t *node;
790 wchar_t wc;
791 switch (*s) {
792 case '[':
793 return parse_bracket(ctx, s+1);
794 case '\\':
795 p = tre_expand_macro(s+1);
796 if (p) {
797 /* assume \X expansion is a single atom */
798 reg_errcode_t err = parse_atom(ctx, p);
799 ctx->s = s+2;
800 return err;
801 }
802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
803 switch (*++s) {
804 case 0:
805 return REG_EESCAPE5;
806 case 'b':
807 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB64, -1);
808 break;
809 case 'B':
810 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB_NEG128, -1);
811 break;
812 case '<':
813 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOW16, -1);
814 break;
815 case '>':
816 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOW32, -1);
817 break;
818 case 'x':
819 s++;
820 int i, v = 0, c;
821 len = 2;
822 if (*s == '{') {
823 len = 8;
824 s++;
825 }
826 for (i=0; i<len && v<0x110000; i++) {
827 c = hexval(s[i]);
828 if (c < 0) break;
829 v = 16*v + c;
830 }
831 s += i;
832 if (len == 8) {
833 if (*s != '}')
834 return REG_EBRACE9;
835 s++;
836 }
837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position);
838 ctx->position++;
839 s--;
840 break;
841 default:
842 if (!ere && (unsigned)*s-'1' < 9) {
843 /* back reference */
844 int val = *s - '0';
845 node = tre_ast_new_literal(ctx->mem, BACKREF-4, val, ctx->position);
846 ctx->max_backref = MAX(val, ctx->max_backref)(((val) >= (ctx->max_backref)) ? (val) : (ctx->max_backref
))
;
847 } else {
848 /* extension: accept unknown escaped char
849 as a literal */
850 goto parse_literal;
851 }
852 ctx->position++;
853 }
854 s++;
855 break;
856 case '.':
857 if (ctx->cflags & REG_NEWLINE4) {
858 tre_ast_node_t *tmp1, *tmp2;
859 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
860 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX0x10ffff, ctx->position++);
861 if (tmp1 && tmp2)
862 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
863 else
864 node = 0;
865 } else {
866 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX0x10ffff, ctx->position++);
867 }
868 s++;
869 break;
870 case '^':
871 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
872 if (!ere && s != ctx->re)
873 goto parse_literal;
874 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOL1, -1);
875 s++;
876 break;
877 case '$':
878 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
879 if (!ere && s[1])
880 goto parse_literal;
881 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOL2, -1);
882 s++;
883 break;
884 case '*':
885 case '|':
886 case '{':
887 case '+':
888 case '?':
889 if (!ere)
890 goto parse_literal;
891 case 0:
892 node = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
893 break;
894 default:
895parse_literal:
896 len = mbtowc(&wc, s, -1);
897 if (len < 0)
898 return REG_BADPAT2;
899 if (ctx->cflags & REG_ICASE2 && (tre_isupperiswupper(wc) || tre_isloweriswlower(wc))) {
900 tre_ast_node_t *tmp1, *tmp2;
901 /* multiple opposite case characters are not supported */
902 tmp1 = tre_ast_new_literal(ctx->mem, tre_touppertowupper(wc), tre_touppertowupper(wc), ctx->position);
903 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolowertowlower(wc), tre_tolowertowlower(wc), ctx->position);
904 if (tmp1 && tmp2)
905 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
906 else
907 node = 0;
908 } else {
909 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
910 }
911 ctx->position++;
912 s += len;
913 break;
914 }
915 if (!node)
916 return REG_ESPACE12;
917 ctx->n = node;
918 ctx->s = s;
919 return REG_OK0;
920}
921
922#define PUSHPTR(err, s, v)do { if ((err = tre_stack_push_voidptr(s, v)) != 0) return err
; } while(0)
do { \
923 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK0) \
924 return err; \
925} while(0)
926
927#define PUSHINT(err, s, v)do { if ((err = tre_stack_push_int(s, v)) != 0) return err; }
while(0)
do { \
928 if ((err = tre_stack_push_int(s, v)) != REG_OK0) \
929 return err; \
930} while(0)
931
932static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
933{
934 tre_ast_node_t *nbranch=0, *nunion=0;
935 int ere = ctx->cflags & REG_EXTENDED1;
936 const char *s = ctx->re;
937 int subid = 0;
938 int depth = 0;
939 reg_errcode_t err;
940 tre_stack_t *stack = ctx->stack;
941
942 PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return
err; } while(0)
;
943 for (;;) {
944 if ((!ere && *s == '\\' && s[1] == '(') ||
945 (ere && *s == '(')) {
946 PUSHPTR(err, stack, nunion)do { if ((err = tre_stack_push_voidptr(stack, nunion)) != 0) return
err; } while(0)
;
947 PUSHPTR(err, stack, nbranch)do { if ((err = tre_stack_push_voidptr(stack, nbranch)) != 0)
return err; } while(0)
;
948 PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return
err; } while(0)
;
949 s++;
950 if (!ere)
951 s++;
952 depth++;
953 nbranch = nunion = 0;
954 continue;
955 }
956 if ((!ere && *s == '\\' && s[1] == ')') ||
957 (ere && *s == ')' && depth)) {
958 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
959 if (!ctx->n)
960 return REG_ESPACE12;
961 } else {
962 err = parse_atom(ctx, s);
963 if (err != REG_OK0)
964 return err;
965 s = ctx->s;
966 }
967
968 parse_iter:
969 /* extension: repetitions are accepted after an empty node
970 eg. (+), ^*, a$?, a|{2} */
971 switch (*s) {
972 case '+':
973 case '?':
974 if (!ere)
975 break;
976 /* fallthrough */
977 case '*':;
978 int min=0, max=-1;
979 if (*s == '+')
980 min = 1;
981 if (*s == '?')
982 max = 1;
983 s++;
984 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
985 if (!ctx->n)
986 return REG_ESPACE12;
987 /* extension: multiple consecutive *+?{,} is unspecified,
988 but (a+)+ has to be supported so accepting a++ makes
989 sense, note however that the RE_DUP_MAX limit can be
990 circumvented: (a{255}){255} uses a lot of memory.. */
991 goto parse_iter;
992 case '\\':
993 if (ere || s[1] != '{')
994 break;
995 s++;
996 goto parse_brace;
997 case '{':
998 if (!ere)
999 break;
1000 parse_brace:
1001 err = parse_dup(ctx, s+1);
1002 if (err != REG_OK0)
1003 return err;
1004 s = ctx->s;
1005 goto parse_iter;
1006 }
1007
1008 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1009 if ((ere && *s == '|') ||
1010 (ere && *s == ')' && depth) ||
1011 (!ere && *s == '\\' && s[1] == ')') ||
1012 !*s) {
1013 /* extension: empty branch is unspecified (), (|a), (a|)
1014 here they are not rejected but match on empty string */
1015 int c = *s;
1016 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1017 nbranch = 0;
1018 if (c != '|') {
1019 if (c == '\\') {
1020 if (!depth) return REG_EPAREN8;
1021 s+=2;
1022 } else if (c == ')')
1023 s++;
1024 depth--;
1025 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1026 if (err != REG_OK0)
1027 return err;
1028 if (!c && depth<0) {
1029 ctx->submatch_id = subid;
1030 return REG_OK0;
1031 }
1032 if (!c || depth<0)
1033 return REG_EPAREN8;
1034 nbranch = tre_stack_pop_voidptr(stack);
1035 nunion = tre_stack_pop_voidptr(stack);
1036 goto parse_iter;
1037 }
1038 s++;
1039 }
1040 }
1041}
1042
1043
1044/***********************************************************************
1045 from tre-compile.c
1046***********************************************************************/
1047
1048
1049/*
1050 TODO:
1051 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1052 function calls.
1053*/
1054
1055/*
1056 Algorithms to setup tags so that submatch addressing can be done.
1057*/
1058
1059
1060/* Inserts a catenation node to the root of the tree given in `node'.
1061 As the left child a new tag with number `tag_id' to `node' is added,
1062 and the right child is the old root. */
1063static reg_errcode_t
1064tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1065{
1066 tre_catenation_t *c;
1067
1068 c = tre_mem_alloc(mem, sizeof(*c))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*c));
1069 if (c == NULL((void*)0))
1070 return REG_ESPACE12;
1071 c->left = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1072 if (c->left == NULL((void*)0))
1073 return REG_ESPACE12;
1074 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t
))
;
1075 if (c->right == NULL((void*)0))
1076 return REG_ESPACE12;
1077
1078 c->right->obj = node->obj;
1079 c->right->type = node->type;
1080 c->right->nullable = -1;
1081 c->right->submatch_id = -1;
1082 c->right->firstpos = NULL((void*)0);
1083 c->right->lastpos = NULL((void*)0);
1084 c->right->num_tags = 0;
1085 node->obj = c;
1086 node->type = CATENATION;
1087 return REG_OK0;
1088}
1089
1090/* Inserts a catenation node to the root of the tree given in `node'.
1091 As the right child a new tag with number `tag_id' to `node' is added,
1092 and the left child is the old root. */
1093static reg_errcode_t
1094tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1095{
1096 tre_catenation_t *c;
1097
1098 c = tre_mem_alloc(mem, sizeof(*c))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*c));
1099 if (c == NULL((void*)0))
1100 return REG_ESPACE12;
1101 c->right = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1102 if (c->right == NULL((void*)0))
1103 return REG_ESPACE12;
1104 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t
))
;
1105 if (c->left == NULL((void*)0))
1106 return REG_ESPACE12;
1107
1108 c->left->obj = node->obj;
1109 c->left->type = node->type;
1110 c->left->nullable = -1;
1111 c->left->submatch_id = -1;
1112 c->left->firstpos = NULL((void*)0);
1113 c->left->lastpos = NULL((void*)0);
1114 c->left->num_tags = 0;
1115 node->obj = c;
1116 node->type = CATENATION;
1117 return REG_OK0;
1118}
1119
1120typedef enum {
1121 ADDTAGS_RECURSE,
1122 ADDTAGS_AFTER_ITERATION,
1123 ADDTAGS_AFTER_UNION_LEFT,
1124 ADDTAGS_AFTER_UNION_RIGHT,
1125 ADDTAGS_AFTER_CAT_LEFT,
1126 ADDTAGS_AFTER_CAT_RIGHT,
1127 ADDTAGS_SET_SUBMATCH_END
1128} tre_addtags_symbol_t;
1129
1130
1131typedef struct {
1132 int tag;
1133 int next_tag;
1134} tre_tag_states_t;
1135
1136
1137/* Go through `regset' and set submatch data for submatches that are
1138 using this tag. */
1139static void
1140tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1141{
1142 int i;
1143
1144 for (i = 0; regset[i] >= 0; i++)
1145 {
1146 int id = regset[i] / 2;
1147 int start = !(regset[i] % 2);
1148 if (start)
1149 tnfa->submatch_data[id].so_tag = tag;
1150 else
1151 tnfa->submatch_data[id].eo_tag = tag;
1152 }
1153 regset[0] = -1;
1154}
1155
1156
1157/* Adds tags to appropriate locations in the parse tree in `tree', so that
1158 subexpressions marked for submatch addressing can be traced. */
1159static reg_errcode_t
1160tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1161 tre_tnfa_t *tnfa)
1162{
1163 reg_errcode_t status = REG_OK0;
1164 tre_addtags_symbol_t symbol;
1165 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1166 int bottom = tre_stack_num_objects(stack);
1167 /* True for first pass (counting number of needed tags) */
1168 int first_pass = (mem == NULL((void*)0) || tnfa == NULL((void*)0));
1169 int *regset, *orig_regset;
1170 int num_tags = 0; /* Total number of tags. */
1171 int num_minimals = 0; /* Number of special minimal tags. */
1172 int tag = 0; /* The tag that is to be added next. */
1173 int next_tag = 1; /* Next tag to use after this one. */
1174 int *parents; /* Stack of submatches the current submatch is
1175 contained in. */
1176 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1177 tre_tag_states_t *saved_states;
1178
1179 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1180 if (!first_pass)
1181 {
1182 tnfa->end_tag = 0;
1183 tnfa->minimal_tags[0] = -1;
1184 }
1185
1186 regset = xmallocmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1187 if (regset == NULL((void*)0))
1188 return REG_ESPACE12;
1189 regset[0] = -1;
1190 orig_regset = regset;
1191
1192 parents = xmallocmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1193 if (parents == NULL((void*)0))
1194 {
1195 xfreefree(regset);
1196 return REG_ESPACE12;
1197 }
1198 parents[0] = -1;
1199
1200 saved_states = xmallocmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1201 if (saved_states == NULL((void*)0))
1202 {
1203 xfreefree(regset);
1204 xfreefree(parents);
1205 return REG_ESPACE12;
1206 }
1207 else
1208 {
1209 unsigned int i;
1210 for (i = 0; i <= tnfa->num_submatches; i++)
1211 saved_states[i].tag = -1;
1212 }
1213
1214 STACK_PUSH(stack, voidptr, node)do { status = tre_stack_push_voidptr(stack, node); } while ( 0
)
;
1215 STACK_PUSH(stack, int, ADDTAGS_RECURSE)do { status = tre_stack_push_int(stack, ADDTAGS_RECURSE); } while
( 0)
;
1216
1217 while (tre_stack_num_objects(stack) > bottom)
1218 {
1219 if (status != REG_OK0)
1220 break;
1221
1222 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1223 switch (symbol)
1224 {
1225
1226 case ADDTAGS_SET_SUBMATCH_END:
1227 {
1228 int id = tre_stack_pop_int(stack);
1229 int i;
1230
1231 /* Add end of this submatch to regset. */
1232 for (i = 0; regset[i] >= 0; i++);
1233 regset[i] = id * 2 + 1;
1234 regset[i + 1] = -1;
1235
1236 /* Pop this submatch from the parents stack. */
1237 for (i = 0; parents[i] >= 0; i++);
1238 parents[i - 1] = -1;
1239 break;
1240 }
1241
1242 case ADDTAGS_RECURSE:
1243 node = tre_stack_pop_voidptr(stack);
1244
1245 if (node->submatch_id >= 0)
1246 {
1247 int id = node->submatch_id;
1248 int i;
1249
1250
1251 /* Add start of this submatch to regset. */
1252 for (i = 0; regset[i] >= 0; i++);
1253 regset[i] = id * 2;
1254 regset[i + 1] = -1;
1255
1256 if (!first_pass)
1257 {
1258 for (i = 0; parents[i] >= 0; i++);
1259 tnfa->submatch_data[id].parents = NULL((void*)0);
1260 if (i > 0)
1261 {
1262 int *p = xmallocmalloc(sizeof(*p) * (i + 1));
1263 if (p == NULL((void*)0))
1264 {
1265 status = REG_ESPACE12;
1266 break;
1267 }
1268 assert(tnfa->submatch_data[id].parents == NULL)(void)0;
1269 tnfa->submatch_data[id].parents = p;
1270 for (i = 0; parents[i] >= 0; i++)
1271 p[i] = parents[i];
1272 p[i] = -1;
1273 }
1274 }
1275
1276 /* Add end of this submatch to regset after processing this
1277 node. */
1278 STACK_PUSHX(stack, int, node->submatch_id){ status = tre_stack_push_int(stack, node->submatch_id); if
(status != 0) break; }
;
1279 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END){ status = tre_stack_push_int(stack, ADDTAGS_SET_SUBMATCH_END
); if (status != 0) break; }
;
1280 }
1281
1282 switch (node->type)
1283 {
1284 case LITERAL:
1285 {
1286 tre_literal_t *lit = node->obj;
1287
1288 if (!IS_SPECIAL(lit)((lit)->code_min < 0) || IS_BACKREF(lit)((lit)->code_min == -4))
1289 {
1290 int i;
1291 if (regset[0] >= 0)
1292 {
1293 /* Regset is not empty, so add a tag before the
1294 literal or backref. */
1295 if (!first_pass)
1296 {
1297 status = tre_add_tag_left(mem, node, tag);
1298 tnfa->tag_directions[tag] = direction;
1299 if (minimal_tag >= 0)
1300 {
1301 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1302 tnfa->minimal_tags[i] = tag;
1303 tnfa->minimal_tags[i + 1] = minimal_tag;
1304 tnfa->minimal_tags[i + 2] = -1;
1305 minimal_tag = -1;
1306 num_minimals++;
1307 }
1308 tre_purge_regset(regset, tnfa, tag);
1309 }
1310 else
1311 {
1312 node->num_tags = 1;
1313 }
1314
1315 regset[0] = -1;
1316 tag = next_tag;
1317 num_tags++;
1318 next_tag++;
1319 }
1320 }
1321 else
1322 {
1323 assert(!IS_TAG(lit))(void)0;
1324 }
1325 break;
1326 }
1327 case CATENATION:
1328 {
1329 tre_catenation_t *cat = node->obj;
1330 tre_ast_node_t *left = cat->left;
1331 tre_ast_node_t *right = cat->right;
1332 int reserved_tag = -1;
1333
1334
1335 /* After processing right child. */
1336 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1337 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_RIGHT)
; if (status != 0) break; }
;
1338
1339 /* Process right child. */
1340 STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status !=
0) break; }
;
1341 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1342
1343 /* After processing left child. */
1344 STACK_PUSHX(stack, int, next_tag + left->num_tags){ status = tre_stack_push_int(stack, next_tag + left->num_tags
); if (status != 0) break; }
;
1345 if (left->num_tags > 0 && right->num_tags > 0)
1346 {
1347 /* Reserve the next tag to the right child. */
1348 reserved_tag = next_tag;
1349 next_tag++;
1350 }
1351 STACK_PUSHX(stack, int, reserved_tag){ status = tre_stack_push_int(stack, reserved_tag); if (status
!= 0) break; }
;
1352 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_LEFT);
if (status != 0) break; }
;
1353
1354 /* Process left child. */
1355 STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status !=
0) break; }
;
1356 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1357
1358 }
1359 break;
1360 case ITERATION:
1361 {
1362 tre_iteration_t *iter = node->obj;
1363
1364 if (first_pass)
1365 {
1366 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal){ status = tre_stack_push_int(stack, regset[0] >= 0 || iter
->minimal); if (status != 0) break; }
;
1367 }
1368 else
1369 {
1370 STACK_PUSHX(stack, int, tag){ status = tre_stack_push_int(stack, tag); if (status != 0) break
; }
;
1371 STACK_PUSHX(stack, int, iter->minimal){ status = tre_stack_push_int(stack, iter->minimal); if (status
!= 0) break; }
;
1372 }
1373 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1374 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_ITERATION)
; if (status != 0) break; }
;
1375
1376 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
1377 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1378
1379 /* Regset is not empty, so add a tag here. */
1380 if (regset[0] >= 0 || iter->minimal)
1381 {
1382 if (!first_pass)
1383 {
1384 int i;
1385 status = tre_add_tag_left(mem, node, tag);
1386 if (iter->minimal)
1387 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1388 else
1389 tnfa->tag_directions[tag] = direction;
1390 if (minimal_tag >= 0)
1391 {
1392 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1393 tnfa->minimal_tags[i] = tag;
1394 tnfa->minimal_tags[i + 1] = minimal_tag;
1395 tnfa->minimal_tags[i + 2] = -1;
1396 minimal_tag = -1;
1397 num_minimals++;
1398 }
1399 tre_purge_regset(regset, tnfa, tag);
1400 }
1401
1402 regset[0] = -1;
1403 tag = next_tag;
1404 num_tags++;
1405 next_tag++;
1406 }
1407 direction = TRE_TAG_MINIMIZE;
1408 }
1409 break;
1410 case UNION:
1411 {
1412 tre_union_t *uni = node->obj;
1413 tre_ast_node_t *left = uni->left;
1414 tre_ast_node_t *right = uni->right;
1415 int left_tag;
1416 int right_tag;
1417
1418 if (regset[0] >= 0)
1419 {
1420 left_tag = next_tag;
1421 right_tag = next_tag + 1;
1422 }
1423 else
1424 {
1425 left_tag = tag;
1426 right_tag = next_tag;
1427 }
1428
1429 /* After processing right child. */
1430 STACK_PUSHX(stack, int, right_tag){ status = tre_stack_push_int(stack, right_tag); if (status !=
0) break; }
;
1431 STACK_PUSHX(stack, int, left_tag){ status = tre_stack_push_int(stack, left_tag); if (status !=
0) break; }
;
1432 STACK_PUSHX(stack, voidptr, regset){ status = tre_stack_push_voidptr(stack, regset); if (status !=
0) break; }
;
1433 STACK_PUSHX(stack, int, regset[0] >= 0){ status = tre_stack_push_int(stack, regset[0] >= 0); if (
status != 0) break; }
;
1434 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1435 STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status !=
0) break; }
;
1436 STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status !=
0) break; }
;
1437 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_RIGHT
); if (status != 0) break; }
;
1438
1439 /* Process right child. */
1440 STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status !=
0) break; }
;
1441 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1442
1443 /* After processing left child. */
1444 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_LEFT
); if (status != 0) break; }
;
1445
1446 /* Process left child. */
1447 STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status !=
0) break; }
;
1448 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1449
1450 /* Regset is not empty, so add a tag here. */
1451 if (regset[0] >= 0)
1452 {
1453 if (!first_pass)
1454 {
1455 int i;
1456 status = tre_add_tag_left(mem, node, tag);
1457 tnfa->tag_directions[tag] = direction;
1458 if (minimal_tag >= 0)
1459 {
1460 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1461 tnfa->minimal_tags[i] = tag;
1462 tnfa->minimal_tags[i + 1] = minimal_tag;
1463 tnfa->minimal_tags[i + 2] = -1;
1464 minimal_tag = -1;
1465 num_minimals++;
1466 }
1467 tre_purge_regset(regset, tnfa, tag);
1468 }
1469
1470 regset[0] = -1;
1471 tag = next_tag;
1472 num_tags++;
1473 next_tag++;
1474 }
1475
1476 if (node->num_submatches > 0)
1477 {
1478 /* The next two tags are reserved for markers. */
1479 next_tag++;
1480 tag = next_tag;
1481 next_tag++;
1482 }
1483
1484 break;
1485 }
1486 }
1487
1488 if (node->submatch_id >= 0)
1489 {
1490 int i;
1491 /* Push this submatch on the parents stack. */
1492 for (i = 0; parents[i] >= 0; i++);
1493 parents[i] = node->submatch_id;
1494 parents[i + 1] = -1;
1495 }
1496
1497 break; /* end case: ADDTAGS_RECURSE */
1498
1499 case ADDTAGS_AFTER_ITERATION:
1500 {
1501 int minimal = 0;
1502 int enter_tag;
1503 node = tre_stack_pop_voidptr(stack);
1504 if (first_pass)
1505 {
1506 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1507 + tre_stack_pop_int(stack);
1508 minimal_tag = -1;
1509 }
1510 else
1511 {
1512 minimal = tre_stack_pop_int(stack);
1513 enter_tag = tre_stack_pop_int(stack);
1514 if (minimal)
1515 minimal_tag = enter_tag;
1516 }
1517
1518 if (!first_pass)
1519 {
1520 if (minimal)
1521 direction = TRE_TAG_MINIMIZE;
1522 else
1523 direction = TRE_TAG_MAXIMIZE;
1524 }
1525 break;
1526 }
1527
1528 case ADDTAGS_AFTER_CAT_LEFT:
1529 {
1530 int new_tag = tre_stack_pop_int(stack);
1531 next_tag = tre_stack_pop_int(stack);
1532 if (new_tag >= 0)
1533 {
1534 tag = new_tag;
1535 }
1536 break;
1537 }
1538
1539 case ADDTAGS_AFTER_CAT_RIGHT:
1540 node = tre_stack_pop_voidptr(stack);
1541 if (first_pass)
1542 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1543 + ((tre_catenation_t *)node->obj)->right->num_tags;
1544 break;
1545
1546 case ADDTAGS_AFTER_UNION_LEFT:
1547 /* Lift the bottom of the `regset' array so that when processing
1548 the right operand the items currently in the array are
1549 invisible. The original bottom was saved at ADDTAGS_UNION and
1550 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1551 while (*regset >= 0)
1552 regset++;
1553 break;
1554
1555 case ADDTAGS_AFTER_UNION_RIGHT:
1556 {
1557 int added_tags, tag_left, tag_right;
1558 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1559 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1560 node = tre_stack_pop_voidptr(stack);
1561 added_tags = tre_stack_pop_int(stack);
1562 if (first_pass)
1563 {
1564 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1565 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1566 + ((node->num_submatches > 0) ? 2 : 0);
1567 }
1568 regset = tre_stack_pop_voidptr(stack);
1569 tag_left = tre_stack_pop_int(stack);
1570 tag_right = tre_stack_pop_int(stack);
1571
1572 /* Add tags after both children, the left child gets a smaller
1573 tag than the right child. This guarantees that we prefer
1574 the left child over the right child. */
1575 /* XXX - This is not always necessary (if the children have
1576 tags which must be seen for every match of that child). */
1577 /* XXX - Check if this is the only place where tre_add_tag_right
1578 is used. If so, use tre_add_tag_left (putting the tag before
1579 the child as opposed after the child) and throw away
1580 tre_add_tag_right. */
1581 if (node->num_submatches > 0)
1582 {
1583 if (!first_pass)
1584 {
1585 status = tre_add_tag_right(mem, left, tag_left);
Value stored to 'status' is never read
1586 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1587 status = tre_add_tag_right(mem, right, tag_right);
1588 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1589 }
1590 num_tags += 2;
1591 }
1592 direction = TRE_TAG_MAXIMIZE;
1593 break;
1594 }
1595
1596 default:
1597 assert(0)(void)0;
1598 break;
1599
1600 } /* end switch(symbol) */
1601 } /* end while(tre_stack_num_objects(stack) > bottom) */
1602
1603 if (!first_pass)
1604 tre_purge_regset(regset, tnfa, tag);
1605
1606 if (!first_pass && minimal_tag >= 0)
1607 {
1608 int i;
1609 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1610 tnfa->minimal_tags[i] = tag;
1611 tnfa->minimal_tags[i + 1] = minimal_tag;
1612 tnfa->minimal_tags[i + 2] = -1;
1613 minimal_tag = -1;
1614 num_minimals++;
1615 }
1616
1617 assert(tree->num_tags == num_tags)(void)0;
1618 tnfa->end_tag = num_tags;
1619 tnfa->num_tags = num_tags;
1620 tnfa->num_minimals = num_minimals;
1621 xfreefree(orig_regset);
1622 xfreefree(parents);
1623 xfreefree(saved_states);
1624 return status;
1625}
1626
1627
1628
1629/*
1630 AST to TNFA compilation routines.
1631*/
1632
1633typedef enum {
1634 COPY_RECURSE,
1635 COPY_SET_RESULT_PTR
1636} tre_copyast_symbol_t;
1637
1638/* Flags for tre_copy_ast(). */
1639#define COPY_REMOVE_TAGS1 1
1640#define COPY_MAXIMIZE_FIRST_TAG2 2
1641
1642static reg_errcode_t
1643tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1644 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1645 tre_ast_node_t **copy, int *max_pos)
1646{
1647 reg_errcode_t status = REG_OK0;
1648 int bottom = tre_stack_num_objects(stack);
1649 int num_copied = 0;
1650 int first_tag = 1;
1651 tre_ast_node_t **result = copy;
1652 tre_copyast_symbol_t symbol;
1653
1654 STACK_PUSH(stack, voidptr, ast)do { status = tre_stack_push_voidptr(stack, ast); } while ( 0
)
;
1655 STACK_PUSH(stack, int, COPY_RECURSE)do { status = tre_stack_push_int(stack, COPY_RECURSE); } while
( 0)
;
1656
1657 while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1658 {
1659 tre_ast_node_t *node;
1660 if (status != REG_OK0)
1661 break;
1662
1663 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1664 switch (symbol)
1665 {
1666 case COPY_SET_RESULT_PTR:
1667 result = tre_stack_pop_voidptr(stack);
1668 break;
1669 case COPY_RECURSE:
1670 node = tre_stack_pop_voidptr(stack);
1671 switch (node->type)
1672 {
1673 case LITERAL:
1674 {
1675 tre_literal_t *lit = node->obj;
1676 int pos = lit->position;
1677 int min = lit->code_min;
1678 int max = lit->code_max;
1679 if (!IS_SPECIAL(lit)((lit)->code_min < 0) || IS_BACKREF(lit)((lit)->code_min == -4))
1680 {
1681 /* XXX - e.g. [ab] has only one position but two
1682 nodes, so we are creating holes in the state space
1683 here. Not fatal, just wastes memory. */
1684 pos += *pos_add;
1685 num_copied++;
1686 }
1687 else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_REMOVE_TAGS1))
1688 {
1689 /* Change this tag to empty. */
1690 min = EMPTY-1;
1691 max = pos = -1;
1692 }
1693 else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_MAXIMIZE_FIRST_TAG2)
1694 && first_tag)
1695 {
1696 /* Maximize the first tag. */
1697 tag_directions[max] = TRE_TAG_MAXIMIZE;
1698 first_tag = 0;
1699 }
1700 *result = tre_ast_new_literal(mem, min, max, pos);
1701 if (*result == NULL((void*)0))
1702 status = REG_ESPACE12;
1703 else {
1704 tre_literal_t *p = (*result)->obj;
1705 p->class = lit->class;
1706 p->neg_classes = lit->neg_classes;
1707 }
1708
1709 if (pos > *max_pos)
1710 *max_pos = pos;
1711 break;
1712 }
1713 case UNION:
1714 {
1715 tre_union_t *uni = node->obj;
1716 tre_union_t *tmp;
1717 *result = tre_ast_new_union(mem, uni->left, uni->right);
1718 if (*result == NULL((void*)0))
1719 {
1720 status = REG_ESPACE12;
1721 break;
1722 }
1723 tmp = (*result)->obj;
1724 result = &tmp->left;
1725 STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if (
status != 0) break; }
;
1726 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1727 STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right);
if (status != 0) break; }
;
1728 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if
(status != 0) break; }
;
1729 STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status
!= 0) break; }
;
1730 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1731 break;
1732 }
1733 case CATENATION:
1734 {
1735 tre_catenation_t *cat = node->obj;
1736 tre_catenation_t *tmp;
1737 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1738 if (*result == NULL((void*)0))
1739 {
1740 status = REG_ESPACE12;
1741 break;
1742 }
1743 tmp = (*result)->obj;
1744 tmp->left = NULL((void*)0);
1745 tmp->right = NULL((void*)0);
1746 result = &tmp->left;
1747
1748 STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if (
status != 0) break; }
;
1749 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1750 STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right);
if (status != 0) break; }
;
1751 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if
(status != 0) break; }
;
1752 STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status
!= 0) break; }
;
1753 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1754 break;
1755 }
1756 case ITERATION:
1757 {
1758 tre_iteration_t *iter = node->obj;
1759 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
1760 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1761 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1762 iter->max, iter->minimal);
1763 if (*result == NULL((void*)0))
1764 {
1765 status = REG_ESPACE12;
1766 break;
1767 }
1768 iter = (*result)->obj;
1769 result = &iter->arg;
1770 break;
1771 }
1772 default:
1773 assert(0)(void)0;
1774 break;
1775 }
1776 break;
1777 }
1778 }
1779 *pos_add += num_copied;
1780 return status;
1781}
1782
1783typedef enum {
1784 EXPAND_RECURSE,
1785 EXPAND_AFTER_ITER
1786} tre_expand_ast_symbol_t;
1787
1788/* Expands each iteration node that has a finite nonzero minimum or maximum
1789 iteration count to a catenated sequence of copies of the node. */
1790static reg_errcode_t
1791tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1792 int *position, tre_tag_direction_t *tag_directions)
1793{
1794 reg_errcode_t status = REG_OK0;
1795 int bottom = tre_stack_num_objects(stack);
1796 int pos_add = 0;
1797 int pos_add_total = 0;
1798 int max_pos = 0;
1799 int iter_depth = 0;
1800
1801 STACK_PUSHR(stack, voidptr, ast){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ast); if (_status != 0) return _status; }
;
1802 STACK_PUSHR(stack, int, EXPAND_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, EXPAND_RECURSE
); if (_status != 0) return _status; }
;
1803 while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1804 {
1805 tre_ast_node_t *node;
1806 tre_expand_ast_symbol_t symbol;
1807
1808 if (status != REG_OK0)
1809 break;
1810
1811 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1812 node = tre_stack_pop_voidptr(stack);
1813 switch (symbol)
1814 {
1815 case EXPAND_RECURSE:
1816 switch (node->type)
1817 {
1818 case LITERAL:
1819 {
1820 tre_literal_t *lit= node->obj;
1821 if (!IS_SPECIAL(lit)((lit)->code_min < 0) || IS_BACKREF(lit)((lit)->code_min == -4))
1822 {
1823 lit->position += pos_add;
1824 if (lit->position > max_pos)
1825 max_pos = lit->position;
1826 }
1827 break;
1828 }
1829 case UNION:
1830 {
1831 tre_union_t *uni = node->obj;
1832 STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if (
status != 0) break; }
;
1833 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1834 STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status
!= 0) break; }
;
1835 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1836 break;
1837 }
1838 case CATENATION:
1839 {
1840 tre_catenation_t *cat = node->obj;
1841 STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if (
status != 0) break; }
;
1842 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1843 STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status
!= 0) break; }
;
1844 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1845 break;
1846 }
1847 case ITERATION:
1848 {
1849 tre_iteration_t *iter = node->obj;
1850 STACK_PUSHX(stack, int, pos_add){ status = tre_stack_push_int(stack, pos_add); if (status != 0
) break; }
;
1851 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1852 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER){ status = tre_stack_push_int(stack, EXPAND_AFTER_ITER); if (
status != 0) break; }
;
1853 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
1854 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1855 /* If we are going to expand this node at EXPAND_AFTER_ITER
1856 then don't increase the `pos' fields of the nodes now, it
1857 will get done when expanding. */
1858 if (iter->min > 1 || iter->max > 1)
1859 pos_add = 0;
1860 iter_depth++;
1861 break;
1862 }
1863 default:
1864 assert(0)(void)0;
1865 break;
1866 }
1867 break;
1868 case EXPAND_AFTER_ITER:
1869 {
1870 tre_iteration_t *iter = node->obj;
1871 int pos_add_last;
1872 pos_add = tre_stack_pop_int(stack);
1873 pos_add_last = pos_add;
1874 if (iter->min > 1 || iter->max > 1)
1875 {
1876 tre_ast_node_t *seq1 = NULL((void*)0), *seq2 = NULL((void*)0);
1877 int j;
1878 int pos_add_save = pos_add;
1879
1880 /* Create a catenated sequence of copies of the node. */
1881 for (j = 0; j < iter->min; j++)
1882 {
1883 tre_ast_node_t *copy;
1884 /* Remove tags from all but the last copy. */
1885 int flags = ((j + 1 < iter->min)
1886 ? COPY_REMOVE_TAGS1
1887 : COPY_MAXIMIZE_FIRST_TAG2);
1888 pos_add_save = pos_add;
1889 status = tre_copy_ast(mem, stack, iter->arg, flags,
1890 &pos_add, tag_directions, &copy,
1891 &max_pos);
1892 if (status != REG_OK0)
1893 return status;
1894 if (seq1 != NULL((void*)0))
1895 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1896 else
1897 seq1 = copy;
1898 if (seq1 == NULL((void*)0))
1899 return REG_ESPACE12;
1900 }
1901
1902 if (iter->max == -1)
1903 {
1904 /* No upper limit. */
1905 pos_add_save = pos_add;
1906 status = tre_copy_ast(mem, stack, iter->arg, 0,
1907 &pos_add, NULL((void*)0), &seq2, &max_pos);
1908 if (status != REG_OK0)
1909 return status;
1910 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1911 if (seq2 == NULL((void*)0))
1912 return REG_ESPACE12;
1913 }
1914 else
1915 {
1916 for (j = iter->min; j < iter->max; j++)
1917 {
1918 tre_ast_node_t *tmp, *copy;
1919 pos_add_save = pos_add;
1920 status = tre_copy_ast(mem, stack, iter->arg, 0,
1921 &pos_add, NULL((void*)0), &copy, &max_pos);
1922 if (status != REG_OK0)
1923 return status;
1924 if (seq2 != NULL((void*)0))
1925 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1926 else
1927 seq2 = copy;
1928 if (seq2 == NULL((void*)0))
1929 return REG_ESPACE12;
1930 tmp = tre_ast_new_literal(mem, EMPTY-1, -1, -1);
1931 if (tmp == NULL((void*)0))
1932 return REG_ESPACE12;
1933 seq2 = tre_ast_new_union(mem, tmp, seq2);
1934 if (seq2 == NULL((void*)0))
1935 return REG_ESPACE12;
1936 }
1937 }
1938
1939 pos_add = pos_add_save;
1940 if (seq1 == NULL((void*)0))
1941 seq1 = seq2;
1942 else if (seq2 != NULL((void*)0))
1943 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1944 if (seq1 == NULL((void*)0))
1945 return REG_ESPACE12;
1946 node->obj = seq1->obj;
1947 node->type = seq1->type;
1948 }
1949
1950 iter_depth--;
1951 pos_add_total += pos_add - pos_add_last;
1952 if (iter_depth == 0)
1953 pos_add = pos_add_total;
1954
1955 break;
1956 }
1957 default:
1958 assert(0)(void)0;
1959 break;
1960 }
1961 }
1962
1963 *position += pos_add_total;
1964
1965 /* `max_pos' should never be larger than `*position' if the above
1966 code works, but just an extra safeguard let's make sure
1967 `*position' is set large enough so enough memory will be
1968 allocated for the transition table. */
1969 if (max_pos > *position)
1970 *position = max_pos;
1971
1972 return status;
1973}
1974
1975static tre_pos_and_tags_t *
1976tre_set_empty(tre_mem_t mem)
1977{
1978 tre_pos_and_tags_t *new_set;
1979
1980 new_set = tre_mem_calloc(mem, sizeof(*new_set))__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof(*new_set));
1981 if (new_set == NULL((void*)0))
1982 return NULL((void*)0);
1983
1984 new_set[0].position = -1;
1985 new_set[0].code_min = -1;
1986 new_set[0].code_max = -1;
1987
1988 return new_set;
1989}
1990
1991static tre_pos_and_tags_t *
1992tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1993 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1994{
1995 tre_pos_and_tags_t *new_set;
1996
1997 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof(*new_set) *
2)
;
1998 if (new_set == NULL((void*)0))
1999 return NULL((void*)0);
2000
2001 new_set[0].position = position;
2002 new_set[0].code_min = code_min;
2003 new_set[0].code_max = code_max;
2004 new_set[0].class = class;
2005 new_set[0].neg_classes = neg_classes;
2006 new_set[0].backref = backref;
2007 new_set[1].position = -1;
2008 new_set[1].code_min = -1;
2009 new_set[1].code_max = -1;
2010
2011 return new_set;
2012}
2013
2014static tre_pos_and_tags_t *
2015tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2016 int *tags, int assertions)
2017{
2018 int s1, s2, i, j;
2019 tre_pos_and_tags_t *new_set;
2020 int *new_tags;
2021 int num_tags;
2022
2023 for (num_tags = 0; tags != NULL((void*)0) && tags[num_tags] >= 0; num_tags++);
2024 for (s1 = 0; set1[s1].position >= 0; s1++);
2025 for (s2 = 0; set2[s2].position >= 0; s2++);
2026 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1))__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof(*new_set) *
(s1 + s2 + 1))
;
2027 if (!new_set )
2028 return NULL((void*)0);
2029
2030 for (s1 = 0; set1[s1].position >= 0; s1++)
2031 {
2032 new_set[s1].position = set1[s1].position;
2033 new_set[s1].code_min = set1[s1].code_min;
2034 new_set[s1].code_max = set1[s1].code_max;
2035 new_set[s1].assertions = set1[s1].assertions | assertions;
2036 new_set[s1].class = set1[s1].class;
2037 new_set[s1].neg_classes = set1[s1].neg_classes;
2038 new_set[s1].backref = set1[s1].backref;
2039 if (set1[s1].tags == NULL((void*)0) && tags == NULL((void*)0))
2040 new_set[s1].tags = NULL((void*)0);
2041 else
2042 {
2043 for (i = 0; set1[s1].tags != NULL((void*)0) && set1[s1].tags[i] >= 0; i++);
2044 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, (sizeof(*new_tags
) * (i + num_tags + 1)))
2045 * (i + num_tags + 1)))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, (sizeof(*new_tags
) * (i + num_tags + 1)))
;
2046 if (new_tags == NULL((void*)0))
2047 return NULL((void*)0);
2048 for (j = 0; j < i; j++)
2049 new_tags[j] = set1[s1].tags[j];
2050 for (i = 0; i < num_tags; i++)
2051 new_tags[j + i] = tags[i];
2052 new_tags[j + i] = -1;
2053 new_set[s1].tags = new_tags;
2054 }
2055 }
2056
2057 for (s2 = 0; set2[s2].position >= 0; s2++)
2058 {
2059 new_set[s1 + s2].position = set2[s2].position;
2060 new_set[s1 + s2].code_min = set2[s2].code_min;
2061 new_set[s1 + s2].code_max = set2[s2].code_max;
2062 /* XXX - why not | assertions here as well? */
2063 new_set[s1 + s2].assertions = set2[s2].assertions;
2064 new_set[s1 + s2].class = set2[s2].class;
2065 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2066 new_set[s1 + s2].backref = set2[s2].backref;
2067 if (set2[s2].tags == NULL((void*)0))
2068 new_set[s1 + s2].tags = NULL((void*)0);
2069 else
2070 {
2071 for (i = 0; set2[s2].tags[i] >= 0; i++);
2072 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*new_tags)
* (i + 1))
;
2073 if (new_tags == NULL((void*)0))
2074 return NULL((void*)0);
2075 for (j = 0; j < i; j++)
2076 new_tags[j] = set2[s2].tags[j];
2077 new_tags[j] = -1;
2078 new_set[s1 + s2].tags = new_tags;
2079 }
2080 }
2081 new_set[s1 + s2].position = -1;
2082 return new_set;
2083}
2084
2085/* Finds the empty path through `node' which is the one that should be
2086 taken according to POSIX.2 rules, and adds the tags on that path to
2087 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2088 set to the number of tags seen on the path. */
2089static reg_errcode_t
2090tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2091 int *assertions, int *num_tags_seen)
2092{
2093 tre_literal_t *lit;
2094 tre_union_t *uni;
2095 tre_catenation_t *cat;
2096 tre_iteration_t *iter;
2097 int i;
2098 int bottom = tre_stack_num_objects(stack);
2099 reg_errcode_t status = REG_OK0;
2100 if (num_tags_seen)
2101 *num_tags_seen = 0;
2102
2103 status = tre_stack_push_voidptr(stack, node);
2104
2105 /* Walk through the tree recursively. */
2106 while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
2107 {
2108 node = tre_stack_pop_voidptr(stack);
2109
2110 switch (node->type)
2111 {
2112 case LITERAL:
2113 lit = (tre_literal_t *)node->obj;
2114 switch (lit->code_min)
2115 {
2116 case TAG-3:
2117 if (lit->code_max >= 0)
2118 {
2119 if (tags != NULL((void*)0))
2120 {
2121 /* Add the tag to `tags'. */
2122 for (i = 0; tags[i] >= 0; i++)
2123 if (tags[i] == lit->code_max)
2124 break;
2125 if (tags[i] < 0)
2126 {
2127 tags[i] = lit->code_max;
2128 tags[i + 1] = -1;
2129 }
2130 }
2131 if (num_tags_seen)
2132 (*num_tags_seen)++;
2133 }
2134 break;
2135 case ASSERTION-2:
2136 assert(lit->code_max >= 1(void)0
2137 || lit->code_max <= ASSERT_LAST)(void)0;
2138 if (assertions != NULL((void*)0))
2139 *assertions |= lit->code_max;
2140 break;
2141 case EMPTY-1:
2142 break;
2143 default:
2144 assert(0)(void)0;
2145 break;
2146 }
2147 break;
2148
2149 case UNION:
2150 /* Subexpressions starting earlier take priority over ones
2151 starting later, so we prefer the left subexpression over the
2152 right subexpression. */
2153 uni = (tre_union_t *)node->obj;
2154 if (uni->left->nullable)
2155 STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status
!= 0) break; }
2156 else if (uni->right->nullable)
2157 STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if (
status != 0) break; }
2158 else
2159 assert(0)(void)0;
2160 break;
2161
2162 case CATENATION:
2163 /* The path must go through both children. */
2164 cat = (tre_catenation_t *)node->obj;
2165 assert(cat->left->nullable)(void)0;
2166 assert(cat->right->nullable)(void)0;
2167 STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status
!= 0) break; }
;
2168 STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if (
status != 0) break; }
;
2169 break;
2170
2171 case ITERATION:
2172 /* A match with an empty string is preferred over no match at
2173 all, so we go through the argument if possible. */
2174 iter = (tre_iteration_t *)node->obj;
2175 if (iter->arg->nullable)
2176 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
2177 break;
2178
2179 default:
2180 assert(0)(void)0;
2181 break;
2182 }
2183 }
2184
2185 return status;
2186}
2187
2188
2189typedef enum {
2190 NFL_RECURSE,
2191 NFL_POST_UNION,
2192 NFL_POST_CATENATION,
2193 NFL_POST_ITERATION
2194} tre_nfl_stack_symbol_t;
2195
2196
2197/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2198 the nodes of the AST `tree'. */
2199static reg_errcode_t
2200tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2201{
2202 int bottom = tre_stack_num_objects(stack);
2203
2204 STACK_PUSHR(stack, voidptr, tree){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, tree); if (_status != 0) return _status; }
;
2205 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2206
2207 while (tre_stack_num_objects(stack) > bottom)
2208 {
2209 tre_nfl_stack_symbol_t symbol;
2210 tre_ast_node_t *node;
2211
2212 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2213 node = tre_stack_pop_voidptr(stack);
2214 switch (symbol)
2215 {
2216 case NFL_RECURSE:
2217 switch (node->type)
2218 {
2219 case LITERAL:
2220 {
2221 tre_literal_t *lit = (tre_literal_t *)node->obj;
2222 if (IS_BACKREF(lit)((lit)->code_min == -4))
2223 {
2224 /* Back references: nullable = false, firstpos = {i},
2225 lastpos = {i}. */
2226 node->nullable = 0;
2227 node->firstpos = tre_set_one(mem, lit->position, 0,
2228 TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0), -1);
2229 if (!node->firstpos)
2230 return REG_ESPACE12;
2231 node->lastpos = tre_set_one(mem, lit->position, 0,
2232 TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0),
2233 (int)lit->code_max);
2234 if (!node->lastpos)
2235 return REG_ESPACE12;
2236 }
2237 else if (lit->code_min < 0)
2238 {
2239 /* Tags, empty strings, params, and zero width assertions:
2240 nullable = true, firstpos = {}, and lastpos = {}. */
2241 node->nullable = 1;
2242 node->firstpos = tre_set_empty(mem);
2243 if (!node->firstpos)
2244 return REG_ESPACE12;
2245 node->lastpos = tre_set_empty(mem);
2246 if (!node->lastpos)
2247 return REG_ESPACE12;
2248 }
2249 else
2250 {
2251 /* Literal at position i: nullable = false, firstpos = {i},
2252 lastpos = {i}. */
2253 node->nullable = 0;
2254 node->firstpos =
2255 tre_set_one(mem, lit->position, (int)lit->code_min,
2256 (int)lit->code_max, 0, NULL((void*)0), -1);
2257 if (!node->firstpos)
2258 return REG_ESPACE12;
2259 node->lastpos = tre_set_one(mem, lit->position,
2260 (int)lit->code_min,
2261 (int)lit->code_max,
2262 lit->class, lit->neg_classes,
2263 -1);
2264 if (!node->lastpos)
2265 return REG_ESPACE12;
2266 }
2267 break;
2268 }
2269
2270 case UNION:
2271 /* Compute the attributes for the two subtrees, and after that
2272 for this node. */
2273 STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, node); if (_status != 0) return _status; }
;
2274 STACK_PUSHR(stack, int, NFL_POST_UNION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_UNION
); if (_status != 0) return _status; }
;
2275 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_union_t *)node->obj)->right); if (_status != 0)
return _status; }
;
2276 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2277 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_union_t *)node->obj)->left); if (_status != 0) return
_status; }
;
2278 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2279 break;
2280
2281 case CATENATION:
2282 /* Compute the attributes for the two subtrees, and after that
2283 for this node. */
2284 STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, node); if (_status != 0) return _status; }
;
2285 STACK_PUSHR(stack, int, NFL_POST_CATENATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_CATENATION
); if (_status != 0) return _status; }
;
2286 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_catenation_t *)node->obj)->right); if (_status !=
0) return _status; }
;
2287 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2288 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_catenation_t *)node->obj)->left); if (_status !=
0) return _status; }
;
2289 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2290 break;
2291
2292 case ITERATION:
2293 /* Compute the attributes for the subtree, and after that for
2294 this node. */
2295 STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, node); if (_status != 0) return _status; }
;
2296 STACK_PUSHR(stack, int, NFL_POST_ITERATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_ITERATION
); if (_status != 0) return _status; }
;
2297 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_iteration_t *)node->obj)->arg); if (_status != 0
) return _status; }
;
2298 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2299 break;
2300 }
2301 break; /* end case: NFL_RECURSE */
2302
2303 case NFL_POST_UNION:
2304 {
2305 tre_union_t *uni = (tre_union_t *)node->obj;
2306 node->nullable = uni->left->nullable || uni->right->nullable;
2307 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2308 uni->right->firstpos, NULL((void*)0), 0);
2309 if (!node->firstpos)
2310 return REG_ESPACE12;
2311 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2312 uni->right->lastpos, NULL((void*)0), 0);
2313 if (!node->lastpos)
2314 return REG_ESPACE12;
2315 break;
2316 }
2317
2318 case NFL_POST_ITERATION:
2319 {
2320 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2321
2322 if (iter->min == 0 || iter->arg->nullable)
2323 node->nullable = 1;
2324 else
2325 node->nullable = 0;
2326 node->firstpos = iter->arg->firstpos;
2327 node->lastpos = iter->arg->lastpos;
2328 break;
2329 }
2330
2331 case NFL_POST_CATENATION:
2332 {
2333 int num_tags, *tags, assertions;
2334 reg_errcode_t status;
2335 tre_catenation_t *cat = node->obj;
2336 node->nullable = cat->left->nullable && cat->right->nullable;
2337
2338 /* Compute firstpos. */
2339 if (cat->left->nullable)
2340 {
2341 /* The left side matches the empty string. Make a first pass
2342 with tre_match_empty() to get the number of tags and
2343 parameters. */
2344 status = tre_match_empty(stack, cat->left,
2345 NULL((void*)0), NULL((void*)0), &num_tags);
2346 if (status != REG_OK0)
2347 return status;
2348 /* Allocate arrays for the tags and parameters. */
2349 tags = xmallocmalloc(sizeof(*tags) * (num_tags + 1));
2350 if (!tags)
2351 return REG_ESPACE12;
2352 tags[0] = -1;
2353 assertions = 0;
2354 /* Second pass with tre_mach_empty() to get the list of
2355 tags and parameters. */
2356 status = tre_match_empty(stack, cat->left, tags,
2357 &assertions, NULL((void*)0));
2358 if (status != REG_OK0)
2359 {
2360 xfreefree(tags);
2361 return status;
2362 }
2363 node->firstpos =
2364 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2365 tags, assertions);
2366 xfreefree(tags);
2367 if (!node->firstpos)
2368 return REG_ESPACE12;
2369 }
2370 else
2371 {
2372 node->firstpos = cat->left->firstpos;
2373 }
2374
2375 /* Compute lastpos. */
2376 if (cat->right->nullable)
2377 {
2378 /* The right side matches the empty string. Make a first pass
2379 with tre_match_empty() to get the number of tags and
2380 parameters. */
2381 status = tre_match_empty(stack, cat->right,
2382 NULL((void*)0), NULL((void*)0), &num_tags);
2383 if (status != REG_OK0)
2384 return status;
2385 /* Allocate arrays for the tags and parameters. */
2386 tags = xmallocmalloc(sizeof(int) * (num_tags + 1));
2387 if (!tags)
2388 return REG_ESPACE12;
2389 tags[0] = -1;
2390 assertions = 0;
2391 /* Second pass with tre_mach_empty() to get the list of
2392 tags and parameters. */
2393 status = tre_match_empty(stack, cat->right, tags,
2394 &assertions, NULL((void*)0));
2395 if (status != REG_OK0)
2396 {
2397 xfreefree(tags);
2398 return status;
2399 }
2400 node->lastpos =
2401 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2402 tags, assertions);
2403 xfreefree(tags);
2404 if (!node->lastpos)
2405 return REG_ESPACE12;
2406 }
2407 else
2408 {
2409 node->lastpos = cat->right->lastpos;
2410 }
2411 break;
2412 }
2413
2414 default:
2415 assert(0)(void)0;
2416 break;
2417 }
2418 }
2419
2420 return REG_OK0;
2421}
2422
2423
2424/* Adds a transition from each position in `p1' to each position in `p2'. */
2425static reg_errcode_t
2426tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2427 tre_tnfa_transition_t *transitions,
2428 int *counts, int *offs)
2429{
2430 tre_pos_and_tags_t *orig_p2 = p2;
2431 tre_tnfa_transition_t *trans;
2432 int i, j, k, l, dup, prev_p2_pos;
2433
2434 if (transitions != NULL((void*)0))
2435 while (p1->position >= 0)
2436 {
2437 p2 = orig_p2;
2438 prev_p2_pos = -1;
2439 while (p2->position >= 0)
2440 {
2441 /* Optimization: if this position was already handled, skip it. */
2442 if (p2->position == prev_p2_pos)
2443 {
2444 p2++;
2445 continue;
2446 }
2447 prev_p2_pos = p2->position;
2448 /* Set `trans' to point to the next unused transition from
2449 position `p1->position'. */
2450 trans = transitions + offs[p1->position];
2451 while (trans->state != NULL((void*)0))
2452 {
2453#if 0
2454 /* If we find a previous transition from `p1->position' to
2455 `p2->position', it is overwritten. This can happen only
2456 if there are nested loops in the regexp, like in "((a)*)*".
2457 In POSIX.2 repetition using the outer loop is always
2458 preferred over using the inner loop. Therefore the
2459 transition for the inner loop is useless and can be thrown
2460 away. */
2461 /* XXX - The same position is used for all nodes in a bracket
2462 expression, so this optimization cannot be used (it will
2463 break bracket expressions) unless I figure out a way to
2464 detect it here. */
2465 if (trans->state_id == p2->position)
2466 {
2467 break;
2468 }
2469#endif
2470 trans++;
2471 }
2472
2473 if (trans->state == NULL((void*)0))
2474 (trans + 1)->state = NULL((void*)0);
2475 /* Use the character ranges, assertions, etc. from `p1' for
2476 the transition from `p1' to `p2'. */
2477 trans->code_min = p1->code_min;
2478 trans->code_max = p1->code_max;
2479 trans->state = transitions + offs[p2->position];
2480 trans->state_id = p2->position;
2481 trans->assertions = p1->assertions | p2->assertions
2482 | (p1->class ? ASSERT_CHAR_CLASS4 : 0)
2483 | (p1->neg_classes != NULL((void*)0) ? ASSERT_CHAR_CLASS_NEG8 : 0);
2484 if (p1->backref >= 0)
2485 {
2486 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0)(void)0;
2487 assert(p2->backref < 0)(void)0;
2488 trans->u.backref = p1->backref;
2489 trans->assertions |= ASSERT_BACKREF256;
2490 }
2491 else
2492 trans->u.class = p1->class;
2493 if (p1->neg_classes != NULL((void*)0))
2494 {
2495 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2496 trans->neg_classes =
2497 xmallocmalloc(sizeof(*trans->neg_classes) * (i + 1));
2498 if (trans->neg_classes == NULL((void*)0))
2499 return REG_ESPACE12;
2500 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2501 trans->neg_classes[i] = p1->neg_classes[i];
2502 trans->neg_classes[i] = (tre_ctype_t)0;
2503 }
2504 else
2505 trans->neg_classes = NULL((void*)0);
2506
2507 /* Find out how many tags this transition has. */
2508 i = 0;
2509 if (p1->tags != NULL((void*)0))
2510 while(p1->tags[i] >= 0)
2511 i++;
2512 j = 0;
2513 if (p2->tags != NULL((void*)0))
2514 while(p2->tags[j] >= 0)
2515 j++;
2516
2517 /* If we are overwriting a transition, free the old tag array. */
2518 if (trans->tags != NULL((void*)0))
2519 xfreefree(trans->tags);
2520 trans->tags = NULL((void*)0);
2521
2522 /* If there were any tags, allocate an array and fill it. */
2523 if (i + j > 0)
2524 {
2525 trans->tags = xmallocmalloc(sizeof(*trans->tags) * (i + j + 1));
2526 if (!trans->tags)
2527 return REG_ESPACE12;
2528 i = 0;
2529 if (p1->tags != NULL((void*)0))
2530 while(p1->tags[i] >= 0)
2531 {
2532 trans->tags[i] = p1->tags[i];
2533 i++;
2534 }
2535 l = i;
2536 j = 0;
2537 if (p2->tags != NULL((void*)0))
2538 while (p2->tags[j] >= 0)
2539 {
2540 /* Don't add duplicates. */
2541 dup = 0;
2542 for (k = 0; k < i; k++)
2543 if (trans->tags[k] == p2->tags[j])
2544 {
2545 dup = 1;
2546 break;
2547 }
2548 if (!dup)
2549 trans->tags[l++] = p2->tags[j];
2550 j++;
2551 }
2552 trans->tags[l] = -1;
2553 }
2554
2555 p2++;
2556 }
2557 p1++;
2558 }
2559 else
2560 /* Compute a maximum limit for the number of transitions leaving
2561 from each state. */
2562 while (p1->position >= 0)
2563 {
2564 p2 = orig_p2;
2565 while (p2->position >= 0)
2566 {
2567 counts[p1->position]++;
2568 p2++;
2569 }
2570 p1++;
2571 }
2572 return REG_OK0;
2573}
2574
2575/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2576 labelled with one character range (there are no transitions on empty
2577 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2578 the regexp. */
2579static reg_errcode_t
2580tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2581 int *counts, int *offs)
2582{
2583 tre_union_t *uni;
2584 tre_catenation_t *cat;
2585 tre_iteration_t *iter;
2586 reg_errcode_t errcode = REG_OK0;
2587
2588 /* XXX - recurse using a stack!. */
2589 switch (node->type)
2590 {
2591 case LITERAL:
2592 break;
2593 case UNION:
2594 uni = (tre_union_t *)node->obj;
2595 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2596 if (errcode != REG_OK0)
2597 return errcode;
2598 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2599 break;
2600
2601 case CATENATION:
2602 cat = (tre_catenation_t *)node->obj;
2603 /* Add a transition from each position in cat->left->lastpos
2604 to each position in cat->right->firstpos. */
2605 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2606 transitions, counts, offs);
2607 if (errcode != REG_OK0)
2608 return errcode;
2609 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2610 if (errcode != REG_OK0)
2611 return errcode;
2612 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2613 break;
2614
2615 case ITERATION:
2616 iter = (tre_iteration_t *)node->obj;
2617 assert(iter->max == -1 || iter->max == 1)(void)0;
2618
2619 if (iter->max == -1)
2620 {
2621 assert(iter->min == 0 || iter->min == 1)(void)0;
2622 /* Add a transition from each last position in the iterated
2623 expression to each first position. */
2624 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2625 transitions, counts, offs);
2626 if (errcode != REG_OK0)
2627 return errcode;
2628 }
2629 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2630 break;
2631 }
2632 return errcode;
2633}
2634
2635
2636#define ERROR_EXIT(err)do { errcode = err; if ( 1) goto error_exit; } while ( 0) \
2637 do \
2638 { \
2639 errcode = err; \
2640 if (/*CONSTCOND*/1) \
2641 goto error_exit; \
2642 } \
2643 while (/*CONSTCOND*/0)
2644
2645
2646int
2647regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2648{
2649 tre_stack_t *stack;
2650 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2651 tre_pos_and_tags_t *p;
2652 int *counts = NULL((void*)0), *offs = NULL((void*)0);
2653 int i, add = 0;
2654 tre_tnfa_transition_t *transitions, *initial;
2655 tre_tnfa_t *tnfa = NULL((void*)0);
2656 tre_submatch_data_t *submatch_data;
2657 tre_tag_direction_t *tag_directions = NULL((void*)0);
2658 reg_errcode_t errcode;
2659 tre_mem_t mem;
2660
2661 /* Parse context. */
2662 tre_parse_ctx_t parse_ctx;
2663
2664 /* Allocate a stack used throughout the compilation process for various
2665 purposes. */
2666 stack = tre_stack_new(512, 10240, 128);
2667 if (!stack)
2668 return REG_ESPACE12;
2669 /* Allocate a fast memory allocator. */
2670 mem = tre_mem_new()__tre_mem_new_impl(0, ((void*)0));
2671 if (!mem)
2672 {
2673 tre_stack_destroy(stack);
2674 return REG_ESPACE12;
2675 }
2676
2677 /* Parse the regexp. */
2678 memset(&parse_ctx, 0, sizeof(parse_ctx));
2679 parse_ctx.mem = mem;
2680 parse_ctx.stack = stack;
2681 parse_ctx.re = regex;
2682 parse_ctx.cflags = cflags;
2683 parse_ctx.max_backref = -1;
2684 errcode = tre_parse(&parse_ctx);
2685 if (errcode != REG_OK0)
2686 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2687 preg->re_nsub = parse_ctx.submatch_id - 1;
2688 tree = parse_ctx.n;
2689
2690#ifdef TRE_DEBUG
2691 tre_ast_print(tree);
2692#endif /* TRE_DEBUG */
2693
2694 /* Referring to nonexistent subexpressions is illegal. */
2695 if (parse_ctx.max_backref > (int)preg->re_nsub)
2696 ERROR_EXIT(REG_ESUBREG)do { errcode = 6; if ( 1) goto error_exit; } while ( 0);
2697
2698 /* Allocate the TNFA struct. */
2699 tnfa = xcalloccalloc(1, sizeof(tre_tnfa_t));
2700 if (tnfa == NULL((void*)0))
2701 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2702 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2703 tnfa->have_approx = 0;
2704 tnfa->num_submatches = parse_ctx.submatch_id;
2705
2706 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2707 regexp does not have back references, this can be skipped. */
2708 if (tnfa->have_backrefs || !(cflags & REG_NOSUB8))
2709 {
2710
2711 /* Figure out how many tags we will need. */
2712 errcode = tre_add_tags(NULL((void*)0), stack, tree, tnfa);
2713 if (errcode != REG_OK0)
2714 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2715
2716 if (tnfa->num_tags > 0)
2717 {
2718 tag_directions = xmallocmalloc(sizeof(*tag_directions)
2719 * (tnfa->num_tags + 1));
2720 if (tag_directions == NULL((void*)0))
2721 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2722 tnfa->tag_directions = tag_directions;
2723 memset(tag_directions, -1,
2724 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2725 }
2726 tnfa->minimal_tags = xcalloccalloc((unsigned)tnfa->num_tags * 2 + 1,
2727 sizeof(*tnfa->minimal_tags));
2728 if (tnfa->minimal_tags == NULL((void*)0))
2729 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2730
2731 submatch_data = xcalloccalloc((unsigned)parse_ctx.submatch_id,
2732 sizeof(*submatch_data));
2733 if (submatch_data == NULL((void*)0))
2734 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2735 tnfa->submatch_data = submatch_data;
2736
2737 errcode = tre_add_tags(mem, stack, tree, tnfa);
2738 if (errcode != REG_OK0)
2739 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2740
2741 }
2742
2743 /* Expand iteration nodes. */
2744 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2745 tag_directions);
2746 if (errcode != REG_OK0)
2747 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2748
2749 /* Add a dummy node for the final state.
2750 XXX - For certain patterns this dummy node can be optimized away,
2751 for example "a*" or "ab*". Figure out a simple way to detect
2752 this possibility. */
2753 tmp_ast_l = tree;
2754 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2755 if (tmp_ast_r == NULL((void*)0))
2756 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2757
2758 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2759 if (tree == NULL((void*)0))
2760 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2761
2762 errcode = tre_compute_nfl(mem, stack, tree);
2763 if (errcode != REG_OK0)
2764 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2765
2766 counts = xmallocmalloc(sizeof(int) * parse_ctx.position);
2767 if (counts == NULL((void*)0))
2768 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2769
2770 offs = xmallocmalloc(sizeof(int) * parse_ctx.position);
2771 if (offs == NULL((void*)0))
2772 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2773
2774 for (i = 0; i < parse_ctx.position; i++)
2775 counts[i] = 0;
2776 tre_ast_to_tnfa(tree, NULL((void*)0), counts, NULL((void*)0));
2777
2778 add = 0;
2779 for (i = 0; i < parse_ctx.position; i++)
2780 {
2781 offs[i] = add;
2782 add += counts[i] + 1;
2783 counts[i] = 0;
2784 }
2785 transitions = xcalloccalloc((unsigned)add + 1, sizeof(*transitions));
2786 if (transitions == NULL((void*)0))
2787 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2788 tnfa->transitions = transitions;
2789 tnfa->num_transitions = add;
2790
2791 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2792 if (errcode != REG_OK0)
2793 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2794
2795 tnfa->firstpos_chars = NULL((void*)0);
2796
2797 p = tree->firstpos;
2798 i = 0;
2799 while (p->position >= 0)
2800 {
2801 i++;
2802 p++;
2803 }
2804
2805 initial = xcalloccalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2806 if (initial == NULL((void*)0))
2807 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2808 tnfa->initial = initial;
2809
2810 i = 0;
2811 for (p = tree->firstpos; p->position >= 0; p++)
2812 {
2813 initial[i].state = transitions + offs[p->position];
2814 initial[i].state_id = p->position;
2815 initial[i].tags = NULL((void*)0);
2816 /* Copy the arrays p->tags, and p->params, they are allocated
2817 from a tre_mem object. */
2818 if (p->tags)
2819 {
2820 int j;
2821 for (j = 0; p->tags[j] >= 0; j++);
2822 initial[i].tags = xmallocmalloc(sizeof(*p->tags) * (j + 1));
2823 if (!initial[i].tags)
2824 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2825 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2826 }
2827 initial[i].assertions = p->assertions;
2828 i++;
2829 }
2830 initial[i].state = NULL((void*)0);
2831
2832 tnfa->num_transitions = add;
2833 tnfa->final = transitions + offs[tree->lastpos[0].position];
2834 tnfa->num_states = parse_ctx.position;
2835 tnfa->cflags = cflags;
2836
2837 tre_mem_destroy__tre_mem_destroy(mem);
2838 tre_stack_destroy(stack);
2839 xfreefree(counts);
2840 xfreefree(offs);
2841
2842 preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2843 return REG_OK0;
2844
2845 error_exit:
2846 /* Free everything that was allocated and return the error code. */
2847 tre_mem_destroy__tre_mem_destroy(mem);
2848 if (stack != NULL((void*)0))
2849 tre_stack_destroy(stack);
2850 if (counts != NULL((void*)0))
2851 xfreefree(counts);
2852 if (offs != NULL((void*)0))
2853 xfreefree(offs);
2854 preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2855 regfree(preg);
2856 return errcode;
2857}
2858
2859
2860
2861
2862void
2863regfree(regex_t *preg)
2864{
2865 tre_tnfa_t *tnfa;
2866 unsigned int i;
2867 tre_tnfa_transition_t *trans;
2868
2869 tnfa = (void *)preg->TRE_REGEX_T_FIELD__opaque;
2870 if (!tnfa)
2871 return;
2872
2873 for (i = 0; i < tnfa->num_transitions; i++)
2874 if (tnfa->transitions[i].state)
2875 {
2876 if (tnfa->transitions[i].tags)
2877 xfreefree(tnfa->transitions[i].tags);
2878 if (tnfa->transitions[i].neg_classes)
2879 xfreefree(tnfa->transitions[i].neg_classes);
2880 }
2881 if (tnfa->transitions)
2882 xfreefree(tnfa->transitions);
2883
2884 if (tnfa->initial)
2885 {
2886 for (trans = tnfa->initial; trans->state; trans++)
2887 {
2888 if (trans->tags)
2889 xfreefree(trans->tags);
2890 }
2891 xfreefree(tnfa->initial);
2892 }
2893
2894 if (tnfa->submatch_data)
2895 {
2896 for (i = 0; i < tnfa->num_submatches; i++)
2897 if (tnfa->submatch_data[i].parents)
2898 xfreefree(tnfa->submatch_data[i].parents);
2899 xfreefree(tnfa->submatch_data);
2900 }
2901
2902 if (tnfa->tag_directions)
2903 xfreefree(tnfa->tag_directions);
2904 if (tnfa->firstpos_chars)
2905 xfreefree(tnfa->firstpos_chars);
2906 if (tnfa->minimal_tags)
2907 xfreefree(tnfa->minimal_tags);
2908 xfreefree(tnfa);
2909}