Bug Summary

File:src/regex/regcomp.c
Location:line 663, column 2
Description:Value stored to 'negmax' is never read

Annotated Source Code

1/*
2 regcomp.c - TRE POSIX compatible regex compilation functions.
3
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30*/
31
32#include <string.h>
33#include <stdlib.h>
34#include <regex.h>
35#include <limits.h>
36#include <stdint.h>
37#include <ctype.h>
38
39#include "tre.h"
40
41#include <assert.h>
42
43/***********************************************************************
44 from tre-compile.h
45***********************************************************************/
46
47typedef struct {
48 int position;
49 int code_min;
50 int code_max;
51 int *tags;
52 int assertions;
53 tre_ctype_t class;
54 tre_ctype_t *neg_classes;
55 int backref;
56} tre_pos_and_tags_t;
57
58
59/***********************************************************************
60 from tre-ast.c and tre-ast.h
61***********************************************************************/
62
63/* The different AST node types. */
64typedef enum {
65 LITERAL,
66 CATENATION,
67 ITERATION,
68 UNION
69} tre_ast_type_t;
70
71/* Special subtypes of TRE_LITERAL. */
72#define EMPTY-1 -1 /* Empty leaf (denotes empty string). */
73#define ASSERTION-2 -2 /* Assertion leaf. */
74#define TAG-3 -3 /* Tag leaf. */
75#define BACKREF-4 -4 /* Back reference leaf. */
76
77#define IS_SPECIAL(x)((x)->code_min < 0) ((x)->code_min < 0)
78#define IS_EMPTY(x)((x)->code_min == -1) ((x)->code_min == EMPTY-1)
79#define IS_ASSERTION(x)((x)->code_min == -2) ((x)->code_min == ASSERTION-2)
80#define IS_TAG(x)((x)->code_min == -3) ((x)->code_min == TAG-3)
81#define IS_BACKREF(x)((x)->code_min == -4) ((x)->code_min == BACKREF-4)
82
83
84/* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
86typedef struct {
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
89 int nullable;
90 int submatch_id;
91 int num_submatches;
92 int num_tags;
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
95} tre_ast_node_t;
96
97
98/* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
100 character. */
101typedef struct {
102 long code_min;
103 long code_max;
104 int position;
105 tre_ctype_t class;
106 tre_ctype_t *neg_classes;
107} tre_literal_t;
108
109/* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
113typedef struct {
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
116} tre_catenation_t;
117
118/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
119 operators. */
120typedef struct {
121 /* Subexpression to match. */
122 tre_ast_node_t *arg;
123 /* Minimum number of consecutive matches. */
124 int min;
125 /* Maximum number of consecutive matches. */
126 int max;
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
131} tre_iteration_t;
132
133/* An "union" node. These are created for the "|" operator. */
134typedef struct {
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
137} tre_union_t;
138
139
140static tre_ast_node_t *
141tre_ast_new_node(tre_mem_t mem, int type, void *obj)
142{
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *node);
144 if (!node || !obj)
145 return 0;
146 node->obj = obj;
147 node->type = type;
148 node->nullable = -1;
149 node->submatch_id = -1;
150 return node;
151}
152
153static tre_ast_node_t *
154tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
155{
156 tre_ast_node_t *node;
157 tre_literal_t *lit;
158
159 lit = tre_mem_calloc(mem, sizeof *lit)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
161 if (!node)
162 return 0;
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
166 return node;
167}
168
169static tre_ast_node_t *
170tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
171{
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
174
175 iter = tre_mem_calloc(mem, sizeof *iter)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
177 if (!node)
178 return 0;
179 iter->arg = arg;
180 iter->min = min;
181 iter->max = max;
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
184 return node;
185}
186
187static tre_ast_node_t *
188tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
189{
190 tre_ast_node_t *node;
191 tre_union_t *un;
192
193 if (!left)
194 return right;
195 un = tre_mem_calloc(mem, sizeof *un)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
197 if (!node || !right)
198 return 0;
199 un->left = left;
200 un->right = right;
201 node->num_submatches = left->num_submatches + right->num_submatches;
202 return node;
203}
204
205static tre_ast_node_t *
206tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
207{
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
210
211 if (!left)
212 return right;
213 cat = tre_mem_calloc(mem, sizeof *cat)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
215 if (!node)
216 return 0;
217 cat->left = left;
218 cat->right = right;
219 node->num_submatches = left->num_submatches + right->num_submatches;
220 return node;
221}
222
223
224/***********************************************************************
225 from tre-stack.c and tre-stack.h
226***********************************************************************/
227
228typedef struct tre_stack_rec tre_stack_t;
229
230/* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
234static tre_stack_t *
235tre_stack_new(int size, int max_size, int increment);
236
237/* Frees the stack object. */
238static void
239tre_stack_destroy(tre_stack_t *s);
240
241/* Returns the current number of objects in the stack. */
242static int
243tre_stack_num_objects(tre_stack_t *s);
244
245/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249#define declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type
value)
\
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
251
252declare_pushf(voidptr, void *)static reg_errcode_t tre_stack_push_voidptr(tre_stack_t *s, void
* value)
;
253declare_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value
)
;
254
255/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
257 empty. */
258#define declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
260
261declare_popf(voidptr, void *)static void * tre_stack_pop_voidptr(tre_stack_t *s);
262declare_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s);
263
264/* Just to save some typing. */
265#define STACK_PUSH(s, typetag, value)do { status = tre_stack_push_typetag(s, value); } while ( 0) \
266 do \
267 { \
268 status = tre_stack_push_ ## typetag(s, value); \
269 } \
270 while (/*CONSTCOND*/0)
271
272#define STACK_PUSHX(s, typetag, value){ status = tre_stack_push_typetag(s, value); if (status != 0)
break; }
\
273 { \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK0) \
276 break; \
277 }
278
279#define STACK_PUSHR(s, typetag, value){ reg_errcode_t _status; _status = tre_stack_push_typetag(s, value
); if (_status != 0) return _status; }
\
280 { \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK0) \
284 return _status; \
285 }
286
287union tre_stack_item {
288 void *voidptr_value;
289 int int_value;
290};
291
292struct tre_stack_rec {
293 int size;
294 int max_size;
295 int increment;
296 int ptr;
297 union tre_stack_item *stack;
298};
299
300
301static tre_stack_t *
302tre_stack_new(int size, int max_size, int increment)
303{
304 tre_stack_t *s;
305
306 s = xmallocmalloc(sizeof(*s));
307 if (s != NULL((void*)0))
308 {
309 s->stack = xmallocmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL((void*)0))
311 {
312 xfreefree(s);
313 return NULL((void*)0);
314 }
315 s->size = size;
316 s->max_size = max_size;
317 s->increment = increment;
318 s->ptr = 0;
319 }
320 return s;
321}
322
323static void
324tre_stack_destroy(tre_stack_t *s)
325{
326 xfreefree(s->stack);
327 xfreefree(s);
328}
329
330static int
331tre_stack_num_objects(tre_stack_t *s)
332{
333 return s->ptr;
334}
335
336static reg_errcode_t
337tre_stack_push(tre_stack_t *s, union tre_stack_item value)
338{
339 if (s->ptr < s->size)
340 {
341 s->stack[s->ptr] = value;
342 s->ptr++;
343 }
344 else
345 {
346 if (s->size >= s->max_size)
347 {
348 return REG_ESPACE12;
349 }
350 else
351 {
352 union tre_stack_item *new_buffer;
353 int new_size;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xreallocrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL((void*)0))
359 {
360 return REG_ESPACE12;
361 }
362 assert(new_size > s->size)(void)0;
363 s->size = new_size;
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
366 }
367 }
368 return REG_OK0;
369}
370
371#define define_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type
value) { union tre_stack_item item; item.typetag_value = value
; return tre_stack_push(s, item); }
\
372 declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type
value)
{ \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
376}
377
378define_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value
) { union tre_stack_item item; item.int_value = value; return
tre_stack_push(s, item); }
379define_pushf(voidptr, void *)static reg_errcode_t tre_stack_push_voidptr(tre_stack_t *s, void
* value) { union tre_stack_item item; item.voidptr_value = value
; return tre_stack_push(s, item); }
380
381#define define_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { return s->
stack[--s->ptr].typetag_value; }
\
382 declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { \
383 return s->stack[--s->ptr].typetag ## _value; \
384 }
385
386define_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s) { return s->stack
[--s->ptr].int_value; }
387define_popf(voidptr, void *)static void * tre_stack_pop_voidptr(tre_stack_t *s) { return s
->stack[--s->ptr].voidptr_value; }
388
389
390/***********************************************************************
391 from tre-parse.c and tre-parse.h
392***********************************************************************/
393
394/* Parse context. */
395typedef struct {
396 /* Memory allocator. The AST is allocated using this. */
397 tre_mem_t mem;
398 /* Stack used for keeping track of regexp syntax. */
399 tre_stack_t *stack;
400 /* The parsed node after a parse function returns. */
401 tre_ast_node_t *n;
402 /* Position in the regexp pattern after a parse function returns. */
403 const char *s;
404 /* The first character of the regexp. */
405 const char *re;
406 /* Current submatch ID. */
407 int submatch_id;
408 /* Current position (number of literal). */
409 int position;
410 /* The highest back reference or -1 if none seen so far. */
411 int max_backref;
412 /* Compilation flags. */
413 int cflags;
414} tre_parse_ctx_t;
415
416/* Some macros for expanding \w, \s, etc. */
417static const struct {
418 char c;
419 const char *expansion;
420} tre_macros[] = {
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
425 { 0, 0 }
426};
427
428/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431static const char *tre_expand_macro(const char *s)
432{
433 int i;
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
436}
437
438static int
439tre_compare_lit(const void *a, const void *b)
440{
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
445}
446
447struct literals {
448 tre_mem_t mem;
449 tre_literal_t **a;
450 int len;
451 int cap;
452};
453
454static tre_literal_t *tre_new_lit(struct literals *p)
455{
456 tre_literal_t **a;
457 if (p->len >= p->cap) {
458 if (p->cap >= 1<<15)
459 return 0;
460 p->cap *= 2;
461 a = xreallocrealloc(p->a, p->cap * sizeof *p->a);
462 if (!a)
463 return 0;
464 p->a = a;
465 }
466 a = p->a + p->len++;
467 *a = tre_mem_calloc(p->mem, sizeof **a)__tre_mem_alloc_impl(p->mem, 0, ((void*)0), 1, sizeof **a);
468 return *a;
469}
470
471static int add_icase_literals(struct literals *ls, int min, int max)
472{
473 tre_literal_t *lit;
474 int b, e, c;
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_isloweriswlower(c)) {
480 b = e = tre_touppertowupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_touppertowupper(c) != e) break;
483 } else if (tre_isupperiswupper(c)) {
484 b = e = tre_tolowertowlower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolowertowlower(c) != e) break;
487 } else {
488 c++;
489 continue;
490 }
491 lit = tre_new_lit(ls);
492 if (!lit)
493 return -1;
494 lit->code_min = b;
495 lit->code_max = e-1;
496 lit->position = -1;
497 }
498 return 0;
499}
500
501
502/* Maximum number of character classes in a negated bracket expression. */
503#define MAX_NEG_CLASSES64 64
504
505struct neg {
506 int negate;
507 int len;
508 tre_ctype_t a[MAX_NEG_CLASSES64];
509};
510
511// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
512
513/*
514bracket grammar:
515Bracket = '[' List ']' | '[^' List ']'
516List = Term | List Term
517Term = Char | Range | Chclass | Eqclass
518Range = Char '-' Char | Char '-' '-'
519Char = Coll | coll_single
520Meta = ']' | '-'
521Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523Chclass = '[:' class ':]'
524
525coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
529*/
530
531static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
532{
533 const char *start = s;
534 tre_ctype_t class;
535 int min, max;
536 wchar_t wc;
537 int len;
538
539 for (;;) {
540 class = 0;
541 len = mbtowc(&wc, s, -1);
542 if (len <= 0)
543 return *s ? REG_BADPAT2 : REG_EBRACK7;
544 if (*s == ']' && s != start) {
545 ctx->s = s+1;
546 return REG_OK0;
547 }
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
551 return REG_ERANGE11;
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
554 return REG_ECOLLATE3;
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX14+1];
557 s += 2;
558 for (len=0; len < CHARCLASS_NAME_MAX14 && s[len]; len++) {
559 if (s[len] == ':') {
560 memcpy(tmp, s, len);
561 tmp[len] = 0;
562 class = tre_ctypewctype(tmp);
563 break;
564 }
565 }
566 if (!class || s[len+1] != ']')
567 return REG_ECTYPE4;
568 min = 0;
569 max = TRE_CHAR_MAX0x10ffff;
570 s += len+2;
571 } else {
572 min = max = wc;
573 s += len;
574 if (*s == '-' && s[1] != ']') {
575 s++;
576 len = mbtowc(&wc, s, -1);
577 max = wc;
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
581 return REG_ERANGE11;
582 s += len;
583 }
584 }
585
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES64)
588 return REG_ESPACE12;
589 neg->a[neg->len++] = class;
590 } else {
591 tre_literal_t *lit = tre_new_lit(ls);
592 if (!lit)
593 return REG_ESPACE12;
594 lit->code_min = min;
595 lit->code_max = max;
596 lit->class = class;
597 lit->position = -1;
598
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE2 && !class)
607 if (add_icase_literals(ls, min, max))
608 return REG_ESPACE12;
609 }
610 }
611}
612
613static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
614{
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
617 tre_ctype_t *nc = 0;
618 tre_literal_t *lit;
619 struct literals ls;
620 struct neg neg;
621 reg_errcode_t err;
622
623 ls.mem = ctx->mem;
624 ls.len = 0;
625 ls.cap = 32;
626 ls.a = xmallocmalloc(ls.cap * sizeof *ls.a);
627 if (!ls.a)
628 return REG_ESPACE12;
629 neg.len = 0;
630 neg.negate = *s == '^';
631 if (neg.negate)
632 s++;
633
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
635 if (err != REG_OK0)
636 goto parse_bracket_done;
637
638 if (neg.negate) {
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
643 if (!lit) {
644 err = REG_ESPACE12;
645 goto parse_bracket_done;
646 }
647 lit->code_min = TRE_CHAR_MAX0x10ffff+1;
648 lit->code_max = TRE_CHAR_MAX0x10ffff+1;
649 lit->position = -1;
650 /* negated classes */
651 if (neg.len) {
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a)__tre_mem_alloc_impl(ctx->mem, 0, ((void*)0), 0, (neg.len+
1)*sizeof *neg.a)
;
653 if (!nc) {
654 err = REG_ESPACE12;
655 goto parse_bracket_done;
656 }
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
658 nc[neg.len] = 0;
659 }
660 }
661
662 /* Build a union of the items in the array, negated if necessary. */
663 negmax = negmin = 0;
Value stored to 'negmax' is never read
664 for (i = 0; i < ls.len; i++) {
665 lit = ls.a[i];
666 min = lit->code_min;
667 max = lit->code_max;
668 if (neg.negate) {
669 if (min <= negmin) {
670 /* Overlap. */
671 negmin = MAX(max + 1, negmin)(((max + 1) >= (negmin)) ? (max + 1) : (negmin));
672 continue;
673 }
674 negmax = min - 1;
675 lit->code_min = negmin;
676 lit->code_max = negmax;
677 negmin = max + 1;
678 }
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
683 if (!node) {
684 err = REG_ESPACE12;
685 break;
686 }
687 }
688
689parse_bracket_done:
690 xfreefree(ls.a);
691 ctx->position++;
692 ctx->n = node;
693 return err;
694}
695
696static const char *parse_dup_count(const char *s, int *n)
697{
698 *n = -1;
699 if (!isdigit(*s)(0 ? isdigit(*s) : ((unsigned)(*s)-'0') < 10))
700 return s;
701 *n = 0;
702 for (;;) {
703 *n = 10 * *n + (*s - '0');
704 s++;
705 if (!isdigit(*s)(0 ? isdigit(*s) : ((unsigned)(*s)-'0') < 10) || *n > RE_DUP_MAX255)
706 break;
707 }
708 return s;
709}
710
711static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
712{
713 int min, max;
714
715 s = parse_dup_count(s, &min);
716 if (*s == ',')
717 s = parse_dup_count(s+1, &max);
718 else
719 max = min;
720
721 if (
722 (max < min && max >= 0) ||
723 max > RE_DUP_MAX255 ||
724 min > RE_DUP_MAX255 ||
725 min < 0 ||
726 (!ere && *s++ != '\\') ||
727 *s++ != '}'
728 )
729 return 0;
730 *pmin = min;
731 *pmax = max;
732 return s;
733}
734
735static int hexval(unsigned c)
736{
737 if (c-'0'<10) return c-'0';
738 c |= 32;
739 if (c-'a'<6) return c-'a'+10;
740 return -1;
741}
742
743static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
744{
745 if (node->submatch_id >= 0) {
746 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
747 if (!n)
748 return REG_ESPACE12;
749 n = tre_ast_new_catenation(ctx->mem, n, node);
750 if (!n)
751 return REG_ESPACE12;
752 n->num_submatches = node->num_submatches;
753 node = n;
754 }
755 node->submatch_id = subid;
756 node->num_submatches++;
757 ctx->n = node;
758 return REG_OK0;
759}
760
761/*
762BRE grammar:
763Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
764Branch = Atom | Branch Atom
765Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
766Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
767
768(leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
769
770ERE grammar:
771Regex = Branch | Regex '|' Branch
772Branch = Atom | Branch Atom
773Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
774Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
775
776(a*+?, ^*, $+, \X, {, (|a) are unspecified)
777*/
778
779static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
780{
781 int len, ere = ctx->cflags & REG_EXTENDED1;
782 const char *p;
783 tre_ast_node_t *node;
784 wchar_t wc;
785 switch (*s) {
786 case '[':
787 return parse_bracket(ctx, s+1);
788 case '\\':
789 p = tre_expand_macro(s+1);
790 if (p) {
791 /* assume \X expansion is a single atom */
792 reg_errcode_t err = parse_atom(ctx, p);
793 ctx->s = s+2;
794 return err;
795 }
796 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
797 switch (*++s) {
798 case 0:
799 return REG_EESCAPE5;
800 case 'b':
801 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB64, -1);
802 break;
803 case 'B':
804 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB_NEG128, -1);
805 break;
806 case '<':
807 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOW16, -1);
808 break;
809 case '>':
810 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOW32, -1);
811 break;
812 case 'x':
813 s++;
814 int i, v = 0, c;
815 len = 2;
816 if (*s == '{') {
817 len = 8;
818 s++;
819 }
820 for (i=0; i<len && v<0x110000; i++) {
821 c = hexval(s[i]);
822 if (c < 0) break;
823 v = 16*v + c;
824 }
825 s += i;
826 if (len == 8) {
827 if (*s != '}')
828 return REG_EBRACE9;
829 s++;
830 }
831 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
832 s--;
833 break;
834 case '{':
835 case '+':
836 case '?':
837 /* extension: treat \+, \? as repetitions in BRE */
838 /* reject repetitions after empty expression in BRE */
839 if (!ere)
840 return REG_BADRPT13;
841 case '|':
842 /* extension: treat \| as alternation in BRE */
843 if (!ere) {
844 node = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
845 s--;
846 goto end;
847 }
848 /* fallthrough */
849 default:
850 if (!ere && (unsigned)*s-'1' < 9) {
851 /* back reference */
852 int val = *s - '0';
853 node = tre_ast_new_literal(ctx->mem, BACKREF-4, val, ctx->position++);
854 ctx->max_backref = MAX(val, ctx->max_backref)(((val) >= (ctx->max_backref)) ? (val) : (ctx->max_backref
))
;
855 } else {
856 /* extension: accept unknown escaped char
857 as a literal */
858 goto parse_literal;
859 }
860 }
861 s++;
862 break;
863 case '.':
864 if (ctx->cflags & REG_NEWLINE4) {
865 tre_ast_node_t *tmp1, *tmp2;
866 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
867 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX0x10ffff, ctx->position++);
868 if (tmp1 && tmp2)
869 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
870 else
871 node = 0;
872 } else {
873 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX0x10ffff, ctx->position++);
874 }
875 s++;
876 break;
877 case '^':
878 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
879 if (!ere && s != ctx->re)
880 goto parse_literal;
881 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOL1, -1);
882 s++;
883 break;
884 case '$':
885 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
886 if (!ere && s[1])
887 goto parse_literal;
888 node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOL2, -1);
889 s++;
890 break;
891 case '*':
892 case '{':
893 case '+':
894 case '?':
895 /* reject repetitions after empty expression in ERE */
896 if (ere)
897 return REG_BADRPT13;
898 case '|':
899 if (!ere)
900 goto parse_literal;
901 case 0:
902 node = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
903 break;
904 default:
905parse_literal:
906 len = mbtowc(&wc, s, -1);
907 if (len < 0)
908 return REG_BADPAT2;
909 if (ctx->cflags & REG_ICASE2 && (tre_isupperiswupper(wc) || tre_isloweriswlower(wc))) {
910 tre_ast_node_t *tmp1, *tmp2;
911 /* multiple opposite case characters are not supported */
912 tmp1 = tre_ast_new_literal(ctx->mem, tre_touppertowupper(wc), tre_touppertowupper(wc), ctx->position);
913 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolowertowlower(wc), tre_tolowertowlower(wc), ctx->position);
914 if (tmp1 && tmp2)
915 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
916 else
917 node = 0;
918 } else {
919 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
920 }
921 ctx->position++;
922 s += len;
923 break;
924 }
925end:
926 if (!node)
927 return REG_ESPACE12;
928 ctx->n = node;
929 ctx->s = s;
930 return REG_OK0;
931}
932
933#define PUSHPTR(err, s, v)do { if ((err = tre_stack_push_voidptr(s, v)) != 0) return err
; } while(0)
do { \
934 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK0) \
935 return err; \
936} while(0)
937
938#define PUSHINT(err, s, v)do { if ((err = tre_stack_push_int(s, v)) != 0) return err; }
while(0)
do { \
939 if ((err = tre_stack_push_int(s, v)) != REG_OK0) \
940 return err; \
941} while(0)
942
943static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
944{
945 tre_ast_node_t *nbranch=0, *nunion=0;
946 int ere = ctx->cflags & REG_EXTENDED1;
947 const char *s = ctx->re;
948 int subid = 0;
949 int depth = 0;
950 reg_errcode_t err;
951 tre_stack_t *stack = ctx->stack;
952
953 PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return
err; } while(0)
;
954 for (;;) {
955 if ((!ere && *s == '\\' && s[1] == '(') ||
956 (ere && *s == '(')) {
957 PUSHPTR(err, stack, nunion)do { if ((err = tre_stack_push_voidptr(stack, nunion)) != 0) return
err; } while(0)
;
958 PUSHPTR(err, stack, nbranch)do { if ((err = tre_stack_push_voidptr(stack, nbranch)) != 0)
return err; } while(0)
;
959 PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return
err; } while(0)
;
960 s++;
961 if (!ere)
962 s++;
963 depth++;
964 nbranch = nunion = 0;
965 continue;
966 }
967 if ((!ere && *s == '\\' && s[1] == ')') ||
968 (ere && *s == ')' && depth)) {
969 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
970 if (!ctx->n)
971 return REG_ESPACE12;
972 } else {
973 err = parse_atom(ctx, s);
974 if (err != REG_OK0)
975 return err;
976 s = ctx->s;
977 }
978
979 parse_iter:
980 for (;;) {
981 int min, max;
982
983 if (*s!='\\' && *s!='*') {
984 if (!ere)
985 break;
986 if (*s!='+' && *s!='?' && *s!='{')
987 break;
988 }
989 if (*s=='\\' && ere)
990 break;
991 /* extension: treat \+, \? as repetitions in BRE */
992 if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
993 break;
994 if (*s=='\\')
995 s++;
996
997 /* handle ^* at the start of a complete BRE. */
998 if (!ere && s==ctx->re+1 && s[-1]=='^')
999 break;
1000
1001 /* extension: multiple consecutive *+?{,} is unspecified,
1002 but (a+)+ has to be supported so accepting a++ makes
1003 sense, note however that the RE_DUP_MAX limit can be
1004 circumvented: (a{255}){255} uses a lot of memory.. */
1005 if (*s=='{') {
1006 s = parse_dup(s+1, ere, &min, &max);
1007 if (!s)
1008 return REG_BADBR10;
1009 } else {
1010 min=0;
1011 max=-1;
1012 if (*s == '+')
1013 min = 1;
1014 if (*s == '?')
1015 max = 1;
1016 s++;
1017 }
1018 if (max == 0)
1019 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
1020 else
1021 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1022 if (!ctx->n)
1023 return REG_ESPACE12;
1024 }
1025
1026 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1027 if ((ere && *s == '|') ||
1028 (ere && *s == ')' && depth) ||
1029 (!ere && *s == '\\' && s[1] == ')') ||
1030 /* extension: treat \| as alternation in BRE */
1031 (!ere && *s == '\\' && s[1] == '|') ||
1032 !*s) {
1033 /* extension: empty branch is unspecified (), (|a), (a|)
1034 here they are not rejected but match on empty string */
1035 int c = *s;
1036 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1037 nbranch = 0;
1038
1039 if (c == '\\' && s[1] == '|') {
1040 s+=2;
1041 } else if (c == '|') {
1042 s++;
1043 } else {
1044 if (c == '\\') {
1045 if (!depth) return REG_EPAREN8;
1046 s+=2;
1047 } else if (c == ')')
1048 s++;
1049 depth--;
1050 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1051 if (err != REG_OK0)
1052 return err;
1053 if (!c && depth<0) {
1054 ctx->submatch_id = subid;
1055 return REG_OK0;
1056 }
1057 if (!c || depth<0)
1058 return REG_EPAREN8;
1059 nbranch = tre_stack_pop_voidptr(stack);
1060 nunion = tre_stack_pop_voidptr(stack);
1061 goto parse_iter;
1062 }
1063 }
1064 }
1065}
1066
1067
1068/***********************************************************************
1069 from tre-compile.c
1070***********************************************************************/
1071
1072
1073/*
1074 TODO:
1075 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1076 function calls.
1077*/
1078
1079/*
1080 Algorithms to setup tags so that submatch addressing can be done.
1081*/
1082
1083
1084/* Inserts a catenation node to the root of the tree given in `node'.
1085 As the left child a new tag with number `tag_id' to `node' is added,
1086 and the right child is the old root. */
1087static reg_errcode_t
1088tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1089{
1090 tre_catenation_t *c;
1091
1092 c = tre_mem_alloc(mem, sizeof(*c))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*c));
1093 if (c == NULL((void*)0))
1094 return REG_ESPACE12;
1095 c->left = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1096 if (c->left == NULL((void*)0))
1097 return REG_ESPACE12;
1098 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t
))
;
1099 if (c->right == NULL((void*)0))
1100 return REG_ESPACE12;
1101
1102 c->right->obj = node->obj;
1103 c->right->type = node->type;
1104 c->right->nullable = -1;
1105 c->right->submatch_id = -1;
1106 c->right->firstpos = NULL((void*)0);
1107 c->right->lastpos = NULL((void*)0);
1108 c->right->num_tags = 0;
1109 node->obj = c;
1110 node->type = CATENATION;
1111 return REG_OK0;
1112}
1113
1114/* Inserts a catenation node to the root of the tree given in `node'.
1115 As the right child a new tag with number `tag_id' to `node' is added,
1116 and the left child is the old root. */
1117static reg_errcode_t
1118tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1119{
1120 tre_catenation_t *c;
1121
1122 c = tre_mem_alloc(mem, sizeof(*c))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*c));
1123 if (c == NULL((void*)0))
1124 return REG_ESPACE12;
1125 c->right = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1126 if (c->right == NULL((void*)0))
1127 return REG_ESPACE12;
1128 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t
))
;
1129 if (c->left == NULL((void*)0))
1130 return REG_ESPACE12;
1131
1132 c->left->obj = node->obj;
1133 c->left->type = node->type;
1134 c->left->nullable = -1;
1135 c->left->submatch_id = -1;
1136 c->left->firstpos = NULL((void*)0);
1137 c->left->lastpos = NULL((void*)0);
1138 c->left->num_tags = 0;
1139 node->obj = c;
1140 node->type = CATENATION;
1141 return REG_OK0;
1142}
1143
1144typedef enum {
1145 ADDTAGS_RECURSE,
1146 ADDTAGS_AFTER_ITERATION,
1147 ADDTAGS_AFTER_UNION_LEFT,
1148 ADDTAGS_AFTER_UNION_RIGHT,
1149 ADDTAGS_AFTER_CAT_LEFT,
1150 ADDTAGS_AFTER_CAT_RIGHT,
1151 ADDTAGS_SET_SUBMATCH_END
1152} tre_addtags_symbol_t;
1153
1154
1155typedef struct {
1156 int tag;
1157 int next_tag;
1158} tre_tag_states_t;
1159
1160
1161/* Go through `regset' and set submatch data for submatches that are
1162 using this tag. */
1163static void
1164tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1165{
1166 int i;
1167
1168 for (i = 0; regset[i] >= 0; i++)
1169 {
1170 int id = regset[i] / 2;
1171 int start = !(regset[i] % 2);
1172 if (start)
1173 tnfa->submatch_data[id].so_tag = tag;
1174 else
1175 tnfa->submatch_data[id].eo_tag = tag;
1176 }
1177 regset[0] = -1;
1178}
1179
1180
1181/* Adds tags to appropriate locations in the parse tree in `tree', so that
1182 subexpressions marked for submatch addressing can be traced. */
1183static reg_errcode_t
1184tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1185 tre_tnfa_t *tnfa)
1186{
1187 reg_errcode_t status = REG_OK0;
1188 tre_addtags_symbol_t symbol;
1189 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1190 int bottom = tre_stack_num_objects(stack);
1191 /* True for first pass (counting number of needed tags) */
1192 int first_pass = (mem == NULL((void*)0) || tnfa == NULL((void*)0));
1193 int *regset, *orig_regset;
1194 int num_tags = 0; /* Total number of tags. */
1195 int num_minimals = 0; /* Number of special minimal tags. */
1196 int tag = 0; /* The tag that is to be added next. */
1197 int next_tag = 1; /* Next tag to use after this one. */
1198 int *parents; /* Stack of submatches the current submatch is
1199 contained in. */
1200 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1201 tre_tag_states_t *saved_states;
1202
1203 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1204 if (!first_pass)
1205 {
1206 tnfa->end_tag = 0;
1207 tnfa->minimal_tags[0] = -1;
1208 }
1209
1210 regset = xmallocmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1211 if (regset == NULL((void*)0))
1212 return REG_ESPACE12;
1213 regset[0] = -1;
1214 orig_regset = regset;
1215
1216 parents = xmallocmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1217 if (parents == NULL((void*)0))
1218 {
1219 xfreefree(regset);
1220 return REG_ESPACE12;
1221 }
1222 parents[0] = -1;
1223
1224 saved_states = xmallocmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1225 if (saved_states == NULL((void*)0))
1226 {
1227 xfreefree(regset);
1228 xfreefree(parents);
1229 return REG_ESPACE12;
1230 }
1231 else
1232 {
1233 unsigned int i;
1234 for (i = 0; i <= tnfa->num_submatches; i++)
1235 saved_states[i].tag = -1;
1236 }
1237
1238 STACK_PUSH(stack, voidptr, node)do { status = tre_stack_push_voidptr(stack, node); } while ( 0
)
;
1239 STACK_PUSH(stack, int, ADDTAGS_RECURSE)do { status = tre_stack_push_int(stack, ADDTAGS_RECURSE); } while
( 0)
;
1240
1241 while (tre_stack_num_objects(stack) > bottom)
1242 {
1243 if (status != REG_OK0)
1244 break;
1245
1246 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1247 switch (symbol)
1248 {
1249
1250 case ADDTAGS_SET_SUBMATCH_END:
1251 {
1252 int id = tre_stack_pop_int(stack);
1253 int i;
1254
1255 /* Add end of this submatch to regset. */
1256 for (i = 0; regset[i] >= 0; i++);
1257 regset[i] = id * 2 + 1;
1258 regset[i + 1] = -1;
1259
1260 /* Pop this submatch from the parents stack. */
1261 for (i = 0; parents[i] >= 0; i++);
1262 parents[i - 1] = -1;
1263 break;
1264 }
1265
1266 case ADDTAGS_RECURSE:
1267 node = tre_stack_pop_voidptr(stack);
1268
1269 if (node->submatch_id >= 0)
1270 {
1271 int id = node->submatch_id;
1272 int i;
1273
1274
1275 /* Add start of this submatch to regset. */
1276 for (i = 0; regset[i] >= 0; i++);
1277 regset[i] = id * 2;
1278 regset[i + 1] = -1;
1279
1280 if (!first_pass)
1281 {
1282 for (i = 0; parents[i] >= 0; i++);
1283 tnfa->submatch_data[id].parents = NULL((void*)0);
1284 if (i > 0)
1285 {
1286 int *p = xmallocmalloc(sizeof(*p) * (i + 1));
1287 if (p == NULL((void*)0))
1288 {
1289 status = REG_ESPACE12;
1290 break;
1291 }
1292 assert(tnfa->submatch_data[id].parents == NULL)(void)0;
1293 tnfa->submatch_data[id].parents = p;
1294 for (i = 0; parents[i] >= 0; i++)
1295 p[i] = parents[i];
1296 p[i] = -1;
1297 }
1298 }
1299
1300 /* Add end of this submatch to regset after processing this
1301 node. */
1302 STACK_PUSHX(stack, int, node->submatch_id){ status = tre_stack_push_int(stack, node->submatch_id); if
(status != 0) break; }
;
1303 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END){ status = tre_stack_push_int(stack, ADDTAGS_SET_SUBMATCH_END
); if (status != 0) break; }
;
1304 }
1305
1306 switch (node->type)
1307 {
1308 case LITERAL:
1309 {
1310 tre_literal_t *lit = node->obj;
1311
1312 if (!IS_SPECIAL(lit)((lit)->code_min < 0) || IS_BACKREF(lit)((lit)->code_min == -4))
1313 {
1314 int i;
1315 if (regset[0] >= 0)
1316 {
1317 /* Regset is not empty, so add a tag before the
1318 literal or backref. */
1319 if (!first_pass)
1320 {
1321 status = tre_add_tag_left(mem, node, tag);
1322 tnfa->tag_directions[tag] = direction;
1323 if (minimal_tag >= 0)
1324 {
1325 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1326 tnfa->minimal_tags[i] = tag;
1327 tnfa->minimal_tags[i + 1] = minimal_tag;
1328 tnfa->minimal_tags[i + 2] = -1;
1329 minimal_tag = -1;
1330 num_minimals++;
1331 }
1332 tre_purge_regset(regset, tnfa, tag);
1333 }
1334 else
1335 {
1336 node->num_tags = 1;
1337 }
1338
1339 regset[0] = -1;
1340 tag = next_tag;
1341 num_tags++;
1342 next_tag++;
1343 }
1344 }
1345 else
1346 {
1347 assert(!IS_TAG(lit))(void)0;
1348 }
1349 break;
1350 }
1351 case CATENATION:
1352 {
1353 tre_catenation_t *cat = node->obj;
1354 tre_ast_node_t *left = cat->left;
1355 tre_ast_node_t *right = cat->right;
1356 int reserved_tag = -1;
1357
1358
1359 /* After processing right child. */
1360 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1361 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_RIGHT)
; if (status != 0) break; }
;
1362
1363 /* Process right child. */
1364 STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status !=
0) break; }
;
1365 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1366
1367 /* After processing left child. */
1368 STACK_PUSHX(stack, int, next_tag + left->num_tags){ status = tre_stack_push_int(stack, next_tag + left->num_tags
); if (status != 0) break; }
;
1369 if (left->num_tags > 0 && right->num_tags > 0)
1370 {
1371 /* Reserve the next tag to the right child. */
1372 reserved_tag = next_tag;
1373 next_tag++;
1374 }
1375 STACK_PUSHX(stack, int, reserved_tag){ status = tre_stack_push_int(stack, reserved_tag); if (status
!= 0) break; }
;
1376 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_LEFT);
if (status != 0) break; }
;
1377
1378 /* Process left child. */
1379 STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status !=
0) break; }
;
1380 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1381
1382 }
1383 break;
1384 case ITERATION:
1385 {
1386 tre_iteration_t *iter = node->obj;
1387
1388 if (first_pass)
1389 {
1390 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal){ status = tre_stack_push_int(stack, regset[0] >= 0 || iter
->minimal); if (status != 0) break; }
;
1391 }
1392 else
1393 {
1394 STACK_PUSHX(stack, int, tag){ status = tre_stack_push_int(stack, tag); if (status != 0) break
; }
;
1395 STACK_PUSHX(stack, int, iter->minimal){ status = tre_stack_push_int(stack, iter->minimal); if (status
!= 0) break; }
;
1396 }
1397 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1398 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_ITERATION)
; if (status != 0) break; }
;
1399
1400 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
1401 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1402
1403 /* Regset is not empty, so add a tag here. */
1404 if (regset[0] >= 0 || iter->minimal)
1405 {
1406 if (!first_pass)
1407 {
1408 int i;
1409 status = tre_add_tag_left(mem, node, tag);
1410 if (iter->minimal)
1411 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1412 else
1413 tnfa->tag_directions[tag] = direction;
1414 if (minimal_tag >= 0)
1415 {
1416 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1417 tnfa->minimal_tags[i] = tag;
1418 tnfa->minimal_tags[i + 1] = minimal_tag;
1419 tnfa->minimal_tags[i + 2] = -1;
1420 minimal_tag = -1;
1421 num_minimals++;
1422 }
1423 tre_purge_regset(regset, tnfa, tag);
1424 }
1425
1426 regset[0] = -1;
1427 tag = next_tag;
1428 num_tags++;
1429 next_tag++;
1430 }
1431 direction = TRE_TAG_MINIMIZE;
1432 }
1433 break;
1434 case UNION:
1435 {
1436 tre_union_t *uni = node->obj;
1437 tre_ast_node_t *left = uni->left;
1438 tre_ast_node_t *right = uni->right;
1439 int left_tag;
1440 int right_tag;
1441
1442 if (regset[0] >= 0)
1443 {
1444 left_tag = next_tag;
1445 right_tag = next_tag + 1;
1446 }
1447 else
1448 {
1449 left_tag = tag;
1450 right_tag = next_tag;
1451 }
1452
1453 /* After processing right child. */
1454 STACK_PUSHX(stack, int, right_tag){ status = tre_stack_push_int(stack, right_tag); if (status !=
0) break; }
;
1455 STACK_PUSHX(stack, int, left_tag){ status = tre_stack_push_int(stack, left_tag); if (status !=
0) break; }
;
1456 STACK_PUSHX(stack, voidptr, regset){ status = tre_stack_push_voidptr(stack, regset); if (status !=
0) break; }
;
1457 STACK_PUSHX(stack, int, regset[0] >= 0){ status = tre_stack_push_int(stack, regset[0] >= 0); if (
status != 0) break; }
;
1458 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1459 STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status !=
0) break; }
;
1460 STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status !=
0) break; }
;
1461 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_RIGHT
); if (status != 0) break; }
;
1462
1463 /* Process right child. */
1464 STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status !=
0) break; }
;
1465 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1466
1467 /* After processing left child. */
1468 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_LEFT
); if (status != 0) break; }
;
1469
1470 /* Process left child. */
1471 STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status !=
0) break; }
;
1472 STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status
!= 0) break; }
;
1473
1474 /* Regset is not empty, so add a tag here. */
1475 if (regset[0] >= 0)
1476 {
1477 if (!first_pass)
1478 {
1479 int i;
1480 status = tre_add_tag_left(mem, node, tag);
1481 tnfa->tag_directions[tag] = direction;
1482 if (minimal_tag >= 0)
1483 {
1484 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1485 tnfa->minimal_tags[i] = tag;
1486 tnfa->minimal_tags[i + 1] = minimal_tag;
1487 tnfa->minimal_tags[i + 2] = -1;
1488 minimal_tag = -1;
1489 num_minimals++;
1490 }
1491 tre_purge_regset(regset, tnfa, tag);
1492 }
1493
1494 regset[0] = -1;
1495 tag = next_tag;
1496 num_tags++;
1497 next_tag++;
1498 }
1499
1500 if (node->num_submatches > 0)
1501 {
1502 /* The next two tags are reserved for markers. */
1503 next_tag++;
1504 tag = next_tag;
1505 next_tag++;
1506 }
1507
1508 break;
1509 }
1510 }
1511
1512 if (node->submatch_id >= 0)
1513 {
1514 int i;
1515 /* Push this submatch on the parents stack. */
1516 for (i = 0; parents[i] >= 0; i++);
1517 parents[i] = node->submatch_id;
1518 parents[i + 1] = -1;
1519 }
1520
1521 break; /* end case: ADDTAGS_RECURSE */
1522
1523 case ADDTAGS_AFTER_ITERATION:
1524 {
1525 int minimal = 0;
1526 int enter_tag;
1527 node = tre_stack_pop_voidptr(stack);
1528 if (first_pass)
1529 {
1530 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1531 + tre_stack_pop_int(stack);
1532 minimal_tag = -1;
1533 }
1534 else
1535 {
1536 minimal = tre_stack_pop_int(stack);
1537 enter_tag = tre_stack_pop_int(stack);
1538 if (minimal)
1539 minimal_tag = enter_tag;
1540 }
1541
1542 if (!first_pass)
1543 {
1544 if (minimal)
1545 direction = TRE_TAG_MINIMIZE;
1546 else
1547 direction = TRE_TAG_MAXIMIZE;
1548 }
1549 break;
1550 }
1551
1552 case ADDTAGS_AFTER_CAT_LEFT:
1553 {
1554 int new_tag = tre_stack_pop_int(stack);
1555 next_tag = tre_stack_pop_int(stack);
1556 if (new_tag >= 0)
1557 {
1558 tag = new_tag;
1559 }
1560 break;
1561 }
1562
1563 case ADDTAGS_AFTER_CAT_RIGHT:
1564 node = tre_stack_pop_voidptr(stack);
1565 if (first_pass)
1566 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1567 + ((tre_catenation_t *)node->obj)->right->num_tags;
1568 break;
1569
1570 case ADDTAGS_AFTER_UNION_LEFT:
1571 /* Lift the bottom of the `regset' array so that when processing
1572 the right operand the items currently in the array are
1573 invisible. The original bottom was saved at ADDTAGS_UNION and
1574 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1575 while (*regset >= 0)
1576 regset++;
1577 break;
1578
1579 case ADDTAGS_AFTER_UNION_RIGHT:
1580 {
1581 int added_tags, tag_left, tag_right;
1582 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1583 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1584 node = tre_stack_pop_voidptr(stack);
1585 added_tags = tre_stack_pop_int(stack);
1586 if (first_pass)
1587 {
1588 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1589 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1590 + ((node->num_submatches > 0) ? 2 : 0);
1591 }
1592 regset = tre_stack_pop_voidptr(stack);
1593 tag_left = tre_stack_pop_int(stack);
1594 tag_right = tre_stack_pop_int(stack);
1595
1596 /* Add tags after both children, the left child gets a smaller
1597 tag than the right child. This guarantees that we prefer
1598 the left child over the right child. */
1599 /* XXX - This is not always necessary (if the children have
1600 tags which must be seen for every match of that child). */
1601 /* XXX - Check if this is the only place where tre_add_tag_right
1602 is used. If so, use tre_add_tag_left (putting the tag before
1603 the child as opposed after the child) and throw away
1604 tre_add_tag_right. */
1605 if (node->num_submatches > 0)
1606 {
1607 if (!first_pass)
1608 {
1609 status = tre_add_tag_right(mem, left, tag_left);
1610 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1611 if (status == REG_OK0)
1612 status = tre_add_tag_right(mem, right, tag_right);
1613 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1614 }
1615 num_tags += 2;
1616 }
1617 direction = TRE_TAG_MAXIMIZE;
1618 break;
1619 }
1620
1621 default:
1622 assert(0)(void)0;
1623 break;
1624
1625 } /* end switch(symbol) */
1626 } /* end while(tre_stack_num_objects(stack) > bottom) */
1627
1628 if (!first_pass)
1629 tre_purge_regset(regset, tnfa, tag);
1630
1631 if (!first_pass && minimal_tag >= 0)
1632 {
1633 int i;
1634 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1635 tnfa->minimal_tags[i] = tag;
1636 tnfa->minimal_tags[i + 1] = minimal_tag;
1637 tnfa->minimal_tags[i + 2] = -1;
1638 minimal_tag = -1;
1639 num_minimals++;
1640 }
1641
1642 assert(tree->num_tags == num_tags)(void)0;
1643 tnfa->end_tag = num_tags;
1644 tnfa->num_tags = num_tags;
1645 tnfa->num_minimals = num_minimals;
1646 xfreefree(orig_regset);
1647 xfreefree(parents);
1648 xfreefree(saved_states);
1649 return status;
1650}
1651
1652
1653
1654/*
1655 AST to TNFA compilation routines.
1656*/
1657
1658typedef enum {
1659 COPY_RECURSE,
1660 COPY_SET_RESULT_PTR
1661} tre_copyast_symbol_t;
1662
1663/* Flags for tre_copy_ast(). */
1664#define COPY_REMOVE_TAGS1 1
1665#define COPY_MAXIMIZE_FIRST_TAG2 2
1666
1667static reg_errcode_t
1668tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1669 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1670 tre_ast_node_t **copy, int *max_pos)
1671{
1672 reg_errcode_t status = REG_OK0;
1673 int bottom = tre_stack_num_objects(stack);
1674 int num_copied = 0;
1675 int first_tag = 1;
1676 tre_ast_node_t **result = copy;
1677 tre_copyast_symbol_t symbol;
1678
1679 STACK_PUSH(stack, voidptr, ast)do { status = tre_stack_push_voidptr(stack, ast); } while ( 0
)
;
1680 STACK_PUSH(stack, int, COPY_RECURSE)do { status = tre_stack_push_int(stack, COPY_RECURSE); } while
( 0)
;
1681
1682 while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1683 {
1684 tre_ast_node_t *node;
1685 if (status != REG_OK0)
1686 break;
1687
1688 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1689 switch (symbol)
1690 {
1691 case COPY_SET_RESULT_PTR:
1692 result = tre_stack_pop_voidptr(stack);
1693 break;
1694 case COPY_RECURSE:
1695 node = tre_stack_pop_voidptr(stack);
1696 switch (node->type)
1697 {
1698 case LITERAL:
1699 {
1700 tre_literal_t *lit = node->obj;
1701 int pos = lit->position;
1702 int min = lit->code_min;
1703 int max = lit->code_max;
1704 if (!IS_SPECIAL(lit)((lit)->code_min < 0) || IS_BACKREF(lit)((lit)->code_min == -4))
1705 {
1706 /* XXX - e.g. [ab] has only one position but two
1707 nodes, so we are creating holes in the state space
1708 here. Not fatal, just wastes memory. */
1709 pos += *pos_add;
1710 num_copied++;
1711 }
1712 else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_REMOVE_TAGS1))
1713 {
1714 /* Change this tag to empty. */
1715 min = EMPTY-1;
1716 max = pos = -1;
1717 }
1718 else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_MAXIMIZE_FIRST_TAG2)
1719 && first_tag)
1720 {
1721 /* Maximize the first tag. */
1722 tag_directions[max] = TRE_TAG_MAXIMIZE;
1723 first_tag = 0;
1724 }
1725 *result = tre_ast_new_literal(mem, min, max, pos);
1726 if (*result == NULL((void*)0))
1727 status = REG_ESPACE12;
1728 else {
1729 tre_literal_t *p = (*result)->obj;
1730 p->class = lit->class;
1731 p->neg_classes = lit->neg_classes;
1732 }
1733
1734 if (pos > *max_pos)
1735 *max_pos = pos;
1736 break;
1737 }
1738 case UNION:
1739 {
1740 tre_union_t *uni = node->obj;
1741 tre_union_t *tmp;
1742 *result = tre_ast_new_union(mem, uni->left, uni->right);
1743 if (*result == NULL((void*)0))
1744 {
1745 status = REG_ESPACE12;
1746 break;
1747 }
1748 tmp = (*result)->obj;
1749 result = &tmp->left;
1750 STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if (
status != 0) break; }
;
1751 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1752 STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right);
if (status != 0) break; }
;
1753 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if
(status != 0) break; }
;
1754 STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status
!= 0) break; }
;
1755 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1756 break;
1757 }
1758 case CATENATION:
1759 {
1760 tre_catenation_t *cat = node->obj;
1761 tre_catenation_t *tmp;
1762 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1763 if (*result == NULL((void*)0))
1764 {
1765 status = REG_ESPACE12;
1766 break;
1767 }
1768 tmp = (*result)->obj;
1769 tmp->left = NULL((void*)0);
1770 tmp->right = NULL((void*)0);
1771 result = &tmp->left;
1772
1773 STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if (
status != 0) break; }
;
1774 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1775 STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right);
if (status != 0) break; }
;
1776 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if
(status != 0) break; }
;
1777 STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status
!= 0) break; }
;
1778 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1779 break;
1780 }
1781 case ITERATION:
1782 {
1783 tre_iteration_t *iter = node->obj;
1784 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
1785 STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status
!= 0) break; }
;
1786 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1787 iter->max, iter->minimal);
1788 if (*result == NULL((void*)0))
1789 {
1790 status = REG_ESPACE12;
1791 break;
1792 }
1793 iter = (*result)->obj;
1794 result = &iter->arg;
1795 break;
1796 }
1797 default:
1798 assert(0)(void)0;
1799 break;
1800 }
1801 break;
1802 }
1803 }
1804 *pos_add += num_copied;
1805 return status;
1806}
1807
1808typedef enum {
1809 EXPAND_RECURSE,
1810 EXPAND_AFTER_ITER
1811} tre_expand_ast_symbol_t;
1812
1813/* Expands each iteration node that has a finite nonzero minimum or maximum
1814 iteration count to a catenated sequence of copies of the node. */
1815static reg_errcode_t
1816tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1817 int *position, tre_tag_direction_t *tag_directions)
1818{
1819 reg_errcode_t status = REG_OK0;
1820 int bottom = tre_stack_num_objects(stack);
1821 int pos_add = 0;
1822 int pos_add_total = 0;
1823 int max_pos = 0;
1824 int iter_depth = 0;
1825
1826 STACK_PUSHR(stack, voidptr, ast){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ast); if (_status != 0) return _status; }
;
1827 STACK_PUSHR(stack, int, EXPAND_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, EXPAND_RECURSE
); if (_status != 0) return _status; }
;
1828 while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1829 {
1830 tre_ast_node_t *node;
1831 tre_expand_ast_symbol_t symbol;
1832
1833 if (status != REG_OK0)
1834 break;
1835
1836 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1837 node = tre_stack_pop_voidptr(stack);
1838 switch (symbol)
1839 {
1840 case EXPAND_RECURSE:
1841 switch (node->type)
1842 {
1843 case LITERAL:
1844 {
1845 tre_literal_t *lit= node->obj;
1846 if (!IS_SPECIAL(lit)((lit)->code_min < 0) || IS_BACKREF(lit)((lit)->code_min == -4))
1847 {
1848 lit->position += pos_add;
1849 if (lit->position > max_pos)
1850 max_pos = lit->position;
1851 }
1852 break;
1853 }
1854 case UNION:
1855 {
1856 tre_union_t *uni = node->obj;
1857 STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if (
status != 0) break; }
;
1858 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1859 STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status
!= 0) break; }
;
1860 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1861 break;
1862 }
1863 case CATENATION:
1864 {
1865 tre_catenation_t *cat = node->obj;
1866 STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if (
status != 0) break; }
;
1867 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1868 STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status
!= 0) break; }
;
1869 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1870 break;
1871 }
1872 case ITERATION:
1873 {
1874 tre_iteration_t *iter = node->obj;
1875 STACK_PUSHX(stack, int, pos_add){ status = tre_stack_push_int(stack, pos_add); if (status != 0
) break; }
;
1876 STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status !=
0) break; }
;
1877 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER){ status = tre_stack_push_int(stack, EXPAND_AFTER_ITER); if (
status != 0) break; }
;
1878 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
1879 STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status
!= 0) break; }
;
1880 /* If we are going to expand this node at EXPAND_AFTER_ITER
1881 then don't increase the `pos' fields of the nodes now, it
1882 will get done when expanding. */
1883 if (iter->min > 1 || iter->max > 1)
1884 pos_add = 0;
1885 iter_depth++;
1886 break;
1887 }
1888 default:
1889 assert(0)(void)0;
1890 break;
1891 }
1892 break;
1893 case EXPAND_AFTER_ITER:
1894 {
1895 tre_iteration_t *iter = node->obj;
1896 int pos_add_last;
1897 pos_add = tre_stack_pop_int(stack);
1898 pos_add_last = pos_add;
1899 if (iter->min > 1 || iter->max > 1)
1900 {
1901 tre_ast_node_t *seq1 = NULL((void*)0), *seq2 = NULL((void*)0);
1902 int j;
1903 int pos_add_save = pos_add;
1904
1905 /* Create a catenated sequence of copies of the node. */
1906 for (j = 0; j < iter->min; j++)
1907 {
1908 tre_ast_node_t *copy;
1909 /* Remove tags from all but the last copy. */
1910 int flags = ((j + 1 < iter->min)
1911 ? COPY_REMOVE_TAGS1
1912 : COPY_MAXIMIZE_FIRST_TAG2);
1913 pos_add_save = pos_add;
1914 status = tre_copy_ast(mem, stack, iter->arg, flags,
1915 &pos_add, tag_directions, &copy,
1916 &max_pos);
1917 if (status != REG_OK0)
1918 return status;
1919 if (seq1 != NULL((void*)0))
1920 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1921 else
1922 seq1 = copy;
1923 if (seq1 == NULL((void*)0))
1924 return REG_ESPACE12;
1925 }
1926
1927 if (iter->max == -1)
1928 {
1929 /* No upper limit. */
1930 pos_add_save = pos_add;
1931 status = tre_copy_ast(mem, stack, iter->arg, 0,
1932 &pos_add, NULL((void*)0), &seq2, &max_pos);
1933 if (status != REG_OK0)
1934 return status;
1935 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1936 if (seq2 == NULL((void*)0))
1937 return REG_ESPACE12;
1938 }
1939 else
1940 {
1941 for (j = iter->min; j < iter->max; j++)
1942 {
1943 tre_ast_node_t *tmp, *copy;
1944 pos_add_save = pos_add;
1945 status = tre_copy_ast(mem, stack, iter->arg, 0,
1946 &pos_add, NULL((void*)0), &copy, &max_pos);
1947 if (status != REG_OK0)
1948 return status;
1949 if (seq2 != NULL((void*)0))
1950 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1951 else
1952 seq2 = copy;
1953 if (seq2 == NULL((void*)0))
1954 return REG_ESPACE12;
1955 tmp = tre_ast_new_literal(mem, EMPTY-1, -1, -1);
1956 if (tmp == NULL((void*)0))
1957 return REG_ESPACE12;
1958 seq2 = tre_ast_new_union(mem, tmp, seq2);
1959 if (seq2 == NULL((void*)0))
1960 return REG_ESPACE12;
1961 }
1962 }
1963
1964 pos_add = pos_add_save;
1965 if (seq1 == NULL((void*)0))
1966 seq1 = seq2;
1967 else if (seq2 != NULL((void*)0))
1968 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1969 if (seq1 == NULL((void*)0))
1970 return REG_ESPACE12;
1971 node->obj = seq1->obj;
1972 node->type = seq1->type;
1973 }
1974
1975 iter_depth--;
1976 pos_add_total += pos_add - pos_add_last;
1977 if (iter_depth == 0)
1978 pos_add = pos_add_total;
1979
1980 break;
1981 }
1982 default:
1983 assert(0)(void)0;
1984 break;
1985 }
1986 }
1987
1988 *position += pos_add_total;
1989
1990 /* `max_pos' should never be larger than `*position' if the above
1991 code works, but just an extra safeguard let's make sure
1992 `*position' is set large enough so enough memory will be
1993 allocated for the transition table. */
1994 if (max_pos > *position)
1995 *position = max_pos;
1996
1997 return status;
1998}
1999
2000static tre_pos_and_tags_t *
2001tre_set_empty(tre_mem_t mem)
2002{
2003 tre_pos_and_tags_t *new_set;
2004
2005 new_set = tre_mem_calloc(mem, sizeof(*new_set))__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof(*new_set));
2006 if (new_set == NULL((void*)0))
2007 return NULL((void*)0);
2008
2009 new_set[0].position = -1;
2010 new_set[0].code_min = -1;
2011 new_set[0].code_max = -1;
2012
2013 return new_set;
2014}
2015
2016static tre_pos_and_tags_t *
2017tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2018 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2019{
2020 tre_pos_and_tags_t *new_set;
2021
2022 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2)__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof(*new_set) *
2)
;
2023 if (new_set == NULL((void*)0))
2024 return NULL((void*)0);
2025
2026 new_set[0].position = position;
2027 new_set[0].code_min = code_min;
2028 new_set[0].code_max = code_max;
2029 new_set[0].class = class;
2030 new_set[0].neg_classes = neg_classes;
2031 new_set[0].backref = backref;
2032 new_set[1].position = -1;
2033 new_set[1].code_min = -1;
2034 new_set[1].code_max = -1;
2035
2036 return new_set;
2037}
2038
2039static tre_pos_and_tags_t *
2040tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2041 int *tags, int assertions)
2042{
2043 int s1, s2, i, j;
2044 tre_pos_and_tags_t *new_set;
2045 int *new_tags;
2046 int num_tags;
2047
2048 for (num_tags = 0; tags != NULL((void*)0) && tags[num_tags] >= 0; num_tags++);
2049 for (s1 = 0; set1[s1].position >= 0; s1++);
2050 for (s2 = 0; set2[s2].position >= 0; s2++);
2051 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1))__tre_mem_alloc_impl(mem, 0, ((void*)0), 1, sizeof(*new_set) *
(s1 + s2 + 1))
;
2052 if (!new_set )
2053 return NULL((void*)0);
2054
2055 for (s1 = 0; set1[s1].position >= 0; s1++)
2056 {
2057 new_set[s1].position = set1[s1].position;
2058 new_set[s1].code_min = set1[s1].code_min;
2059 new_set[s1].code_max = set1[s1].code_max;
2060 new_set[s1].assertions = set1[s1].assertions | assertions;
2061 new_set[s1].class = set1[s1].class;
2062 new_set[s1].neg_classes = set1[s1].neg_classes;
2063 new_set[s1].backref = set1[s1].backref;
2064 if (set1[s1].tags == NULL((void*)0) && tags == NULL((void*)0))
2065 new_set[s1].tags = NULL((void*)0);
2066 else
2067 {
2068 for (i = 0; set1[s1].tags != NULL((void*)0) && set1[s1].tags[i] >= 0; i++);
2069 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, (sizeof(*new_tags
) * (i + num_tags + 1)))
2070 * (i + num_tags + 1)))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, (sizeof(*new_tags
) * (i + num_tags + 1)))
;
2071 if (new_tags == NULL((void*)0))
2072 return NULL((void*)0);
2073 for (j = 0; j < i; j++)
2074 new_tags[j] = set1[s1].tags[j];
2075 for (i = 0; i < num_tags; i++)
2076 new_tags[j + i] = tags[i];
2077 new_tags[j + i] = -1;
2078 new_set[s1].tags = new_tags;
2079 }
2080 }
2081
2082 for (s2 = 0; set2[s2].position >= 0; s2++)
2083 {
2084 new_set[s1 + s2].position = set2[s2].position;
2085 new_set[s1 + s2].code_min = set2[s2].code_min;
2086 new_set[s1 + s2].code_max = set2[s2].code_max;
2087 /* XXX - why not | assertions here as well? */
2088 new_set[s1 + s2].assertions = set2[s2].assertions;
2089 new_set[s1 + s2].class = set2[s2].class;
2090 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2091 new_set[s1 + s2].backref = set2[s2].backref;
2092 if (set2[s2].tags == NULL((void*)0))
2093 new_set[s1 + s2].tags = NULL((void*)0);
2094 else
2095 {
2096 for (i = 0; set2[s2].tags[i] >= 0; i++);
2097 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*new_tags)
* (i + 1))
;
2098 if (new_tags == NULL((void*)0))
2099 return NULL((void*)0);
2100 for (j = 0; j < i; j++)
2101 new_tags[j] = set2[s2].tags[j];
2102 new_tags[j] = -1;
2103 new_set[s1 + s2].tags = new_tags;
2104 }
2105 }
2106 new_set[s1 + s2].position = -1;
2107 return new_set;
2108}
2109
2110/* Finds the empty path through `node' which is the one that should be
2111 taken according to POSIX.2 rules, and adds the tags on that path to
2112 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2113 set to the number of tags seen on the path. */
2114static reg_errcode_t
2115tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2116 int *assertions, int *num_tags_seen)
2117{
2118 tre_literal_t *lit;
2119 tre_union_t *uni;
2120 tre_catenation_t *cat;
2121 tre_iteration_t *iter;
2122 int i;
2123 int bottom = tre_stack_num_objects(stack);
2124 reg_errcode_t status = REG_OK0;
2125 if (num_tags_seen)
2126 *num_tags_seen = 0;
2127
2128 status = tre_stack_push_voidptr(stack, node);
2129
2130 /* Walk through the tree recursively. */
2131 while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
2132 {
2133 node = tre_stack_pop_voidptr(stack);
2134
2135 switch (node->type)
2136 {
2137 case LITERAL:
2138 lit = (tre_literal_t *)node->obj;
2139 switch (lit->code_min)
2140 {
2141 case TAG-3:
2142 if (lit->code_max >= 0)
2143 {
2144 if (tags != NULL((void*)0))
2145 {
2146 /* Add the tag to `tags'. */
2147 for (i = 0; tags[i] >= 0; i++)
2148 if (tags[i] == lit->code_max)
2149 break;
2150 if (tags[i] < 0)
2151 {
2152 tags[i] = lit->code_max;
2153 tags[i + 1] = -1;
2154 }
2155 }
2156 if (num_tags_seen)
2157 (*num_tags_seen)++;
2158 }
2159 break;
2160 case ASSERTION-2:
2161 assert(lit->code_max >= 1(void)0
2162 || lit->code_max <= ASSERT_LAST)(void)0;
2163 if (assertions != NULL((void*)0))
2164 *assertions |= lit->code_max;
2165 break;
2166 case EMPTY-1:
2167 break;
2168 default:
2169 assert(0)(void)0;
2170 break;
2171 }
2172 break;
2173
2174 case UNION:
2175 /* Subexpressions starting earlier take priority over ones
2176 starting later, so we prefer the left subexpression over the
2177 right subexpression. */
2178 uni = (tre_union_t *)node->obj;
2179 if (uni->left->nullable)
2180 STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status
!= 0) break; }
2181 else if (uni->right->nullable)
2182 STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if (
status != 0) break; }
2183 else
2184 assert(0)(void)0;
2185 break;
2186
2187 case CATENATION:
2188 /* The path must go through both children. */
2189 cat = (tre_catenation_t *)node->obj;
2190 assert(cat->left->nullable)(void)0;
2191 assert(cat->right->nullable)(void)0;
2192 STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status
!= 0) break; }
;
2193 STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if (
status != 0) break; }
;
2194 break;
2195
2196 case ITERATION:
2197 /* A match with an empty string is preferred over no match at
2198 all, so we go through the argument if possible. */
2199 iter = (tre_iteration_t *)node->obj;
2200 if (iter->arg->nullable)
2201 STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status
!= 0) break; }
;
2202 break;
2203
2204 default:
2205 assert(0)(void)0;
2206 break;
2207 }
2208 }
2209
2210 return status;
2211}
2212
2213
2214typedef enum {
2215 NFL_RECURSE,
2216 NFL_POST_UNION,
2217 NFL_POST_CATENATION,
2218 NFL_POST_ITERATION
2219} tre_nfl_stack_symbol_t;
2220
2221
2222/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2223 the nodes of the AST `tree'. */
2224static reg_errcode_t
2225tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2226{
2227 int bottom = tre_stack_num_objects(stack);
2228
2229 STACK_PUSHR(stack, voidptr, tree){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, tree); if (_status != 0) return _status; }
;
2230 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2231
2232 while (tre_stack_num_objects(stack) > bottom)
2233 {
2234 tre_nfl_stack_symbol_t symbol;
2235 tre_ast_node_t *node;
2236
2237 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2238 node = tre_stack_pop_voidptr(stack);
2239 switch (symbol)
2240 {
2241 case NFL_RECURSE:
2242 switch (node->type)
2243 {
2244 case LITERAL:
2245 {
2246 tre_literal_t *lit = (tre_literal_t *)node->obj;
2247 if (IS_BACKREF(lit)((lit)->code_min == -4))
2248 {
2249 /* Back references: nullable = false, firstpos = {i},
2250 lastpos = {i}. */
2251 node->nullable = 0;
2252 node->firstpos = tre_set_one(mem, lit->position, 0,
2253 TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0), -1);
2254 if (!node->firstpos)
2255 return REG_ESPACE12;
2256 node->lastpos = tre_set_one(mem, lit->position, 0,
2257 TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0),
2258 (int)lit->code_max);
2259 if (!node->lastpos)
2260 return REG_ESPACE12;
2261 }
2262 else if (lit->code_min < 0)
2263 {
2264 /* Tags, empty strings, params, and zero width assertions:
2265 nullable = true, firstpos = {}, and lastpos = {}. */
2266 node->nullable = 1;
2267 node->firstpos = tre_set_empty(mem);
2268 if (!node->firstpos)
2269 return REG_ESPACE12;
2270 node->lastpos = tre_set_empty(mem);
2271 if (!node->lastpos)
2272 return REG_ESPACE12;
2273 }
2274 else
2275 {
2276 /* Literal at position i: nullable = false, firstpos = {i},
2277 lastpos = {i}. */
2278 node->nullable = 0;
2279 node->firstpos =
2280 tre_set_one(mem, lit->position, (int)lit->code_min,
2281 (int)lit->code_max, 0, NULL((void*)0), -1);
2282 if (!node->firstpos)
2283 return REG_ESPACE12;
2284 node->lastpos = tre_set_one(mem, lit->position,
2285 (int)lit->code_min,
2286 (int)lit->code_max,
2287 lit->class, lit->neg_classes,
2288 -1);
2289 if (!node->lastpos)
2290 return REG_ESPACE12;
2291 }
2292 break;
2293 }
2294
2295 case UNION:
2296 /* Compute the attributes for the two subtrees, and after that
2297 for this node. */
2298 STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, node); if (_status != 0) return _status; }
;
2299 STACK_PUSHR(stack, int, NFL_POST_UNION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_UNION
); if (_status != 0) return _status; }
;
2300 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_union_t *)node->obj)->right); if (_status != 0)
return _status; }
;
2301 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2302 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_union_t *)node->obj)->left); if (_status != 0) return
_status; }
;
2303 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2304 break;
2305
2306 case CATENATION:
2307 /* Compute the attributes for the two subtrees, and after that
2308 for this node. */
2309 STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, node); if (_status != 0) return _status; }
;
2310 STACK_PUSHR(stack, int, NFL_POST_CATENATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_CATENATION
); if (_status != 0) return _status; }
;
2311 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_catenation_t *)node->obj)->right); if (_status !=
0) return _status; }
;
2312 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2313 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_catenation_t *)node->obj)->left); if (_status !=
0) return _status; }
;
2314 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2315 break;
2316
2317 case ITERATION:
2318 /* Compute the attributes for the subtree, and after that for
2319 this node. */
2320 STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, node); if (_status != 0) return _status; }
;
2321 STACK_PUSHR(stack, int, NFL_POST_ITERATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_ITERATION
); if (_status != 0) return _status; }
;
2322 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack
, ((tre_iteration_t *)node->obj)->arg); if (_status != 0
) return _status; }
;
2323 STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE
); if (_status != 0) return _status; }
;
2324 break;
2325 }
2326 break; /* end case: NFL_RECURSE */
2327
2328 case NFL_POST_UNION:
2329 {
2330 tre_union_t *uni = (tre_union_t *)node->obj;
2331 node->nullable = uni->left->nullable || uni->right->nullable;
2332 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2333 uni->right->firstpos, NULL((void*)0), 0);
2334 if (!node->firstpos)
2335 return REG_ESPACE12;
2336 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2337 uni->right->lastpos, NULL((void*)0), 0);
2338 if (!node->lastpos)
2339 return REG_ESPACE12;
2340 break;
2341 }
2342
2343 case NFL_POST_ITERATION:
2344 {
2345 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2346
2347 if (iter->min == 0 || iter->arg->nullable)
2348 node->nullable = 1;
2349 else
2350 node->nullable = 0;
2351 node->firstpos = iter->arg->firstpos;
2352 node->lastpos = iter->arg->lastpos;
2353 break;
2354 }
2355
2356 case NFL_POST_CATENATION:
2357 {
2358 int num_tags, *tags, assertions;
2359 reg_errcode_t status;
2360 tre_catenation_t *cat = node->obj;
2361 node->nullable = cat->left->nullable && cat->right->nullable;
2362
2363 /* Compute firstpos. */
2364 if (cat->left->nullable)
2365 {
2366 /* The left side matches the empty string. Make a first pass
2367 with tre_match_empty() to get the number of tags and
2368 parameters. */
2369 status = tre_match_empty(stack, cat->left,
2370 NULL((void*)0), NULL((void*)0), &num_tags);
2371 if (status != REG_OK0)
2372 return status;
2373 /* Allocate arrays for the tags and parameters. */
2374 tags = xmallocmalloc(sizeof(*tags) * (num_tags + 1));
2375 if (!tags)
2376 return REG_ESPACE12;
2377 tags[0] = -1;
2378 assertions = 0;
2379 /* Second pass with tre_mach_empty() to get the list of
2380 tags and parameters. */
2381 status = tre_match_empty(stack, cat->left, tags,
2382 &assertions, NULL((void*)0));
2383 if (status != REG_OK0)
2384 {
2385 xfreefree(tags);
2386 return status;
2387 }
2388 node->firstpos =
2389 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2390 tags, assertions);
2391 xfreefree(tags);
2392 if (!node->firstpos)
2393 return REG_ESPACE12;
2394 }
2395 else
2396 {
2397 node->firstpos = cat->left->firstpos;
2398 }
2399
2400 /* Compute lastpos. */
2401 if (cat->right->nullable)
2402 {
2403 /* The right side matches the empty string. Make a first pass
2404 with tre_match_empty() to get the number of tags and
2405 parameters. */
2406 status = tre_match_empty(stack, cat->right,
2407 NULL((void*)0), NULL((void*)0), &num_tags);
2408 if (status != REG_OK0)
2409 return status;
2410 /* Allocate arrays for the tags and parameters. */
2411 tags = xmallocmalloc(sizeof(int) * (num_tags + 1));
2412 if (!tags)
2413 return REG_ESPACE12;
2414 tags[0] = -1;
2415 assertions = 0;
2416 /* Second pass with tre_mach_empty() to get the list of
2417 tags and parameters. */
2418 status = tre_match_empty(stack, cat->right, tags,
2419 &assertions, NULL((void*)0));
2420 if (status != REG_OK0)
2421 {
2422 xfreefree(tags);
2423 return status;
2424 }
2425 node->lastpos =
2426 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2427 tags, assertions);
2428 xfreefree(tags);
2429 if (!node->lastpos)
2430 return REG_ESPACE12;
2431 }
2432 else
2433 {
2434 node->lastpos = cat->right->lastpos;
2435 }
2436 break;
2437 }
2438
2439 default:
2440 assert(0)(void)0;
2441 break;
2442 }
2443 }
2444
2445 return REG_OK0;
2446}
2447
2448
2449/* Adds a transition from each position in `p1' to each position in `p2'. */
2450static reg_errcode_t
2451tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2452 tre_tnfa_transition_t *transitions,
2453 int *counts, int *offs)
2454{
2455 tre_pos_and_tags_t *orig_p2 = p2;
2456 tre_tnfa_transition_t *trans;
2457 int i, j, k, l, dup, prev_p2_pos;
2458
2459 if (transitions != NULL((void*)0))
2460 while (p1->position >= 0)
2461 {
2462 p2 = orig_p2;
2463 prev_p2_pos = -1;
2464 while (p2->position >= 0)
2465 {
2466 /* Optimization: if this position was already handled, skip it. */
2467 if (p2->position == prev_p2_pos)
2468 {
2469 p2++;
2470 continue;
2471 }
2472 prev_p2_pos = p2->position;
2473 /* Set `trans' to point to the next unused transition from
2474 position `p1->position'. */
2475 trans = transitions + offs[p1->position];
2476 while (trans->state != NULL((void*)0))
2477 {
2478#if 0
2479 /* If we find a previous transition from `p1->position' to
2480 `p2->position', it is overwritten. This can happen only
2481 if there are nested loops in the regexp, like in "((a)*)*".
2482 In POSIX.2 repetition using the outer loop is always
2483 preferred over using the inner loop. Therefore the
2484 transition for the inner loop is useless and can be thrown
2485 away. */
2486 /* XXX - The same position is used for all nodes in a bracket
2487 expression, so this optimization cannot be used (it will
2488 break bracket expressions) unless I figure out a way to
2489 detect it here. */
2490 if (trans->state_id == p2->position)
2491 {
2492 break;
2493 }
2494#endif
2495 trans++;
2496 }
2497
2498 if (trans->state == NULL((void*)0))
2499 (trans + 1)->state = NULL((void*)0);
2500 /* Use the character ranges, assertions, etc. from `p1' for
2501 the transition from `p1' to `p2'. */
2502 trans->code_min = p1->code_min;
2503 trans->code_max = p1->code_max;
2504 trans->state = transitions + offs[p2->position];
2505 trans->state_id = p2->position;
2506 trans->assertions = p1->assertions | p2->assertions
2507 | (p1->class ? ASSERT_CHAR_CLASS4 : 0)
2508 | (p1->neg_classes != NULL((void*)0) ? ASSERT_CHAR_CLASS_NEG8 : 0);
2509 if (p1->backref >= 0)
2510 {
2511 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0)(void)0;
2512 assert(p2->backref < 0)(void)0;
2513 trans->u.backref = p1->backref;
2514 trans->assertions |= ASSERT_BACKREF256;
2515 }
2516 else
2517 trans->u.class = p1->class;
2518 if (p1->neg_classes != NULL((void*)0))
2519 {
2520 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2521 trans->neg_classes =
2522 xmallocmalloc(sizeof(*trans->neg_classes) * (i + 1));
2523 if (trans->neg_classes == NULL((void*)0))
2524 return REG_ESPACE12;
2525 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2526 trans->neg_classes[i] = p1->neg_classes[i];
2527 trans->neg_classes[i] = (tre_ctype_t)0;
2528 }
2529 else
2530 trans->neg_classes = NULL((void*)0);
2531
2532 /* Find out how many tags this transition has. */
2533 i = 0;
2534 if (p1->tags != NULL((void*)0))
2535 while(p1->tags[i] >= 0)
2536 i++;
2537 j = 0;
2538 if (p2->tags != NULL((void*)0))
2539 while(p2->tags[j] >= 0)
2540 j++;
2541
2542 /* If we are overwriting a transition, free the old tag array. */
2543 if (trans->tags != NULL((void*)0))
2544 xfreefree(trans->tags);
2545 trans->tags = NULL((void*)0);
2546
2547 /* If there were any tags, allocate an array and fill it. */
2548 if (i + j > 0)
2549 {
2550 trans->tags = xmallocmalloc(sizeof(*trans->tags) * (i + j + 1));
2551 if (!trans->tags)
2552 return REG_ESPACE12;
2553 i = 0;
2554 if (p1->tags != NULL((void*)0))
2555 while(p1->tags[i] >= 0)
2556 {
2557 trans->tags[i] = p1->tags[i];
2558 i++;
2559 }
2560 l = i;
2561 j = 0;
2562 if (p2->tags != NULL((void*)0))
2563 while (p2->tags[j] >= 0)
2564 {
2565 /* Don't add duplicates. */
2566 dup = 0;
2567 for (k = 0; k < i; k++)
2568 if (trans->tags[k] == p2->tags[j])
2569 {
2570 dup = 1;
2571 break;
2572 }
2573 if (!dup)
2574 trans->tags[l++] = p2->tags[j];
2575 j++;
2576 }
2577 trans->tags[l] = -1;
2578 }
2579
2580 p2++;
2581 }
2582 p1++;
2583 }
2584 else
2585 /* Compute a maximum limit for the number of transitions leaving
2586 from each state. */
2587 while (p1->position >= 0)
2588 {
2589 p2 = orig_p2;
2590 while (p2->position >= 0)
2591 {
2592 counts[p1->position]++;
2593 p2++;
2594 }
2595 p1++;
2596 }
2597 return REG_OK0;
2598}
2599
2600/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2601 labelled with one character range (there are no transitions on empty
2602 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2603 the regexp. */
2604static reg_errcode_t
2605tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2606 int *counts, int *offs)
2607{
2608 tre_union_t *uni;
2609 tre_catenation_t *cat;
2610 tre_iteration_t *iter;
2611 reg_errcode_t errcode = REG_OK0;
2612
2613 /* XXX - recurse using a stack!. */
2614 switch (node->type)
2615 {
2616 case LITERAL:
2617 break;
2618 case UNION:
2619 uni = (tre_union_t *)node->obj;
2620 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2621 if (errcode != REG_OK0)
2622 return errcode;
2623 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2624 break;
2625
2626 case CATENATION:
2627 cat = (tre_catenation_t *)node->obj;
2628 /* Add a transition from each position in cat->left->lastpos
2629 to each position in cat->right->firstpos. */
2630 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2631 transitions, counts, offs);
2632 if (errcode != REG_OK0)
2633 return errcode;
2634 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2635 if (errcode != REG_OK0)
2636 return errcode;
2637 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2638 break;
2639
2640 case ITERATION:
2641 iter = (tre_iteration_t *)node->obj;
2642 assert(iter->max == -1 || iter->max == 1)(void)0;
2643
2644 if (iter->max == -1)
2645 {
2646 assert(iter->min == 0 || iter->min == 1)(void)0;
2647 /* Add a transition from each last position in the iterated
2648 expression to each first position. */
2649 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2650 transitions, counts, offs);
2651 if (errcode != REG_OK0)
2652 return errcode;
2653 }
2654 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2655 break;
2656 }
2657 return errcode;
2658}
2659
2660
2661#define ERROR_EXIT(err)do { errcode = err; if ( 1) goto error_exit; } while ( 0) \
2662 do \
2663 { \
2664 errcode = err; \
2665 if (/*CONSTCOND*/1) \
2666 goto error_exit; \
2667 } \
2668 while (/*CONSTCOND*/0)
2669
2670
2671int
2672regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2673{
2674 tre_stack_t *stack;
2675 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2676 tre_pos_and_tags_t *p;
2677 int *counts = NULL((void*)0), *offs = NULL((void*)0);
2678 int i, add = 0;
2679 tre_tnfa_transition_t *transitions, *initial;
2680 tre_tnfa_t *tnfa = NULL((void*)0);
2681 tre_submatch_data_t *submatch_data;
2682 tre_tag_direction_t *tag_directions = NULL((void*)0);
2683 reg_errcode_t errcode;
2684 tre_mem_t mem;
2685
2686 /* Parse context. */
2687 tre_parse_ctx_t parse_ctx;
2688
2689 /* Allocate a stack used throughout the compilation process for various
2690 purposes. */
2691 stack = tre_stack_new(512, 1024000, 128);
2692 if (!stack)
2693 return REG_ESPACE12;
2694 /* Allocate a fast memory allocator. */
2695 mem = tre_mem_new()__tre_mem_new_impl(0, ((void*)0));
2696 if (!mem)
2697 {
2698 tre_stack_destroy(stack);
2699 return REG_ESPACE12;
2700 }
2701
2702 /* Parse the regexp. */
2703 memset(&parse_ctx, 0, sizeof(parse_ctx));
2704 parse_ctx.mem = mem;
2705 parse_ctx.stack = stack;
2706 parse_ctx.re = regex;
2707 parse_ctx.cflags = cflags;
2708 parse_ctx.max_backref = -1;
2709 errcode = tre_parse(&parse_ctx);
2710 if (errcode != REG_OK0)
2711 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2712 preg->re_nsub = parse_ctx.submatch_id - 1;
2713 tree = parse_ctx.n;
2714
2715#ifdef TRE_DEBUG
2716 tre_ast_print(tree);
2717#endif /* TRE_DEBUG */
2718
2719 /* Referring to nonexistent subexpressions is illegal. */
2720 if (parse_ctx.max_backref > (int)preg->re_nsub)
2721 ERROR_EXIT(REG_ESUBREG)do { errcode = 6; if ( 1) goto error_exit; } while ( 0);
2722
2723 /* Allocate the TNFA struct. */
2724 tnfa = xcalloccalloc(1, sizeof(tre_tnfa_t));
2725 if (tnfa == NULL((void*)0))
2726 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2727 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2728 tnfa->have_approx = 0;
2729 tnfa->num_submatches = parse_ctx.submatch_id;
2730
2731 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2732 regexp does not have back references, this can be skipped. */
2733 if (tnfa->have_backrefs || !(cflags & REG_NOSUB8))
2734 {
2735
2736 /* Figure out how many tags we will need. */
2737 errcode = tre_add_tags(NULL((void*)0), stack, tree, tnfa);
2738 if (errcode != REG_OK0)
2739 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2740
2741 if (tnfa->num_tags > 0)
2742 {
2743 tag_directions = xmallocmalloc(sizeof(*tag_directions)
2744 * (tnfa->num_tags + 1));
2745 if (tag_directions == NULL((void*)0))
2746 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2747 tnfa->tag_directions = tag_directions;
2748 memset(tag_directions, -1,
2749 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2750 }
2751 tnfa->minimal_tags = xcalloccalloc((unsigned)tnfa->num_tags * 2 + 1,
2752 sizeof(*tnfa->minimal_tags));
2753 if (tnfa->minimal_tags == NULL((void*)0))
2754 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2755
2756 submatch_data = xcalloccalloc((unsigned)parse_ctx.submatch_id,
2757 sizeof(*submatch_data));
2758 if (submatch_data == NULL((void*)0))
2759 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2760 tnfa->submatch_data = submatch_data;
2761
2762 errcode = tre_add_tags(mem, stack, tree, tnfa);
2763 if (errcode != REG_OK0)
2764 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2765
2766 }
2767
2768 /* Expand iteration nodes. */
2769 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2770 tag_directions);
2771 if (errcode != REG_OK0)
2772 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2773
2774 /* Add a dummy node for the final state.
2775 XXX - For certain patterns this dummy node can be optimized away,
2776 for example "a*" or "ab*". Figure out a simple way to detect
2777 this possibility. */
2778 tmp_ast_l = tree;
2779 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2780 if (tmp_ast_r == NULL((void*)0))
2781 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2782
2783 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2784 if (tree == NULL((void*)0))
2785 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2786
2787 errcode = tre_compute_nfl(mem, stack, tree);
2788 if (errcode != REG_OK0)
2789 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2790
2791 counts = xmallocmalloc(sizeof(int) * parse_ctx.position);
2792 if (counts == NULL((void*)0))
2793 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2794
2795 offs = xmallocmalloc(sizeof(int) * parse_ctx.position);
2796 if (offs == NULL((void*)0))
2797 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2798
2799 for (i = 0; i < parse_ctx.position; i++)
2800 counts[i] = 0;
2801 tre_ast_to_tnfa(tree, NULL((void*)0), counts, NULL((void*)0));
2802
2803 add = 0;
2804 for (i = 0; i < parse_ctx.position; i++)
2805 {
2806 offs[i] = add;
2807 add += counts[i] + 1;
2808 counts[i] = 0;
2809 }
2810 transitions = xcalloccalloc((unsigned)add + 1, sizeof(*transitions));
2811 if (transitions == NULL((void*)0))
2812 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2813 tnfa->transitions = transitions;
2814 tnfa->num_transitions = add;
2815
2816 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2817 if (errcode != REG_OK0)
2818 ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2819
2820 tnfa->firstpos_chars = NULL((void*)0);
2821
2822 p = tree->firstpos;
2823 i = 0;
2824 while (p->position >= 0)
2825 {
2826 i++;
2827 p++;
2828 }
2829
2830 initial = xcalloccalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2831 if (initial == NULL((void*)0))
2832 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2833 tnfa->initial = initial;
2834
2835 i = 0;
2836 for (p = tree->firstpos; p->position >= 0; p++)
2837 {
2838 initial[i].state = transitions + offs[p->position];
2839 initial[i].state_id = p->position;
2840 initial[i].tags = NULL((void*)0);
2841 /* Copy the arrays p->tags, and p->params, they are allocated
2842 from a tre_mem object. */
2843 if (p->tags)
2844 {
2845 int j;
2846 for (j = 0; p->tags[j] >= 0; j++);
2847 initial[i].tags = xmallocmalloc(sizeof(*p->tags) * (j + 1));
2848 if (!initial[i].tags)
2849 ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2850 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2851 }
2852 initial[i].assertions = p->assertions;
2853 i++;
2854 }
2855 initial[i].state = NULL((void*)0);
2856
2857 tnfa->num_transitions = add;
2858 tnfa->final = transitions + offs[tree->lastpos[0].position];
2859 tnfa->num_states = parse_ctx.position;
2860 tnfa->cflags = cflags;
2861
2862 tre_mem_destroy__tre_mem_destroy(mem);
2863 tre_stack_destroy(stack);
2864 xfreefree(counts);
2865 xfreefree(offs);
2866
2867 preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2868 return REG_OK0;
2869
2870 error_exit:
2871 /* Free everything that was allocated and return the error code. */
2872 tre_mem_destroy__tre_mem_destroy(mem);
2873 if (stack != NULL((void*)0))
2874 tre_stack_destroy(stack);
2875 if (counts != NULL((void*)0))
2876 xfreefree(counts);
2877 if (offs != NULL((void*)0))
2878 xfreefree(offs);
2879 preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2880 regfree(preg);
2881 return errcode;
2882}
2883
2884
2885
2886
2887void
2888regfree(regex_t *preg)
2889{
2890 tre_tnfa_t *tnfa;
2891 unsigned int i;
2892 tre_tnfa_transition_t *trans;
2893
2894 tnfa = (void *)preg->TRE_REGEX_T_FIELD__opaque;
2895 if (!tnfa)
2896 return;
2897
2898 for (i = 0; i < tnfa->num_transitions; i++)
2899 if (tnfa->transitions[i].state)
2900 {
2901 if (tnfa->transitions[i].tags)
2902 xfreefree(tnfa->transitions[i].tags);
2903 if (tnfa->transitions[i].neg_classes)
2904 xfreefree(tnfa->transitions[i].neg_classes);
2905 }
2906 if (tnfa->transitions)
2907 xfreefree(tnfa->transitions);
2908
2909 if (tnfa->initial)
2910 {
2911 for (trans = tnfa->initial; trans->state; trans++)
2912 {
2913 if (trans->tags)
2914 xfreefree(trans->tags);
2915 }
2916 xfreefree(tnfa->initial);
2917 }
2918
2919 if (tnfa->submatch_data)
2920 {
2921 for (i = 0; i < tnfa->num_submatches; i++)
2922 if (tnfa->submatch_data[i].parents)
2923 xfreefree(tnfa->submatch_data[i].parents);
2924 xfreefree(tnfa->submatch_data);
2925 }
2926
2927 if (tnfa->tag_directions)
2928 xfreefree(tnfa->tag_directions);
2929 if (tnfa->firstpos_chars)
2930 xfreefree(tnfa->firstpos_chars);
2931 if (tnfa->minimal_tags)
2932 xfreefree(tnfa->minimal_tags);
2933 xfreefree(tnfa);
2934}