/a/builder/home/kraj/work/oe/musl/src/regex/regcomp.c

Bug Summary

File:	src/regex/regcomp.c
Location:	line 663, column 2
Description:	Value stored to 'negmax' is never read

Annotated Source Code

1	/*
2	regcomp.c - TRE POSIX compatible regex compilation functions.
3
4	Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5	All rights reserved.
6
7	Redistribution and use in source and binary forms, with or without
8	modification, are permitted provided that the following conditions
9	are met:
10
11	1. Redistributions of source code must retain the above copyright
12	notice, this list of conditions and the following disclaimer.
13
14	2. Redistributions in binary form must reproduce the above copyright
15	notice, this list of conditions and the following disclaimer in the
16	documentation and/or other materials provided with the distribution.
17
18	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22	HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30	*/
31
32	#include <string.h>
33	#include <stdlib.h>
34	#include <regex.h>
35	#include <limits.h>
36	#include <stdint.h>
37	#include <ctype.h>
38
39	#include "tre.h"
40
41	#include <assert.h>
42
43	/***********************************************************************
44	from tre-compile.h
45	***********************************************************************/
46
47	typedef struct {
48	int position;
49	int code_min;
50	int code_max;
51	int *tags;
52	int assertions;
53	tre_ctype_t class;
54	tre_ctype_t *neg_classes;
55	int backref;
56	} tre_pos_and_tags_t;
57
58
59	/***********************************************************************
60	from tre-ast.c and tre-ast.h
61	***********************************************************************/
62
63	/* The different AST node types. */
64	typedef enum {
65	LITERAL,
66	CATENATION,
67	ITERATION,
68	UNION
69	} tre_ast_type_t;
70
71	/* Special subtypes of TRE_LITERAL. */
72	#define EMPTY-1 -1 /* Empty leaf (denotes empty string). */
73	#define ASSERTION-2 -2 /* Assertion leaf. */
74	#define TAG-3 -3 /* Tag leaf. */
75	#define BACKREF-4 -4 /* Back reference leaf. */
76
77	#define IS_SPECIAL(x)((x)->code_min < 0) ((x)->code_min < 0)
78	#define IS_EMPTY(x)((x)->code_min == -1) ((x)->code_min == EMPTY-1)
79	#define IS_ASSERTION(x)((x)->code_min == -2) ((x)->code_min == ASSERTION-2)
80	#define IS_TAG(x)((x)->code_min == -3) ((x)->code_min == TAG-3)
81	#define IS_BACKREF(x)((x)->code_min == -4) ((x)->code_min == BACKREF-4)
82
83
84	/* A generic AST node. All AST nodes consist of this node on the top
85	level with `obj' pointing to the actual content. */
86	typedef struct {
87	tre_ast_type_t type; /* Type of the node. */
88	void obj; / Pointer to actual node. */
89	int nullable;
90	int submatch_id;
91	int num_submatches;
92	int num_tags;
93	tre_pos_and_tags_t *firstpos;
94	tre_pos_and_tags_t *lastpos;
95	} tre_ast_node_t;
96
97
98	/* A "literal" node. These are created for assertions, back references,
99	tags, matching parameter settings, and all expressions that match one
100	character. */
101	typedef struct {
102	long code_min;
103	long code_max;
104	int position;
105	tre_ctype_t class;
106	tre_ctype_t *neg_classes;
107	} tre_literal_t;
108
109	/* A "catenation" node. These are created when two regexps are concatenated.
110	If there are more than one subexpressions in sequence, the `left' part
111	holds all but the last, and `right' part holds the last subexpression
112	(catenation is left associative). */
113	typedef struct {
114	tre_ast_node_t *left;
115	tre_ast_node_t *right;
116	} tre_catenation_t;
117
118	/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
119	operators. */
120	typedef struct {
121	/* Subexpression to match. */
122	tre_ast_node_t *arg;
123	/* Minimum number of consecutive matches. */
124	int min;
125	/* Maximum number of consecutive matches. */
126	int max;
127	/* If 0, match as many characters as possible, if 1 match as few as
128	possible. Note that this does not always mean the same thing as
129	matching as many/few repetitions as possible. */
130	unsigned int minimal:1;
131	} tre_iteration_t;
132
133	/* An "union" node. These are created for the "\|" operator. */
134	typedef struct {
135	tre_ast_node_t *left;
136	tre_ast_node_t *right;
137	} tre_union_t;
138
139
140	static tre_ast_node_t *
141	tre_ast_new_node(tre_mem_t mem, int type, void *obj)
142	{
143	tre_ast_node_t node = tre_mem_calloc(mem, sizeof node)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof node);
144	if (!node \|\| !obj)
145	return 0;
146	node->obj = obj;
147	node->type = type;
148	node->nullable = -1;
149	node->submatch_id = -1;
150	return node;
151	}
152
153	static tre_ast_node_t *
154	tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
155	{
156	tre_ast_node_t *node;
157	tre_literal_t *lit;
158
159	lit = tre_mem_calloc(mem, sizeof lit)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *lit);
160	node = tre_ast_new_node(mem, LITERAL, lit);
161	if (!node)
162	return 0;
163	lit->code_min = code_min;
164	lit->code_max = code_max;
165	lit->position = position;
166	return node;
167	}
168
169	static tre_ast_node_t *
170	tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
171	{
172	tre_ast_node_t *node;
173	tre_iteration_t *iter;
174
175	iter = tre_mem_calloc(mem, sizeof iter)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *iter);
176	node = tre_ast_new_node(mem, ITERATION, iter);
177	if (!node)
178	return 0;
179	iter->arg = arg;
180	iter->min = min;
181	iter->max = max;
182	iter->minimal = minimal;
183	node->num_submatches = arg->num_submatches;
184	return node;
185	}
186
187	static tre_ast_node_t *
188	tre_ast_new_union(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t right)
189	{
190	tre_ast_node_t *node;
191	tre_union_t *un;
192
193	if (!left)
194	return right;
195	un = tre_mem_calloc(mem, sizeof un)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *un);
196	node = tre_ast_new_node(mem, UNION, un);
197	if (!node \|\| !right)
198	return 0;
199	un->left = left;
200	un->right = right;
201	node->num_submatches = left->num_submatches + right->num_submatches;
202	return node;
203	}
204
205	static tre_ast_node_t *
206	tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t right)
207	{
208	tre_ast_node_t *node;
209	tre_catenation_t *cat;
210
211	if (!left)
212	return right;
213	cat = tre_mem_calloc(mem, sizeof cat)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *cat);
214	node = tre_ast_new_node(mem, CATENATION, cat);
215	if (!node)
216	return 0;
217	cat->left = left;
218	cat->right = right;
219	node->num_submatches = left->num_submatches + right->num_submatches;
220	return node;
221	}
222
223
224	/***********************************************************************
225	from tre-stack.c and tre-stack.h
226	***********************************************************************/
227
228	typedef struct tre_stack_rec tre_stack_t;
229
230	/* Creates a new stack object. `size' is initial size in bytes, `max_size'
231	is maximum size, and `increment' specifies how much more space will be
232	allocated with realloc() if all space gets used up. Returns the stack
233	object or NULL if out of memory. */
234	static tre_stack_t *
235	tre_stack_new(int size, int max_size, int increment);
236
237	/* Frees the stack object. */
238	static void
239	tre_stack_destroy(tre_stack_t *s);
240
241	/* Returns the current number of objects in the stack. */
242	static int
243	tre_stack_num_objects(tre_stack_t *s);
244
245	/* Each tre_stack_push_(tre_stack_t s, <type> value) function pushes
246	`value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247	This tries to realloc() more space before failing if maximum size
248	has not yet been reached. Returns REG_OK if successful. */
249	#define declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type value) \
250	static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
251
252	declare_pushf(voidptr, void )static reg_errcode_t tre_stack_push_voidptr(tre_stack_t s, void * value);
253	declare_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value );
254
255	/* Each tre_stack_pop_(tre_stack_t s) function pops the topmost
256	element off of stack `s' and returns it. The stack must not be
257	empty. */
258	#define declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) \
259	static type tre_stack_pop_ ## typetag(tre_stack_t *s)
260
261	declare_popf(voidptr, void )static void tre_stack_pop_voidptr(tre_stack_t *s);
262	declare_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s);
263
264	/* Just to save some typing. */
265	#define STACK_PUSH(s, typetag, value)do { status = tre_stack_push_typetag(s, value); } while ( 0) \
266	do \
267	{ \
268	status = tre_stack_push_ ## typetag(s, value); \
269	} \
270	while (/CONSTCOND/0)
271
272	#define STACK_PUSHX(s, typetag, value){ status = tre_stack_push_typetag(s, value); if (status != 0) break; } \
273	{ \
274	status = tre_stack_push_ ## typetag(s, value); \
275	if (status != REG_OK0) \
276	break; \
277	}
278
279	#define STACK_PUSHR(s, typetag, value){ reg_errcode_t _status; _status = tre_stack_push_typetag(s, value ); if (_status != 0) return _status; } \
280	{ \
281	reg_errcode_t _status; \
282	_status = tre_stack_push_ ## typetag(s, value); \
283	if (_status != REG_OK0) \
284	return _status; \
285	}
286
287	union tre_stack_item {
288	void *voidptr_value;
289	int int_value;
290	};
291
292	struct tre_stack_rec {
293	int size;
294	int max_size;
295	int increment;
296	int ptr;
297	union tre_stack_item *stack;
298	};
299
300
301	static tre_stack_t *
302	tre_stack_new(int size, int max_size, int increment)
303	{
304	tre_stack_t *s;
305
306	s = xmallocmalloc(sizeof(*s));
307	if (s != NULL((void*)0))
308	{
309	s->stack = xmallocmalloc(sizeof(s->stack) size);
310	if (s->stack == NULL((void*)0))
311	{
312	xfreefree(s);
313	return NULL((void*)0);
314	}
315	s->size = size;
316	s->max_size = max_size;
317	s->increment = increment;
318	s->ptr = 0;
319	}
320	return s;
321	}
322
323	static void
324	tre_stack_destroy(tre_stack_t *s)
325	{
326	xfreefree(s->stack);
327	xfreefree(s);
328	}
329
330	static int
331	tre_stack_num_objects(tre_stack_t *s)
332	{
333	return s->ptr;
334	}
335
336	static reg_errcode_t
337	tre_stack_push(tre_stack_t *s, union tre_stack_item value)
338	{
339	if (s->ptr < s->size)
340	{
341	s->stack[s->ptr] = value;
342	s->ptr++;
343	}
344	else
345	{
346	if (s->size >= s->max_size)
347	{
348	return REG_ESPACE12;
349	}
350	else
351	{
352	union tre_stack_item *new_buffer;
353	int new_size;
354	new_size = s->size + s->increment;
355	if (new_size > s->max_size)
356	new_size = s->max_size;
357	new_buffer = xreallocrealloc(s->stack, sizeof(new_buffer) new_size);
358	if (new_buffer == NULL((void*)0))
359	{
360	return REG_ESPACE12;
361	}
362	assert(new_size > s->size)(void)0;
363	s->size = new_size;
364	s->stack = new_buffer;
365	tre_stack_push(s, value);
366	}
367	}
368	return REG_OK0;
369	}
370
371	#define define_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type value) { union tre_stack_item item; item.typetag_value = value ; return tre_stack_push(s, item); } \
372	declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type value) { \
373	union tre_stack_item item; \
374	item.typetag ## _value = value; \
375	return tre_stack_push(s, item); \
376	}
377
378	define_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value ) { union tre_stack_item item; item.int_value = value; return tre_stack_push(s, item); }
379	define_pushf(voidptr, void )static reg_errcode_t tre_stack_push_voidptr(tre_stack_t s, void * value) { union tre_stack_item item; item.voidptr_value = value ; return tre_stack_push(s, item); }
380
381	#define define_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { return s-> stack[--s->ptr].typetag_value; } \
382	declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { \
383	return s->stack[--s->ptr].typetag ## _value; \
384	}
385
386	define_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s) { return s->stack [--s->ptr].int_value; }
387	define_popf(voidptr, void )static void tre_stack_pop_voidptr(tre_stack_t *s) { return s ->stack[--s->ptr].voidptr_value; }
388
389
390	/***********************************************************************
391	from tre-parse.c and tre-parse.h
392	***********************************************************************/
393
394	/* Parse context. */
395	typedef struct {
396	/* Memory allocator. The AST is allocated using this. */
397	tre_mem_t mem;
398	/* Stack used for keeping track of regexp syntax. */
399	tre_stack_t *stack;
400	/* The parsed node after a parse function returns. */
401	tre_ast_node_t *n;
402	/* Position in the regexp pattern after a parse function returns. */
403	const char *s;
404	/* The first character of the regexp. */
405	const char *re;
406	/* Current submatch ID. */
407	int submatch_id;
408	/* Current position (number of literal). */
409	int position;
410	/* The highest back reference or -1 if none seen so far. */
411	int max_backref;
412	/* Compilation flags. */
413	int cflags;
414	} tre_parse_ctx_t;
415
416	/* Some macros for expanding \w, \s, etc. */
417	static const struct {
418	char c;
419	const char *expansion;
420	} tre_macros[] = {
421	{'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422	{'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423	{'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424	{'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
425	{ 0, 0 }
426	};
427
428	/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429	must have at least `len' items. Sets buf[0] to zero if the there
430	is no match in `tre_macros'. */
431	static const char tre_expand_macro(const char s)
432	{
433	int i;
434	for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435	return tre_macros[i].expansion;
436	}
437
438	static int
439	tre_compare_lit(const void a, const void b)
440	{
441	const tre_literal_t const la = a;
442	const tre_literal_t const lb = b;
443	/* assumes the range of valid code_min is < INT_MAX */
444	return la[0]->code_min - lb[0]->code_min;
445	}
446
447	struct literals {
448	tre_mem_t mem;
449	tre_literal_t **a;
450	int len;
451	int cap;
452	};
453
454	static tre_literal_t tre_new_lit(struct literals p)
455	{
456	tre_literal_t **a;
457	if (p->len >= p->cap) {
458	if (p->cap >= 1<<15)
459	return 0;
460	p->cap *= 2;
461	a = xreallocrealloc(p->a, p->cap * sizeof *p->a);
462	if (!a)
463	return 0;
464	p->a = a;
465	}
466	a = p->a + p->len++;
467	a = tre_mem_calloc(p->mem, sizeof a)__tre_mem_alloc_impl(p->mem, 0, ((void)0), 1, sizeof **a);
468	return *a;
469	}
470
471	static int add_icase_literals(struct literals *ls, int min, int max)
472	{
473	tre_literal_t *lit;
474	int b, e, c;
475	for (c=min; c<=max; ) {
476	/* assumes islower(c) and isupper(c) are exclusive
477	and toupper(c)!=c if islower(c).
478	multiple opposite case characters are not supported */
479	if (tre_isloweriswlower(c)) {
480	b = e = tre_touppertowupper(c);
481	for (c++, e++; c<=max; c++, e++)
482	if (tre_touppertowupper(c) != e) break;
483	} else if (tre_isupperiswupper(c)) {
484	b = e = tre_tolowertowlower(c);
485	for (c++, e++; c<=max; c++, e++)
486	if (tre_tolowertowlower(c) != e) break;
487	} else {
488	c++;
489	continue;
490	}
491	lit = tre_new_lit(ls);
492	if (!lit)
493	return -1;
494	lit->code_min = b;
495	lit->code_max = e-1;
496	lit->position = -1;
497	}
498	return 0;
499	}
500
501
502	/* Maximum number of character classes in a negated bracket expression. */
503	#define MAX_NEG_CLASSES64 64
504
505	struct neg {
506	int negate;
507	int len;
508	tre_ctype_t a[MAX_NEG_CLASSES64];
509	};
510
511	// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
512
513	/*
514	bracket grammar:
515	Bracket = '[' List ']' \| '[^' List ']'
516	List = Term \| List Term
517	Term = Char \| Range \| Chclass \| Eqclass
518	Range = Char '-' Char \| Char '-' '-'
519	Char = Coll \| coll_single
520	Meta = ']' \| '-'
521	Coll = '[.' coll_single '.]' \| '[.' coll_multi '.]' \| '[.' Meta '.]'
522	Eqclass = '[=' coll_single '=]' \| '[=' coll_multi '=]'
523	Chclass = '[:' class ':]'
524
525	coll_single is a single char collating element but it can be
526	'-' only at the beginning or end of a List and
527	']' only at the beginning of a List and
528	'^' anywhere except after the openning '['
529	*/
530
531	static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t ctx, const char s, struct literals ls, struct neg neg)
532	{
533	const char *start = s;
534	tre_ctype_t class;
535	int min, max;
536	wchar_t wc;
537	int len;
538
539	for (;;) {
540	class = 0;
541	len = mbtowc(&wc, s, -1);
542	if (len <= 0)
543	return *s ? REG_BADPAT2 : REG_EBRACK7;
544	if (*s == ']' && s != start) {
545	ctx->s = s+1;
546	return REG_OK0;
547	}
548	if (*s == '-' && s != start && s[1] != ']' &&
549	/* extension: [a-z--@] is accepted as [a-z]\|[--@] */
550	(s[1] != '-' \|\| s[2] == ']'))
551	return REG_ERANGE11;
552	if (*s == '[' && (s[1] == '.' \|\| s[1] == '='))
553	/* collating symbols and equivalence classes are not supported */
554	return REG_ECOLLATE3;
555	if (*s == '[' && s[1] == ':') {
556	char tmp[CHARCLASS_NAME_MAX14+1];
557	s += 2;
558	for (len=0; len < CHARCLASS_NAME_MAX14 && s[len]; len++) {
559	if (s[len] == ':') {
560	memcpy(tmp, s, len);
561	tmp[len] = 0;
562	class = tre_ctypewctype(tmp);
563	break;
564	}
565	}
566	if (!class \|\| s[len+1] != ']')
567	return REG_ECTYPE4;
568	min = 0;
569	max = TRE_CHAR_MAX0x10ffff;
570	s += len+2;
571	} else {
572	min = max = wc;
573	s += len;
574	if (*s == '-' && s[1] != ']') {
575	s++;
576	len = mbtowc(&wc, s, -1);
577	max = wc;
578	/* XXX - Should use collation order instead of
579	encoding values in character ranges. */
580	if (len <= 0 \|\| min > max)
581	return REG_ERANGE11;
582	s += len;
583	}
584	}
585
586	if (class && neg->negate) {
587	if (neg->len >= MAX_NEG_CLASSES64)
588	return REG_ESPACE12;
589	neg->a[neg->len++] = class;
590	} else {
591	tre_literal_t *lit = tre_new_lit(ls);
592	if (!lit)
593	return REG_ESPACE12;
594	lit->code_min = min;
595	lit->code_max = max;
596	lit->class = class;
597	lit->position = -1;
598
599	/* Add opposite-case codepoints if REG_ICASE is present.
600	It seems that POSIX requires that bracket negation
601	should happen before case-folding, but most practical
602	implementations do it the other way around. Changing
603	the order would need efficient representation of
604	case-fold ranges and bracket range sets even with
605	simple patterns so this is ok for now. */
606	if (ctx->cflags & REG_ICASE2 && !class)
607	if (add_icase_literals(ls, min, max))
608	return REG_ESPACE12;
609	}
610	}
611	}
612
613	static reg_errcode_t parse_bracket(tre_parse_ctx_t ctx, const char s)
614	{
615	int i, max, min, negmax, negmin;
616	tre_ast_node_t node = 0, n;
617	tre_ctype_t *nc = 0;
618	tre_literal_t *lit;
619	struct literals ls;
620	struct neg neg;
621	reg_errcode_t err;
622
623	ls.mem = ctx->mem;
624	ls.len = 0;
625	ls.cap = 32;
626	ls.a = xmallocmalloc(ls.cap * sizeof *ls.a);
627	if (!ls.a)
628	return REG_ESPACE12;
629	neg.len = 0;
630	neg.negate = *s == '^';
631	if (neg.negate)
632	s++;
633
634	err = parse_bracket_terms(ctx, s, &ls, &neg);
635	if (err != REG_OK0)
636	goto parse_bracket_done;
637
638	if (neg.negate) {
639	/* Sort the array if we need to negate it. */
640	qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641	/* extra lit for the last negated range */
642	lit = tre_new_lit(&ls);
643	if (!lit) {
644	err = REG_ESPACE12;
645	goto parse_bracket_done;
646	}
647	lit->code_min = TRE_CHAR_MAX0x10ffff+1;
648	lit->code_max = TRE_CHAR_MAX0x10ffff+1;
649	lit->position = -1;
650	/* negated classes */
651	if (neg.len) {
652	nc = tre_mem_alloc(ctx->mem, (neg.len+1)sizeof neg.a)__tre_mem_alloc_impl(ctx->mem, 0, ((void)0), 0, (neg.len+ 1)sizeof *neg.a);
653	if (!nc) {
654	err = REG_ESPACE12;
655	goto parse_bracket_done;
656	}
657	memcpy(nc, neg.a, neg.lensizeof neg.a);
658	nc[neg.len] = 0;
659	}
660	}
661
662	/* Build a union of the items in the array, negated if necessary. */
663	negmax = negmin = 0;
	Value stored to 'negmax' is never read
664	for (i = 0; i < ls.len; i++) {
665	lit = ls.a[i];
666	min = lit->code_min;
667	max = lit->code_max;
668	if (neg.negate) {
669	if (min <= negmin) {
670	/* Overlap. */
671	negmin = MAX(max + 1, negmin)(((max + 1) >= (negmin)) ? (max + 1) : (negmin));
672	continue;
673	}
674	negmax = min - 1;
675	lit->code_min = negmin;
676	lit->code_max = negmax;
677	negmin = max + 1;
678	}
679	lit->position = ctx->position;
680	lit->neg_classes = nc;
681	n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682	node = tre_ast_new_union(ctx->mem, node, n);
683	if (!node) {
684	err = REG_ESPACE12;
685	break;
686	}
687	}
688
689	parse_bracket_done:
690	xfreefree(ls.a);
691	ctx->position++;
692	ctx->n = node;
693	return err;
694	}
695
696	static const char parse_dup_count(const char s, int *n)
697	{
698	*n = -1;
699	if (!isdigit(s)(0 ? isdigit(s) : ((unsigned)(*s)-'0') < 10))
700	return s;
701	*n = 0;
702	for (;;) {
703	n = 10 n + (s - '0');
704	s++;
705	if (!isdigit(s)(0 ? isdigit(s) : ((unsigned)(s)-'0') < 10) \|\| n > RE_DUP_MAX255)
706	break;
707	}
708	return s;
709	}
710
711	static const char parse_dup(const char s, int ere, int pmin, int pmax)
712	{
713	int min, max;
714
715	s = parse_dup_count(s, &min);
716	if (*s == ',')
717	s = parse_dup_count(s+1, &max);
718	else
719	max = min;
720
721	if (
722	(max < min && max >= 0) \|\|
723	max > RE_DUP_MAX255 \|\|
724	min > RE_DUP_MAX255 \|\|
725	min < 0 \|\|
726	(!ere && *s++ != '\\') \|\|
727	*s++ != '}'
728	)
729	return 0;
730	*pmin = min;
731	*pmax = max;
732	return s;
733	}
734
735	static int hexval(unsigned c)
736	{
737	if (c-'0'<10) return c-'0';
738	c \|= 32;
739	if (c-'a'<6) return c-'a'+10;
740	return -1;
741	}
742
743	static reg_errcode_t marksub(tre_parse_ctx_t ctx, tre_ast_node_t node, int subid)
744	{
745	if (node->submatch_id >= 0) {
746	tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
747	if (!n)
748	return REG_ESPACE12;
749	n = tre_ast_new_catenation(ctx->mem, n, node);
750	if (!n)
751	return REG_ESPACE12;
752	n->num_submatches = node->num_submatches;
753	node = n;
754	}
755	node->submatch_id = subid;
756	node->num_submatches++;
757	ctx->n = node;
758	return REG_OK0;
759	}
760
761	/*
762	BRE grammar:
763	Regex = Branch \| '^' \| '$' \| '^$' \| '^' Branch \| Branch '$' \| '^' Branch '$'
764	Branch = Atom \| Branch Atom
765	Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '$' Branch '$' \| back_ref
766	Dup = '*' \| '\{' Count '\}' \| '\{' Count ',\}' \| '\{' Count ',' Count '\}'
767
768	(leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
769
770	ERE grammar:
771	Regex = Branch \| Regex '\|' Branch
772	Branch = Atom \| Branch Atom
773	Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '(' Regex ')' \| '^' \| '$'
774	Dup = '*' \| '+' \| '?' \| '{' Count '}' \| '{' Count ',}' \| '{' Count ',' Count '}'
775
776	(a+?, ^, $+, \X, {, (\|a) are unspecified)
777	*/
778
779	static reg_errcode_t parse_atom(tre_parse_ctx_t ctx, const char s)
780	{
781	int len, ere = ctx->cflags & REG_EXTENDED1;
782	const char *p;
783	tre_ast_node_t *node;
784	wchar_t wc;
785	switch (*s) {
786	case '[':
787	return parse_bracket(ctx, s+1);
788	case '\\':
789	p = tre_expand_macro(s+1);
790	if (p) {
791	/* assume \X expansion is a single atom */
792	reg_errcode_t err = parse_atom(ctx, p);
793	ctx->s = s+2;
794	return err;
795	}
796	/* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
797	switch (*++s) {
798	case 0:
799	return REG_EESCAPE5;
800	case 'b':
801	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB64, -1);
802	break;
803	case 'B':
804	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB_NEG128, -1);
805	break;
806	case '<':
807	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOW16, -1);
808	break;
809	case '>':
810	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOW32, -1);
811	break;
812	case 'x':
813	s++;
814	int i, v = 0, c;
815	len = 2;
816	if (*s == '{') {
817	len = 8;
818	s++;
819	}
820	for (i=0; i<len && v<0x110000; i++) {
821	c = hexval(s[i]);
822	if (c < 0) break;
823	v = 16*v + c;
824	}
825	s += i;
826	if (len == 8) {
827	if (*s != '}')
828	return REG_EBRACE9;
829	s++;
830	}
831	node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
832	s--;
833	break;
834	case '{':
835	case '+':
836	case '?':
837	/* extension: treat \+, \? as repetitions in BRE */
838	/* reject repetitions after empty expression in BRE */
839	if (!ere)
840	return REG_BADRPT13;
841	case '\|':
842	/* extension: treat \\| as alternation in BRE */
843	if (!ere) {
844	node = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
845	s--;
846	goto end;
847	}
848	/* fallthrough */
849	default:
850	if (!ere && (unsigned)*s-'1' < 9) {
851	/* back reference */
852	int val = *s - '0';
853	node = tre_ast_new_literal(ctx->mem, BACKREF-4, val, ctx->position++);
854	ctx->max_backref = MAX(val, ctx->max_backref)(((val) >= (ctx->max_backref)) ? (val) : (ctx->max_backref ));
855	} else {
856	/* extension: accept unknown escaped char
857	as a literal */
858	goto parse_literal;
859	}
860	}
861	s++;
862	break;
863	case '.':
864	if (ctx->cflags & REG_NEWLINE4) {
865	tre_ast_node_t tmp1, tmp2;
866	tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
867	tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX0x10ffff, ctx->position++);
868	if (tmp1 && tmp2)
869	node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
870	else
871	node = 0;
872	} else {
873	node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX0x10ffff, ctx->position++);
874	}
875	s++;
876	break;
877	case '^':
878	/* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
879	if (!ere && s != ctx->re)
880	goto parse_literal;
881	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOL1, -1);
882	s++;
883	break;
884	case '$':
885	/* '$' is special everywhere in EREs, and in the end of the string in BREs. */
886	if (!ere && s[1])
887	goto parse_literal;
888	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOL2, -1);
889	s++;
890	break;
891	case '*':
892	case '{':
893	case '+':
894	case '?':
895	/* reject repetitions after empty expression in ERE */
896	if (ere)
897	return REG_BADRPT13;
898	case '\|':
899	if (!ere)
900	goto parse_literal;
901	case 0:
902	node = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
903	break;
904	default:
905	parse_literal:
906	len = mbtowc(&wc, s, -1);
907	if (len < 0)
908	return REG_BADPAT2;
909	if (ctx->cflags & REG_ICASE2 && (tre_isupperiswupper(wc) \|\| tre_isloweriswlower(wc))) {
910	tre_ast_node_t tmp1, tmp2;
911	/* multiple opposite case characters are not supported */
912	tmp1 = tre_ast_new_literal(ctx->mem, tre_touppertowupper(wc), tre_touppertowupper(wc), ctx->position);
913	tmp2 = tre_ast_new_literal(ctx->mem, tre_tolowertowlower(wc), tre_tolowertowlower(wc), ctx->position);
914	if (tmp1 && tmp2)
915	node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
916	else
917	node = 0;
918	} else {
919	node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
920	}
921	ctx->position++;
922	s += len;
923	break;
924	}
925	end:
926	if (!node)
927	return REG_ESPACE12;
928	ctx->n = node;
929	ctx->s = s;
930	return REG_OK0;
931	}
932
933	#define PUSHPTR(err, s, v)do { if ((err = tre_stack_push_voidptr(s, v)) != 0) return err ; } while(0) do { \
934	if ((err = tre_stack_push_voidptr(s, v)) != REG_OK0) \
935	return err; \
936	} while(0)
937
938	#define PUSHINT(err, s, v)do { if ((err = tre_stack_push_int(s, v)) != 0) return err; } while(0) do { \
939	if ((err = tre_stack_push_int(s, v)) != REG_OK0) \
940	return err; \
941	} while(0)
942
943	static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
944	{
945	tre_ast_node_t nbranch=0, nunion=0;
946	int ere = ctx->cflags & REG_EXTENDED1;
947	const char *s = ctx->re;
948	int subid = 0;
949	int depth = 0;
950	reg_errcode_t err;
951	tre_stack_t *stack = ctx->stack;
952
953	PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return err; } while(0);
954	for (;;) {
955	if ((!ere && *s == '\\' && s[1] == '(') \|\|
956	(ere && *s == '(')) {
957	PUSHPTR(err, stack, nunion)do { if ((err = tre_stack_push_voidptr(stack, nunion)) != 0) return err; } while(0);
958	PUSHPTR(err, stack, nbranch)do { if ((err = tre_stack_push_voidptr(stack, nbranch)) != 0) return err; } while(0);
959	PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return err; } while(0);
960	s++;
961	if (!ere)
962	s++;
963	depth++;
964	nbranch = nunion = 0;
965	continue;
966	}
967	if ((!ere && *s == '\\' && s[1] == ')') \|\|
968	(ere && *s == ')' && depth)) {
969	ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
970	if (!ctx->n)
971	return REG_ESPACE12;
972	} else {
973	err = parse_atom(ctx, s);
974	if (err != REG_OK0)
975	return err;
976	s = ctx->s;
977	}
978
979	parse_iter:
980	for (;;) {
981	int min, max;
982
983	if (s!='\\' && s!='*') {
984	if (!ere)
985	break;
986	if (s!='+' && s!='?' && *s!='{')
987	break;
988	}
989	if (*s=='\\' && ere)
990	break;
991	/* extension: treat \+, \? as repetitions in BRE */
992	if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
993	break;
994	if (*s=='\\')
995	s++;
996
997	/* handle ^* at the start of a complete BRE. */
998	if (!ere && s==ctx->re+1 && s[-1]=='^')
999	break;
1000
1001	/* extension: multiple consecutive *+?{,} is unspecified,
1002	but (a+)+ has to be supported so accepting a++ makes
1003	sense, note however that the RE_DUP_MAX limit can be
1004	circumvented: (a{255}){255} uses a lot of memory.. */
1005	if (*s=='{') {
1006	s = parse_dup(s+1, ere, &min, &max);
1007	if (!s)
1008	return REG_BADBR10;
1009	} else {
1010	min=0;
1011	max=-1;
1012	if (*s == '+')
1013	min = 1;
1014	if (*s == '?')
1015	max = 1;
1016	s++;
1017	}
1018	if (max == 0)
1019	ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
1020	else
1021	ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1022	if (!ctx->n)
1023	return REG_ESPACE12;
1024	}
1025
1026	nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1027	if ((ere && *s == '\|') \|\|
1028	(ere && *s == ')' && depth) \|\|
1029	(!ere && *s == '\\' && s[1] == ')') \|\|
1030	/* extension: treat \\| as alternation in BRE */
1031	(!ere && *s == '\\' && s[1] == '\|') \|\|
1032	!*s) {
1033	/* extension: empty branch is unspecified (), (\|a), (a\|)
1034	here they are not rejected but match on empty string */
1035	int c = *s;
1036	nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1037	nbranch = 0;
1038
1039	if (c == '\\' && s[1] == '\|') {
1040	s+=2;
1041	} else if (c == '\|') {
1042	s++;
1043	} else {
1044	if (c == '\\') {
1045	if (!depth) return REG_EPAREN8;
1046	s+=2;
1047	} else if (c == ')')
1048	s++;
1049	depth--;
1050	err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1051	if (err != REG_OK0)
1052	return err;
1053	if (!c && depth<0) {
1054	ctx->submatch_id = subid;
1055	return REG_OK0;
1056	}
1057	if (!c \|\| depth<0)
1058	return REG_EPAREN8;
1059	nbranch = tre_stack_pop_voidptr(stack);
1060	nunion = tre_stack_pop_voidptr(stack);
1061	goto parse_iter;
1062	}
1063	}
1064	}
1065	}
1066
1067
1068	/***********************************************************************
1069	from tre-compile.c
1070	***********************************************************************/
1071
1072
1073	/*
1074	TODO:
1075	- Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1076	function calls.
1077	*/
1078
1079	/*
1080	Algorithms to setup tags so that submatch addressing can be done.
1081	*/
1082
1083
1084	/* Inserts a catenation node to the root of the tree given in `node'.
1085	As the left child a new tag with number `tag_id' to `node' is added,
1086	and the right child is the old root. */
1087	static reg_errcode_t
1088	tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1089	{
1090	tre_catenation_t *c;
1091
1092	c = tre_mem_alloc(mem, sizeof(c))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, sizeof(*c));
1093	if (c == NULL((void*)0))
1094	return REG_ESPACE12;
1095	c->left = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1096	if (c->left == NULL((void*)0))
1097	return REG_ESPACE12;
1098	c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t ));
1099	if (c->right == NULL((void*)0))
1100	return REG_ESPACE12;
1101
1102	c->right->obj = node->obj;
1103	c->right->type = node->type;
1104	c->right->nullable = -1;
1105	c->right->submatch_id = -1;
1106	c->right->firstpos = NULL((void*)0);
1107	c->right->lastpos = NULL((void*)0);
1108	c->right->num_tags = 0;
1109	node->obj = c;
1110	node->type = CATENATION;
1111	return REG_OK0;
1112	}
1113
1114	/* Inserts a catenation node to the root of the tree given in `node'.
1115	As the right child a new tag with number `tag_id' to `node' is added,
1116	and the left child is the old root. */
1117	static reg_errcode_t
1118	tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1119	{
1120	tre_catenation_t *c;
1121
1122	c = tre_mem_alloc(mem, sizeof(c))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, sizeof(*c));
1123	if (c == NULL((void*)0))
1124	return REG_ESPACE12;
1125	c->right = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1126	if (c->right == NULL((void*)0))
1127	return REG_ESPACE12;
1128	c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t ));
1129	if (c->left == NULL((void*)0))
1130	return REG_ESPACE12;
1131
1132	c->left->obj = node->obj;
1133	c->left->type = node->type;
1134	c->left->nullable = -1;
1135	c->left->submatch_id = -1;
1136	c->left->firstpos = NULL((void*)0);
1137	c->left->lastpos = NULL((void*)0);
1138	c->left->num_tags = 0;
1139	node->obj = c;
1140	node->type = CATENATION;
1141	return REG_OK0;
1142	}
1143
1144	typedef enum {
1145	ADDTAGS_RECURSE,
1146	ADDTAGS_AFTER_ITERATION,
1147	ADDTAGS_AFTER_UNION_LEFT,
1148	ADDTAGS_AFTER_UNION_RIGHT,
1149	ADDTAGS_AFTER_CAT_LEFT,
1150	ADDTAGS_AFTER_CAT_RIGHT,
1151	ADDTAGS_SET_SUBMATCH_END
1152	} tre_addtags_symbol_t;
1153
1154
1155	typedef struct {
1156	int tag;
1157	int next_tag;
1158	} tre_tag_states_t;
1159
1160
1161	/* Go through `regset' and set submatch data for submatches that are
1162	using this tag. */
1163	static void
1164	tre_purge_regset(int regset, tre_tnfa_t tnfa, int tag)
1165	{
1166	int i;
1167
1168	for (i = 0; regset[i] >= 0; i++)
1169	{
1170	int id = regset[i] / 2;
1171	int start = !(regset[i] % 2);
1172	if (start)
1173	tnfa->submatch_data[id].so_tag = tag;
1174	else
1175	tnfa->submatch_data[id].eo_tag = tag;
1176	}
1177	regset[0] = -1;
1178	}
1179
1180
1181	/* Adds tags to appropriate locations in the parse tree in `tree', so that
1182	subexpressions marked for submatch addressing can be traced. */
1183	static reg_errcode_t
1184	tre_add_tags(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree,
1185	tre_tnfa_t *tnfa)
1186	{
1187	reg_errcode_t status = REG_OK0;
1188	tre_addtags_symbol_t symbol;
1189	tre_ast_node_t node = tree; / Tree node we are currently looking at. */
1190	int bottom = tre_stack_num_objects(stack);
1191	/* True for first pass (counting number of needed tags) */
1192	int first_pass = (mem == NULL((void)0) \|\| tnfa == NULL((void)0));
1193	int regset, orig_regset;
1194	int num_tags = 0; /* Total number of tags. */
1195	int num_minimals = 0; /* Number of special minimal tags. */
1196	int tag = 0; /* The tag that is to be added next. */
1197	int next_tag = 1; /* Next tag to use after this one. */
1198	int parents; / Stack of submatches the current submatch is
1199	contained in. */
1200	int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1201	tre_tag_states_t *saved_states;
1202
1203	tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1204	if (!first_pass)
1205	{
1206	tnfa->end_tag = 0;
1207	tnfa->minimal_tags[0] = -1;
1208	}
1209
1210	regset = xmallocmalloc(sizeof(regset) ((tnfa->num_submatches + 1) * 2));
1211	if (regset == NULL((void*)0))
1212	return REG_ESPACE12;
1213	regset[0] = -1;
1214	orig_regset = regset;
1215
1216	parents = xmallocmalloc(sizeof(parents) (tnfa->num_submatches + 1));
1217	if (parents == NULL((void*)0))
1218	{
1219	xfreefree(regset);
1220	return REG_ESPACE12;
1221	}
1222	parents[0] = -1;
1223
1224	saved_states = xmallocmalloc(sizeof(saved_states) (tnfa->num_submatches + 1));
1225	if (saved_states == NULL((void*)0))
1226	{
1227	xfreefree(regset);
1228	xfreefree(parents);
1229	return REG_ESPACE12;
1230	}
1231	else
1232	{
1233	unsigned int i;
1234	for (i = 0; i <= tnfa->num_submatches; i++)
1235	saved_states[i].tag = -1;
1236	}
1237
1238	STACK_PUSH(stack, voidptr, node)do { status = tre_stack_push_voidptr(stack, node); } while ( 0 );
1239	STACK_PUSH(stack, int, ADDTAGS_RECURSE)do { status = tre_stack_push_int(stack, ADDTAGS_RECURSE); } while ( 0);
1240
1241	while (tre_stack_num_objects(stack) > bottom)
1242	{
1243	if (status != REG_OK0)
1244	break;
1245
1246	symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1247	switch (symbol)
1248	{
1249
1250	case ADDTAGS_SET_SUBMATCH_END:
1251	{
1252	int id = tre_stack_pop_int(stack);
1253	int i;
1254
1255	/* Add end of this submatch to regset. */
1256	for (i = 0; regset[i] >= 0; i++);
1257	regset[i] = id * 2 + 1;
1258	regset[i + 1] = -1;
1259
1260	/* Pop this submatch from the parents stack. */
1261	for (i = 0; parents[i] >= 0; i++);
1262	parents[i - 1] = -1;
1263	break;
1264	}
1265
1266	case ADDTAGS_RECURSE:
1267	node = tre_stack_pop_voidptr(stack);
1268
1269	if (node->submatch_id >= 0)
1270	{
1271	int id = node->submatch_id;
1272	int i;
1273
1274
1275	/* Add start of this submatch to regset. */
1276	for (i = 0; regset[i] >= 0; i++);
1277	regset[i] = id * 2;
1278	regset[i + 1] = -1;
1279
1280	if (!first_pass)
1281	{
1282	for (i = 0; parents[i] >= 0; i++);
1283	tnfa->submatch_data[id].parents = NULL((void*)0);
1284	if (i > 0)
1285	{
1286	int p = xmallocmalloc(sizeof(p) * (i + 1));
1287	if (p == NULL((void*)0))
1288	{
1289	status = REG_ESPACE12;
1290	break;
1291	}
1292	assert(tnfa->submatch_data[id].parents == NULL)(void)0;
1293	tnfa->submatch_data[id].parents = p;
1294	for (i = 0; parents[i] >= 0; i++)
1295	p[i] = parents[i];
1296	p[i] = -1;
1297	}
1298	}
1299
1300	/* Add end of this submatch to regset after processing this
1301	node. */
1302	STACK_PUSHX(stack, int, node->submatch_id){ status = tre_stack_push_int(stack, node->submatch_id); if (status != 0) break; };
1303	STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END){ status = tre_stack_push_int(stack, ADDTAGS_SET_SUBMATCH_END ); if (status != 0) break; };
1304	}
1305
1306	switch (node->type)
1307	{
1308	case LITERAL:
1309	{
1310	tre_literal_t *lit = node->obj;
1311
1312	if (!IS_SPECIAL(lit)((lit)->code_min < 0) \|\| IS_BACKREF(lit)((lit)->code_min == -4))
1313	{
1314	int i;
1315	if (regset[0] >= 0)
1316	{
1317	/* Regset is not empty, so add a tag before the
1318	literal or backref. */
1319	if (!first_pass)
1320	{
1321	status = tre_add_tag_left(mem, node, tag);
1322	tnfa->tag_directions[tag] = direction;
1323	if (minimal_tag >= 0)
1324	{
1325	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1326	tnfa->minimal_tags[i] = tag;
1327	tnfa->minimal_tags[i + 1] = minimal_tag;
1328	tnfa->minimal_tags[i + 2] = -1;
1329	minimal_tag = -1;
1330	num_minimals++;
1331	}
1332	tre_purge_regset(regset, tnfa, tag);
1333	}
1334	else
1335	{
1336	node->num_tags = 1;
1337	}
1338
1339	regset[0] = -1;
1340	tag = next_tag;
1341	num_tags++;
1342	next_tag++;
1343	}
1344	}
1345	else
1346	{
1347	assert(!IS_TAG(lit))(void)0;
1348	}
1349	break;
1350	}
1351	case CATENATION:
1352	{
1353	tre_catenation_t *cat = node->obj;
1354	tre_ast_node_t *left = cat->left;
1355	tre_ast_node_t *right = cat->right;
1356	int reserved_tag = -1;
1357
1358
1359	/* After processing right child. */
1360	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1361	STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_RIGHT) ; if (status != 0) break; };
1362
1363	/* Process right child. */
1364	STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status != 0) break; };
1365	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1366
1367	/* After processing left child. */
1368	STACK_PUSHX(stack, int, next_tag + left->num_tags){ status = tre_stack_push_int(stack, next_tag + left->num_tags ); if (status != 0) break; };
1369	if (left->num_tags > 0 && right->num_tags > 0)
1370	{
1371	/* Reserve the next tag to the right child. */
1372	reserved_tag = next_tag;
1373	next_tag++;
1374	}
1375	STACK_PUSHX(stack, int, reserved_tag){ status = tre_stack_push_int(stack, reserved_tag); if (status != 0) break; };
1376	STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_LEFT); if (status != 0) break; };
1377
1378	/* Process left child. */
1379	STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status != 0) break; };
1380	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1381
1382	}
1383	break;
1384	case ITERATION:
1385	{
1386	tre_iteration_t *iter = node->obj;
1387
1388	if (first_pass)
1389	{
1390	STACK_PUSHX(stack, int, regset[0] >= 0 \|\| iter->minimal){ status = tre_stack_push_int(stack, regset[0] >= 0 \|\| iter ->minimal); if (status != 0) break; };
1391	}
1392	else
1393	{
1394	STACK_PUSHX(stack, int, tag){ status = tre_stack_push_int(stack, tag); if (status != 0) break ; };
1395	STACK_PUSHX(stack, int, iter->minimal){ status = tre_stack_push_int(stack, iter->minimal); if (status != 0) break; };
1396	}
1397	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1398	STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_ITERATION) ; if (status != 0) break; };
1399
1400	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
1401	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1402
1403	/* Regset is not empty, so add a tag here. */
1404	if (regset[0] >= 0 \|\| iter->minimal)
1405	{
1406	if (!first_pass)
1407	{
1408	int i;
1409	status = tre_add_tag_left(mem, node, tag);
1410	if (iter->minimal)
1411	tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1412	else
1413	tnfa->tag_directions[tag] = direction;
1414	if (minimal_tag >= 0)
1415	{
1416	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1417	tnfa->minimal_tags[i] = tag;
1418	tnfa->minimal_tags[i + 1] = minimal_tag;
1419	tnfa->minimal_tags[i + 2] = -1;
1420	minimal_tag = -1;
1421	num_minimals++;
1422	}
1423	tre_purge_regset(regset, tnfa, tag);
1424	}
1425
1426	regset[0] = -1;
1427	tag = next_tag;
1428	num_tags++;
1429	next_tag++;
1430	}
1431	direction = TRE_TAG_MINIMIZE;
1432	}
1433	break;
1434	case UNION:
1435	{
1436	tre_union_t *uni = node->obj;
1437	tre_ast_node_t *left = uni->left;
1438	tre_ast_node_t *right = uni->right;
1439	int left_tag;
1440	int right_tag;
1441
1442	if (regset[0] >= 0)
1443	{
1444	left_tag = next_tag;
1445	right_tag = next_tag + 1;
1446	}
1447	else
1448	{
1449	left_tag = tag;
1450	right_tag = next_tag;
1451	}
1452
1453	/* After processing right child. */
1454	STACK_PUSHX(stack, int, right_tag){ status = tre_stack_push_int(stack, right_tag); if (status != 0) break; };
1455	STACK_PUSHX(stack, int, left_tag){ status = tre_stack_push_int(stack, left_tag); if (status != 0) break; };
1456	STACK_PUSHX(stack, voidptr, regset){ status = tre_stack_push_voidptr(stack, regset); if (status != 0) break; };
1457	STACK_PUSHX(stack, int, regset[0] >= 0){ status = tre_stack_push_int(stack, regset[0] >= 0); if ( status != 0) break; };
1458	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1459	STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status != 0) break; };
1460	STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status != 0) break; };
1461	STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_RIGHT ); if (status != 0) break; };
1462
1463	/* Process right child. */
1464	STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status != 0) break; };
1465	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1466
1467	/* After processing left child. */
1468	STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_LEFT ); if (status != 0) break; };
1469
1470	/* Process left child. */
1471	STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status != 0) break; };
1472	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1473
1474	/* Regset is not empty, so add a tag here. */
1475	if (regset[0] >= 0)
1476	{
1477	if (!first_pass)
1478	{
1479	int i;
1480	status = tre_add_tag_left(mem, node, tag);
1481	tnfa->tag_directions[tag] = direction;
1482	if (minimal_tag >= 0)
1483	{
1484	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1485	tnfa->minimal_tags[i] = tag;
1486	tnfa->minimal_tags[i + 1] = minimal_tag;
1487	tnfa->minimal_tags[i + 2] = -1;
1488	minimal_tag = -1;
1489	num_minimals++;
1490	}
1491	tre_purge_regset(regset, tnfa, tag);
1492	}
1493
1494	regset[0] = -1;
1495	tag = next_tag;
1496	num_tags++;
1497	next_tag++;
1498	}
1499
1500	if (node->num_submatches > 0)
1501	{
1502	/* The next two tags are reserved for markers. */
1503	next_tag++;
1504	tag = next_tag;
1505	next_tag++;
1506	}
1507
1508	break;
1509	}
1510	}
1511
1512	if (node->submatch_id >= 0)
1513	{
1514	int i;
1515	/* Push this submatch on the parents stack. */
1516	for (i = 0; parents[i] >= 0; i++);
1517	parents[i] = node->submatch_id;
1518	parents[i + 1] = -1;
1519	}
1520
1521	break; /* end case: ADDTAGS_RECURSE */
1522
1523	case ADDTAGS_AFTER_ITERATION:
1524	{
1525	int minimal = 0;
1526	int enter_tag;
1527	node = tre_stack_pop_voidptr(stack);
1528	if (first_pass)
1529	{
1530	node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1531	+ tre_stack_pop_int(stack);
1532	minimal_tag = -1;
1533	}
1534	else
1535	{
1536	minimal = tre_stack_pop_int(stack);
1537	enter_tag = tre_stack_pop_int(stack);
1538	if (minimal)
1539	minimal_tag = enter_tag;
1540	}
1541
1542	if (!first_pass)
1543	{
1544	if (minimal)
1545	direction = TRE_TAG_MINIMIZE;
1546	else
1547	direction = TRE_TAG_MAXIMIZE;
1548	}
1549	break;
1550	}
1551
1552	case ADDTAGS_AFTER_CAT_LEFT:
1553	{
1554	int new_tag = tre_stack_pop_int(stack);
1555	next_tag = tre_stack_pop_int(stack);
1556	if (new_tag >= 0)
1557	{
1558	tag = new_tag;
1559	}
1560	break;
1561	}
1562
1563	case ADDTAGS_AFTER_CAT_RIGHT:
1564	node = tre_stack_pop_voidptr(stack);
1565	if (first_pass)
1566	node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1567	+ ((tre_catenation_t *)node->obj)->right->num_tags;
1568	break;
1569
1570	case ADDTAGS_AFTER_UNION_LEFT:
1571	/* Lift the bottom of the `regset' array so that when processing
1572	the right operand the items currently in the array are
1573	invisible. The original bottom was saved at ADDTAGS_UNION and
1574	will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1575	while (*regset >= 0)
1576	regset++;
1577	break;
1578
1579	case ADDTAGS_AFTER_UNION_RIGHT:
1580	{
1581	int added_tags, tag_left, tag_right;
1582	tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1583	tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1584	node = tre_stack_pop_voidptr(stack);
1585	added_tags = tre_stack_pop_int(stack);
1586	if (first_pass)
1587	{
1588	node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1589	+ ((tre_union_t *)node->obj)->right->num_tags + added_tags
1590	+ ((node->num_submatches > 0) ? 2 : 0);
1591	}
1592	regset = tre_stack_pop_voidptr(stack);
1593	tag_left = tre_stack_pop_int(stack);
1594	tag_right = tre_stack_pop_int(stack);
1595
1596	/* Add tags after both children, the left child gets a smaller
1597	tag than the right child. This guarantees that we prefer
1598	the left child over the right child. */
1599	/* XXX - This is not always necessary (if the children have
1600	tags which must be seen for every match of that child). */
1601	/* XXX - Check if this is the only place where tre_add_tag_right
1602	is used. If so, use tre_add_tag_left (putting the tag before
1603	the child as opposed after the child) and throw away
1604	tre_add_tag_right. */
1605	if (node->num_submatches > 0)
1606	{
1607	if (!first_pass)
1608	{
1609	status = tre_add_tag_right(mem, left, tag_left);
1610	tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1611	if (status == REG_OK0)
1612	status = tre_add_tag_right(mem, right, tag_right);
1613	tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1614	}
1615	num_tags += 2;
1616	}
1617	direction = TRE_TAG_MAXIMIZE;
1618	break;
1619	}
1620
1621	default:
1622	assert(0)(void)0;
1623	break;
1624
1625	} /* end switch(symbol) */
1626	} /* end while(tre_stack_num_objects(stack) > bottom) */
1627
1628	if (!first_pass)
1629	tre_purge_regset(regset, tnfa, tag);
1630
1631	if (!first_pass && minimal_tag >= 0)
1632	{
1633	int i;
1634	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1635	tnfa->minimal_tags[i] = tag;
1636	tnfa->minimal_tags[i + 1] = minimal_tag;
1637	tnfa->minimal_tags[i + 2] = -1;
1638	minimal_tag = -1;
1639	num_minimals++;
1640	}
1641
1642	assert(tree->num_tags == num_tags)(void)0;
1643	tnfa->end_tag = num_tags;
1644	tnfa->num_tags = num_tags;
1645	tnfa->num_minimals = num_minimals;
1646	xfreefree(orig_regset);
1647	xfreefree(parents);
1648	xfreefree(saved_states);
1649	return status;
1650	}
1651
1652
1653
1654	/*
1655	AST to TNFA compilation routines.
1656	*/
1657
1658	typedef enum {
1659	COPY_RECURSE,
1660	COPY_SET_RESULT_PTR
1661	} tre_copyast_symbol_t;
1662
1663	/* Flags for tre_copy_ast(). */
1664	#define COPY_REMOVE_TAGS1 1
1665	#define COPY_MAXIMIZE_FIRST_TAG2 2
1666
1667	static reg_errcode_t
1668	tre_copy_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,
1669	int flags, int pos_add, tre_tag_direction_t tag_directions,
1670	tre_ast_node_t *copy, int max_pos)
1671	{
1672	reg_errcode_t status = REG_OK0;
1673	int bottom = tre_stack_num_objects(stack);
1674	int num_copied = 0;
1675	int first_tag = 1;
1676	tre_ast_node_t **result = copy;
1677	tre_copyast_symbol_t symbol;
1678
1679	STACK_PUSH(stack, voidptr, ast)do { status = tre_stack_push_voidptr(stack, ast); } while ( 0 );
1680	STACK_PUSH(stack, int, COPY_RECURSE)do { status = tre_stack_push_int(stack, COPY_RECURSE); } while ( 0);
1681
1682	while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1683	{
1684	tre_ast_node_t *node;
1685	if (status != REG_OK0)
1686	break;
1687
1688	symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1689	switch (symbol)
1690	{
1691	case COPY_SET_RESULT_PTR:
1692	result = tre_stack_pop_voidptr(stack);
1693	break;
1694	case COPY_RECURSE:
1695	node = tre_stack_pop_voidptr(stack);
1696	switch (node->type)
1697	{
1698	case LITERAL:
1699	{
1700	tre_literal_t *lit = node->obj;
1701	int pos = lit->position;
1702	int min = lit->code_min;
1703	int max = lit->code_max;
1704	if (!IS_SPECIAL(lit)((lit)->code_min < 0) \|\| IS_BACKREF(lit)((lit)->code_min == -4))
1705	{
1706	/* XXX - e.g. [ab] has only one position but two
1707	nodes, so we are creating holes in the state space
1708	here. Not fatal, just wastes memory. */
1709	pos += *pos_add;
1710	num_copied++;
1711	}
1712	else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_REMOVE_TAGS1))
1713	{
1714	/* Change this tag to empty. */
1715	min = EMPTY-1;
1716	max = pos = -1;
1717	}
1718	else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_MAXIMIZE_FIRST_TAG2)
1719	&& first_tag)
1720	{
1721	/* Maximize the first tag. */
1722	tag_directions[max] = TRE_TAG_MAXIMIZE;
1723	first_tag = 0;
1724	}
1725	*result = tre_ast_new_literal(mem, min, max, pos);
1726	if (result == NULL((void)0))
1727	status = REG_ESPACE12;
1728	else {
1729	tre_literal_t p = (result)->obj;
1730	p->class = lit->class;
1731	p->neg_classes = lit->neg_classes;
1732	}
1733
1734	if (pos > *max_pos)
1735	*max_pos = pos;
1736	break;
1737	}
1738	case UNION:
1739	{
1740	tre_union_t *uni = node->obj;
1741	tre_union_t *tmp;
1742	*result = tre_ast_new_union(mem, uni->left, uni->right);
1743	if (result == NULL((void)0))
1744	{
1745	status = REG_ESPACE12;
1746	break;
1747	}
1748	tmp = (*result)->obj;
1749	result = &tmp->left;
1750	STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if ( status != 0) break; };
1751	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1752	STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right); if (status != 0) break; };
1753	STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if (status != 0) break; };
1754	STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status != 0) break; };
1755	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1756	break;
1757	}
1758	case CATENATION:
1759	{
1760	tre_catenation_t *cat = node->obj;
1761	tre_catenation_t *tmp;
1762	*result = tre_ast_new_catenation(mem, cat->left, cat->right);
1763	if (result == NULL((void)0))
1764	{
1765	status = REG_ESPACE12;
1766	break;
1767	}
1768	tmp = (*result)->obj;
1769	tmp->left = NULL((void*)0);
1770	tmp->right = NULL((void*)0);
1771	result = &tmp->left;
1772
1773	STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if ( status != 0) break; };
1774	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1775	STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right); if (status != 0) break; };
1776	STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if (status != 0) break; };
1777	STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status != 0) break; };
1778	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1779	break;
1780	}
1781	case ITERATION:
1782	{
1783	tre_iteration_t *iter = node->obj;
1784	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
1785	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1786	*result = tre_ast_new_iter(mem, iter->arg, iter->min,
1787	iter->max, iter->minimal);
1788	if (result == NULL((void)0))
1789	{
1790	status = REG_ESPACE12;
1791	break;
1792	}
1793	iter = (*result)->obj;
1794	result = &iter->arg;
1795	break;
1796	}
1797	default:
1798	assert(0)(void)0;
1799	break;
1800	}
1801	break;
1802	}
1803	}
1804	*pos_add += num_copied;
1805	return status;
1806	}
1807
1808	typedef enum {
1809	EXPAND_RECURSE,
1810	EXPAND_AFTER_ITER
1811	} tre_expand_ast_symbol_t;
1812
1813	/* Expands each iteration node that has a finite nonzero minimum or maximum
1814	iteration count to a catenated sequence of copies of the node. */
1815	static reg_errcode_t
1816	tre_expand_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,
1817	int position, tre_tag_direction_t tag_directions)
1818	{
1819	reg_errcode_t status = REG_OK0;
1820	int bottom = tre_stack_num_objects(stack);
1821	int pos_add = 0;
1822	int pos_add_total = 0;
1823	int max_pos = 0;
1824	int iter_depth = 0;
1825
1826	STACK_PUSHR(stack, voidptr, ast){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ast); if (_status != 0) return _status; };
1827	STACK_PUSHR(stack, int, EXPAND_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, EXPAND_RECURSE ); if (_status != 0) return _status; };
1828	while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1829	{
1830	tre_ast_node_t *node;
1831	tre_expand_ast_symbol_t symbol;
1832
1833	if (status != REG_OK0)
1834	break;
1835
1836	symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1837	node = tre_stack_pop_voidptr(stack);
1838	switch (symbol)
1839	{
1840	case EXPAND_RECURSE:
1841	switch (node->type)
1842	{
1843	case LITERAL:
1844	{
1845	tre_literal_t *lit= node->obj;
1846	if (!IS_SPECIAL(lit)((lit)->code_min < 0) \|\| IS_BACKREF(lit)((lit)->code_min == -4))
1847	{
1848	lit->position += pos_add;
1849	if (lit->position > max_pos)
1850	max_pos = lit->position;
1851	}
1852	break;
1853	}
1854	case UNION:
1855	{
1856	tre_union_t *uni = node->obj;
1857	STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if ( status != 0) break; };
1858	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1859	STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status != 0) break; };
1860	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1861	break;
1862	}
1863	case CATENATION:
1864	{
1865	tre_catenation_t *cat = node->obj;
1866	STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if ( status != 0) break; };
1867	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1868	STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status != 0) break; };
1869	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1870	break;
1871	}
1872	case ITERATION:
1873	{
1874	tre_iteration_t *iter = node->obj;
1875	STACK_PUSHX(stack, int, pos_add){ status = tre_stack_push_int(stack, pos_add); if (status != 0 ) break; };
1876	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1877	STACK_PUSHX(stack, int, EXPAND_AFTER_ITER){ status = tre_stack_push_int(stack, EXPAND_AFTER_ITER); if ( status != 0) break; };
1878	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
1879	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1880	/* If we are going to expand this node at EXPAND_AFTER_ITER
1881	then don't increase the `pos' fields of the nodes now, it
1882	will get done when expanding. */
1883	if (iter->min > 1 \|\| iter->max > 1)
1884	pos_add = 0;
1885	iter_depth++;
1886	break;
1887	}
1888	default:
1889	assert(0)(void)0;
1890	break;
1891	}
1892	break;
1893	case EXPAND_AFTER_ITER:
1894	{
1895	tre_iteration_t *iter = node->obj;
1896	int pos_add_last;
1897	pos_add = tre_stack_pop_int(stack);
1898	pos_add_last = pos_add;
1899	if (iter->min > 1 \|\| iter->max > 1)
1900	{
1901	tre_ast_node_t seq1 = NULL((void)0), seq2 = NULL((void)0);
1902	int j;
1903	int pos_add_save = pos_add;
1904
1905	/* Create a catenated sequence of copies of the node. */
1906	for (j = 0; j < iter->min; j++)
1907	{
1908	tre_ast_node_t *copy;
1909	/* Remove tags from all but the last copy. */
1910	int flags = ((j + 1 < iter->min)
1911	? COPY_REMOVE_TAGS1
1912	: COPY_MAXIMIZE_FIRST_TAG2);
1913	pos_add_save = pos_add;
1914	status = tre_copy_ast(mem, stack, iter->arg, flags,
1915	&pos_add, tag_directions, &copy,
1916	&max_pos);
1917	if (status != REG_OK0)
1918	return status;
1919	if (seq1 != NULL((void*)0))
1920	seq1 = tre_ast_new_catenation(mem, seq1, copy);
1921	else
1922	seq1 = copy;
1923	if (seq1 == NULL((void*)0))
1924	return REG_ESPACE12;
1925	}
1926
1927	if (iter->max == -1)
1928	{
1929	/* No upper limit. */
1930	pos_add_save = pos_add;
1931	status = tre_copy_ast(mem, stack, iter->arg, 0,
1932	&pos_add, NULL((void*)0), &seq2, &max_pos);
1933	if (status != REG_OK0)
1934	return status;
1935	seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1936	if (seq2 == NULL((void*)0))
1937	return REG_ESPACE12;
1938	}
1939	else
1940	{
1941	for (j = iter->min; j < iter->max; j++)
1942	{
1943	tre_ast_node_t tmp, copy;
1944	pos_add_save = pos_add;
1945	status = tre_copy_ast(mem, stack, iter->arg, 0,
1946	&pos_add, NULL((void*)0), &copy, &max_pos);
1947	if (status != REG_OK0)
1948	return status;
1949	if (seq2 != NULL((void*)0))
1950	seq2 = tre_ast_new_catenation(mem, copy, seq2);
1951	else
1952	seq2 = copy;
1953	if (seq2 == NULL((void*)0))
1954	return REG_ESPACE12;
1955	tmp = tre_ast_new_literal(mem, EMPTY-1, -1, -1);
1956	if (tmp == NULL((void*)0))
1957	return REG_ESPACE12;
1958	seq2 = tre_ast_new_union(mem, tmp, seq2);
1959	if (seq2 == NULL((void*)0))
1960	return REG_ESPACE12;
1961	}
1962	}
1963
1964	pos_add = pos_add_save;
1965	if (seq1 == NULL((void*)0))
1966	seq1 = seq2;
1967	else if (seq2 != NULL((void*)0))
1968	seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1969	if (seq1 == NULL((void*)0))
1970	return REG_ESPACE12;
1971	node->obj = seq1->obj;
1972	node->type = seq1->type;
1973	}
1974
1975	iter_depth--;
1976	pos_add_total += pos_add - pos_add_last;
1977	if (iter_depth == 0)
1978	pos_add = pos_add_total;
1979
1980	break;
1981	}
1982	default:
1983	assert(0)(void)0;
1984	break;
1985	}
1986	}
1987
1988	*position += pos_add_total;
1989
1990	/* `max_pos' should never be larger than `*position' if the above
1991	code works, but just an extra safeguard let's make sure
1992	`*position' is set large enough so enough memory will be
1993	allocated for the transition table. */
1994	if (max_pos > *position)
1995	*position = max_pos;
1996
1997	return status;
1998	}
1999
2000	static tre_pos_and_tags_t *
2001	tre_set_empty(tre_mem_t mem)
2002	{
2003	tre_pos_and_tags_t *new_set;
2004
2005	new_set = tre_mem_calloc(mem, sizeof(new_set))__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof(*new_set));
2006	if (new_set == NULL((void*)0))
2007	return NULL((void*)0);
2008
2009	new_set[0].position = -1;
2010	new_set[0].code_min = -1;
2011	new_set[0].code_max = -1;
2012
2013	return new_set;
2014	}
2015
2016	static tre_pos_and_tags_t *
2017	tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2018	tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2019	{
2020	tre_pos_and_tags_t *new_set;
2021
2022	new_set = tre_mem_calloc(mem, sizeof(new_set) 2)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof(new_set) * 2);
2023	if (new_set == NULL((void*)0))
2024	return NULL((void*)0);
2025
2026	new_set[0].position = position;
2027	new_set[0].code_min = code_min;
2028	new_set[0].code_max = code_max;
2029	new_set[0].class = class;
2030	new_set[0].neg_classes = neg_classes;
2031	new_set[0].backref = backref;
2032	new_set[1].position = -1;
2033	new_set[1].code_min = -1;
2034	new_set[1].code_max = -1;
2035
2036	return new_set;
2037	}
2038
2039	static tre_pos_and_tags_t *
2040	tre_set_union(tre_mem_t mem, tre_pos_and_tags_t set1, tre_pos_and_tags_t set2,
2041	int *tags, int assertions)
2042	{
2043	int s1, s2, i, j;
2044	tre_pos_and_tags_t *new_set;
2045	int *new_tags;
2046	int num_tags;
2047
2048	for (num_tags = 0; tags != NULL((void*)0) && tags[num_tags] >= 0; num_tags++);
2049	for (s1 = 0; set1[s1].position >= 0; s1++);
2050	for (s2 = 0; set2[s2].position >= 0; s2++);
2051	new_set = tre_mem_calloc(mem, sizeof(new_set) (s1 + s2 + 1))__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof(new_set) * (s1 + s2 + 1));
2052	if (!new_set )
2053	return NULL((void*)0);
2054
2055	for (s1 = 0; set1[s1].position >= 0; s1++)
2056	{
2057	new_set[s1].position = set1[s1].position;
2058	new_set[s1].code_min = set1[s1].code_min;
2059	new_set[s1].code_max = set1[s1].code_max;
2060	new_set[s1].assertions = set1[s1].assertions \| assertions;
2061	new_set[s1].class = set1[s1].class;
2062	new_set[s1].neg_classes = set1[s1].neg_classes;
2063	new_set[s1].backref = set1[s1].backref;
2064	if (set1[s1].tags == NULL((void)0) && tags == NULL((void)0))
2065	new_set[s1].tags = NULL((void*)0);
2066	else
2067	{
2068	for (i = 0; set1[s1].tags != NULL((void*)0) && set1[s1].tags[i] >= 0; i++);
2069	new_tags = tre_mem_alloc(mem, (sizeof(new_tags)__tre_mem_alloc_impl(mem, 0, ((void)0), 0, (sizeof(new_tags ) (i + num_tags + 1)))
2070	* (i + num_tags + 1)))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, (sizeof(new_tags ) * (i + num_tags + 1)));
2071	if (new_tags == NULL((void*)0))
2072	return NULL((void*)0);
2073	for (j = 0; j < i; j++)
2074	new_tags[j] = set1[s1].tags[j];
2075	for (i = 0; i < num_tags; i++)
2076	new_tags[j + i] = tags[i];
2077	new_tags[j + i] = -1;
2078	new_set[s1].tags = new_tags;
2079	}
2080	}
2081
2082	for (s2 = 0; set2[s2].position >= 0; s2++)
2083	{
2084	new_set[s1 + s2].position = set2[s2].position;
2085	new_set[s1 + s2].code_min = set2[s2].code_min;
2086	new_set[s1 + s2].code_max = set2[s2].code_max;
2087	/* XXX - why not \| assertions here as well? */
2088	new_set[s1 + s2].assertions = set2[s2].assertions;
2089	new_set[s1 + s2].class = set2[s2].class;
2090	new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2091	new_set[s1 + s2].backref = set2[s2].backref;
2092	if (set2[s2].tags == NULL((void*)0))
2093	new_set[s1 + s2].tags = NULL((void*)0);
2094	else
2095	{
2096	for (i = 0; set2[s2].tags[i] >= 0; i++);
2097	new_tags = tre_mem_alloc(mem, sizeof(new_tags) (i + 1))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, sizeof(new_tags) * (i + 1));
2098	if (new_tags == NULL((void*)0))
2099	return NULL((void*)0);
2100	for (j = 0; j < i; j++)
2101	new_tags[j] = set2[s2].tags[j];
2102	new_tags[j] = -1;
2103	new_set[s1 + s2].tags = new_tags;
2104	}
2105	}
2106	new_set[s1 + s2].position = -1;
2107	return new_set;
2108	}
2109
2110	/* Finds the empty path through `node' which is the one that should be
2111	taken according to POSIX.2 rules, and adds the tags on that path to
2112	`tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2113	set to the number of tags seen on the path. */
2114	static reg_errcode_t
2115	tre_match_empty(tre_stack_t stack, tre_ast_node_t node, int *tags,
2116	int assertions, int num_tags_seen)
2117	{
2118	tre_literal_t *lit;
2119	tre_union_t *uni;
2120	tre_catenation_t *cat;
2121	tre_iteration_t *iter;
2122	int i;
2123	int bottom = tre_stack_num_objects(stack);
2124	reg_errcode_t status = REG_OK0;
2125	if (num_tags_seen)
2126	*num_tags_seen = 0;
2127
2128	status = tre_stack_push_voidptr(stack, node);
2129
2130	/* Walk through the tree recursively. */
2131	while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
2132	{
2133	node = tre_stack_pop_voidptr(stack);
2134
2135	switch (node->type)
2136	{
2137	case LITERAL:
2138	lit = (tre_literal_t *)node->obj;
2139	switch (lit->code_min)
2140	{
2141	case TAG-3:
2142	if (lit->code_max >= 0)
2143	{
2144	if (tags != NULL((void*)0))
2145	{
2146	/* Add the tag to `tags'. */
2147	for (i = 0; tags[i] >= 0; i++)
2148	if (tags[i] == lit->code_max)
2149	break;
2150	if (tags[i] < 0)
2151	{
2152	tags[i] = lit->code_max;
2153	tags[i + 1] = -1;
2154	}
2155	}
2156	if (num_tags_seen)
2157	(*num_tags_seen)++;
2158	}
2159	break;
2160	case ASSERTION-2:
2161	assert(lit->code_max >= 1(void)0
2162	\|\| lit->code_max <= ASSERT_LAST)(void)0;
2163	if (assertions != NULL((void*)0))
2164	*assertions \|= lit->code_max;
2165	break;
2166	case EMPTY-1:
2167	break;
2168	default:
2169	assert(0)(void)0;
2170	break;
2171	}
2172	break;
2173
2174	case UNION:
2175	/* Subexpressions starting earlier take priority over ones
2176	starting later, so we prefer the left subexpression over the
2177	right subexpression. */
2178	uni = (tre_union_t *)node->obj;
2179	if (uni->left->nullable)
2180	STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status != 0) break; }
2181	else if (uni->right->nullable)
2182	STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if ( status != 0) break; }
2183	else
2184	assert(0)(void)0;
2185	break;
2186
2187	case CATENATION:
2188	/* The path must go through both children. */
2189	cat = (tre_catenation_t *)node->obj;
2190	assert(cat->left->nullable)(void)0;
2191	assert(cat->right->nullable)(void)0;
2192	STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status != 0) break; };
2193	STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if ( status != 0) break; };
2194	break;
2195
2196	case ITERATION:
2197	/* A match with an empty string is preferred over no match at
2198	all, so we go through the argument if possible. */
2199	iter = (tre_iteration_t *)node->obj;
2200	if (iter->arg->nullable)
2201	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
2202	break;
2203
2204	default:
2205	assert(0)(void)0;
2206	break;
2207	}
2208	}
2209
2210	return status;
2211	}
2212
2213
2214	typedef enum {
2215	NFL_RECURSE,
2216	NFL_POST_UNION,
2217	NFL_POST_CATENATION,
2218	NFL_POST_ITERATION
2219	} tre_nfl_stack_symbol_t;
2220
2221
2222	/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2223	the nodes of the AST `tree'. */
2224	static reg_errcode_t
2225	tre_compute_nfl(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree)
2226	{
2227	int bottom = tre_stack_num_objects(stack);
2228
2229	STACK_PUSHR(stack, voidptr, tree){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , tree); if (_status != 0) return _status; };
2230	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2231
2232	while (tre_stack_num_objects(stack) > bottom)
2233	{
2234	tre_nfl_stack_symbol_t symbol;
2235	tre_ast_node_t *node;
2236
2237	symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2238	node = tre_stack_pop_voidptr(stack);
2239	switch (symbol)
2240	{
2241	case NFL_RECURSE:
2242	switch (node->type)
2243	{
2244	case LITERAL:
2245	{
2246	tre_literal_t lit = (tre_literal_t )node->obj;
2247	if (IS_BACKREF(lit)((lit)->code_min == -4))
2248	{
2249	/* Back references: nullable = false, firstpos = {i},
2250	lastpos = {i}. */
2251	node->nullable = 0;
2252	node->firstpos = tre_set_one(mem, lit->position, 0,
2253	TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0), -1);
2254	if (!node->firstpos)
2255	return REG_ESPACE12;
2256	node->lastpos = tre_set_one(mem, lit->position, 0,
2257	TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0),
2258	(int)lit->code_max);
2259	if (!node->lastpos)
2260	return REG_ESPACE12;
2261	}
2262	else if (lit->code_min < 0)
2263	{
2264	/* Tags, empty strings, params, and zero width assertions:
2265	nullable = true, firstpos = {}, and lastpos = {}. */
2266	node->nullable = 1;
2267	node->firstpos = tre_set_empty(mem);
2268	if (!node->firstpos)
2269	return REG_ESPACE12;
2270	node->lastpos = tre_set_empty(mem);
2271	if (!node->lastpos)
2272	return REG_ESPACE12;
2273	}
2274	else
2275	{
2276	/* Literal at position i: nullable = false, firstpos = {i},
2277	lastpos = {i}. */
2278	node->nullable = 0;
2279	node->firstpos =
2280	tre_set_one(mem, lit->position, (int)lit->code_min,
2281	(int)lit->code_max, 0, NULL((void*)0), -1);
2282	if (!node->firstpos)
2283	return REG_ESPACE12;
2284	node->lastpos = tre_set_one(mem, lit->position,
2285	(int)lit->code_min,
2286	(int)lit->code_max,
2287	lit->class, lit->neg_classes,
2288	-1);
2289	if (!node->lastpos)
2290	return REG_ESPACE12;
2291	}
2292	break;
2293	}
2294
2295	case UNION:
2296	/* Compute the attributes for the two subtrees, and after that
2297	for this node. */
2298	STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , node); if (_status != 0) return _status; };
2299	STACK_PUSHR(stack, int, NFL_POST_UNION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_UNION ); if (_status != 0) return _status; };
2300	STACK_PUSHR(stack, voidptr, ((tre_union_t )node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_union_t )node->obj)->right); if (_status != 0) return _status; };
2301	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2302	STACK_PUSHR(stack, voidptr, ((tre_union_t )node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_union_t )node->obj)->left); if (_status != 0) return _status; };
2303	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2304	break;
2305
2306	case CATENATION:
2307	/* Compute the attributes for the two subtrees, and after that
2308	for this node. */
2309	STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , node); if (_status != 0) return _status; };
2310	STACK_PUSHR(stack, int, NFL_POST_CATENATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_CATENATION ); if (_status != 0) return _status; };
2311	STACK_PUSHR(stack, voidptr, ((tre_catenation_t )node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_catenation_t )node->obj)->right); if (_status != 0) return _status; };
2312	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2313	STACK_PUSHR(stack, voidptr, ((tre_catenation_t )node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_catenation_t )node->obj)->left); if (_status != 0) return _status; };
2314	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2315	break;
2316
2317	case ITERATION:
2318	/* Compute the attributes for the subtree, and after that for
2319	this node. */
2320	STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , node); if (_status != 0) return _status; };
2321	STACK_PUSHR(stack, int, NFL_POST_ITERATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_ITERATION ); if (_status != 0) return _status; };
2322	STACK_PUSHR(stack, voidptr, ((tre_iteration_t )node->obj)->arg){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_iteration_t )node->obj)->arg); if (_status != 0 ) return _status; };
2323	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2324	break;
2325	}
2326	break; /* end case: NFL_RECURSE */
2327
2328	case NFL_POST_UNION:
2329	{
2330	tre_union_t uni = (tre_union_t )node->obj;
2331	node->nullable = uni->left->nullable \|\| uni->right->nullable;
2332	node->firstpos = tre_set_union(mem, uni->left->firstpos,
2333	uni->right->firstpos, NULL((void*)0), 0);
2334	if (!node->firstpos)
2335	return REG_ESPACE12;
2336	node->lastpos = tre_set_union(mem, uni->left->lastpos,
2337	uni->right->lastpos, NULL((void*)0), 0);
2338	if (!node->lastpos)
2339	return REG_ESPACE12;
2340	break;
2341	}
2342
2343	case NFL_POST_ITERATION:
2344	{
2345	tre_iteration_t iter = (tre_iteration_t )node->obj;
2346
2347	if (iter->min == 0 \|\| iter->arg->nullable)
2348	node->nullable = 1;
2349	else
2350	node->nullable = 0;
2351	node->firstpos = iter->arg->firstpos;
2352	node->lastpos = iter->arg->lastpos;
2353	break;
2354	}
2355
2356	case NFL_POST_CATENATION:
2357	{
2358	int num_tags, *tags, assertions;
2359	reg_errcode_t status;
2360	tre_catenation_t *cat = node->obj;
2361	node->nullable = cat->left->nullable && cat->right->nullable;
2362
2363	/* Compute firstpos. */
2364	if (cat->left->nullable)
2365	{
2366	/* The left side matches the empty string. Make a first pass
2367	with tre_match_empty() to get the number of tags and
2368	parameters. */
2369	status = tre_match_empty(stack, cat->left,
2370	NULL((void)0), NULL((void)0), &num_tags);
2371	if (status != REG_OK0)
2372	return status;
2373	/* Allocate arrays for the tags and parameters. */
2374	tags = xmallocmalloc(sizeof(tags) (num_tags + 1));
2375	if (!tags)
2376	return REG_ESPACE12;
2377	tags[0] = -1;
2378	assertions = 0;
2379	/* Second pass with tre_mach_empty() to get the list of
2380	tags and parameters. */
2381	status = tre_match_empty(stack, cat->left, tags,
2382	&assertions, NULL((void*)0));
2383	if (status != REG_OK0)
2384	{
2385	xfreefree(tags);
2386	return status;
2387	}
2388	node->firstpos =
2389	tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2390	tags, assertions);
2391	xfreefree(tags);
2392	if (!node->firstpos)
2393	return REG_ESPACE12;
2394	}
2395	else
2396	{
2397	node->firstpos = cat->left->firstpos;
2398	}
2399
2400	/* Compute lastpos. */
2401	if (cat->right->nullable)
2402	{
2403	/* The right side matches the empty string. Make a first pass
2404	with tre_match_empty() to get the number of tags and
2405	parameters. */
2406	status = tre_match_empty(stack, cat->right,
2407	NULL((void)0), NULL((void)0), &num_tags);
2408	if (status != REG_OK0)
2409	return status;
2410	/* Allocate arrays for the tags and parameters. */
2411	tags = xmallocmalloc(sizeof(int) * (num_tags + 1));
2412	if (!tags)
2413	return REG_ESPACE12;
2414	tags[0] = -1;
2415	assertions = 0;
2416	/* Second pass with tre_mach_empty() to get the list of
2417	tags and parameters. */
2418	status = tre_match_empty(stack, cat->right, tags,
2419	&assertions, NULL((void*)0));
2420	if (status != REG_OK0)
2421	{
2422	xfreefree(tags);
2423	return status;
2424	}
2425	node->lastpos =
2426	tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2427	tags, assertions);
2428	xfreefree(tags);
2429	if (!node->lastpos)
2430	return REG_ESPACE12;
2431	}
2432	else
2433	{
2434	node->lastpos = cat->right->lastpos;
2435	}
2436	break;
2437	}
2438
2439	default:
2440	assert(0)(void)0;
2441	break;
2442	}
2443	}
2444
2445	return REG_OK0;
2446	}
2447
2448
2449	/* Adds a transition from each position in `p1' to each position in `p2'. */
2450	static reg_errcode_t
2451	tre_make_trans(tre_pos_and_tags_t p1, tre_pos_and_tags_t p2,
2452	tre_tnfa_transition_t *transitions,
2453	int counts, int offs)
2454	{
2455	tre_pos_and_tags_t *orig_p2 = p2;
2456	tre_tnfa_transition_t *trans;
2457	int i, j, k, l, dup, prev_p2_pos;
2458
2459	if (transitions != NULL((void*)0))
2460	while (p1->position >= 0)
2461	{
2462	p2 = orig_p2;
2463	prev_p2_pos = -1;
2464	while (p2->position >= 0)
2465	{
2466	/* Optimization: if this position was already handled, skip it. */
2467	if (p2->position == prev_p2_pos)
2468	{
2469	p2++;
2470	continue;
2471	}
2472	prev_p2_pos = p2->position;
2473	/* Set `trans' to point to the next unused transition from
2474	position `p1->position'. */
2475	trans = transitions + offs[p1->position];
2476	while (trans->state != NULL((void*)0))
2477	{
2478	#if 0
2479	/* If we find a previous transition from `p1->position' to
2480	`p2->position', it is overwritten. This can happen only
2481	if there are nested loops in the regexp, like in "((a))".
2482	In POSIX.2 repetition using the outer loop is always
2483	preferred over using the inner loop. Therefore the
2484	transition for the inner loop is useless and can be thrown
2485	away. */
2486	/* XXX - The same position is used for all nodes in a bracket
2487	expression, so this optimization cannot be used (it will
2488	break bracket expressions) unless I figure out a way to
2489	detect it here. */
2490	if (trans->state_id == p2->position)
2491	{
2492	break;
2493	}
2494	#endif
2495	trans++;
2496	}
2497
2498	if (trans->state == NULL((void*)0))
2499	(trans + 1)->state = NULL((void*)0);
2500	/* Use the character ranges, assertions, etc. from `p1' for
2501	the transition from `p1' to `p2'. */
2502	trans->code_min = p1->code_min;
2503	trans->code_max = p1->code_max;
2504	trans->state = transitions + offs[p2->position];
2505	trans->state_id = p2->position;
2506	trans->assertions = p1->assertions \| p2->assertions
2507	\| (p1->class ? ASSERT_CHAR_CLASS4 : 0)
2508	\| (p1->neg_classes != NULL((void*)0) ? ASSERT_CHAR_CLASS_NEG8 : 0);
2509	if (p1->backref >= 0)
2510	{
2511	assert((trans->assertions & ASSERT_CHAR_CLASS) == 0)(void)0;
2512	assert(p2->backref < 0)(void)0;
2513	trans->u.backref = p1->backref;
2514	trans->assertions \|= ASSERT_BACKREF256;
2515	}
2516	else
2517	trans->u.class = p1->class;
2518	if (p1->neg_classes != NULL((void*)0))
2519	{
2520	for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2521	trans->neg_classes =
2522	xmallocmalloc(sizeof(trans->neg_classes) (i + 1));
2523	if (trans->neg_classes == NULL((void*)0))
2524	return REG_ESPACE12;
2525	for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2526	trans->neg_classes[i] = p1->neg_classes[i];
2527	trans->neg_classes[i] = (tre_ctype_t)0;
2528	}
2529	else
2530	trans->neg_classes = NULL((void*)0);
2531
2532	/* Find out how many tags this transition has. */
2533	i = 0;
2534	if (p1->tags != NULL((void*)0))
2535	while(p1->tags[i] >= 0)
2536	i++;
2537	j = 0;
2538	if (p2->tags != NULL((void*)0))
2539	while(p2->tags[j] >= 0)
2540	j++;
2541
2542	/* If we are overwriting a transition, free the old tag array. */
2543	if (trans->tags != NULL((void*)0))
2544	xfreefree(trans->tags);
2545	trans->tags = NULL((void*)0);
2546
2547	/* If there were any tags, allocate an array and fill it. */
2548	if (i + j > 0)
2549	{
2550	trans->tags = xmallocmalloc(sizeof(trans->tags) (i + j + 1));
2551	if (!trans->tags)
2552	return REG_ESPACE12;
2553	i = 0;
2554	if (p1->tags != NULL((void*)0))
2555	while(p1->tags[i] >= 0)
2556	{
2557	trans->tags[i] = p1->tags[i];
2558	i++;
2559	}
2560	l = i;
2561	j = 0;
2562	if (p2->tags != NULL((void*)0))
2563	while (p2->tags[j] >= 0)
2564	{
2565	/* Don't add duplicates. */
2566	dup = 0;
2567	for (k = 0; k < i; k++)
2568	if (trans->tags[k] == p2->tags[j])
2569	{
2570	dup = 1;
2571	break;
2572	}
2573	if (!dup)
2574	trans->tags[l++] = p2->tags[j];
2575	j++;
2576	}
2577	trans->tags[l] = -1;
2578	}
2579
2580	p2++;
2581	}
2582	p1++;
2583	}
2584	else
2585	/* Compute a maximum limit for the number of transitions leaving
2586	from each state. */
2587	while (p1->position >= 0)
2588	{
2589	p2 = orig_p2;
2590	while (p2->position >= 0)
2591	{
2592	counts[p1->position]++;
2593	p2++;
2594	}
2595	p1++;
2596	}
2597	return REG_OK0;
2598	}
2599
2600	/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2601	labelled with one character range (there are no transitions on empty
2602	strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2603	the regexp. */
2604	static reg_errcode_t
2605	tre_ast_to_tnfa(tre_ast_node_t node, tre_tnfa_transition_t transitions,
2606	int counts, int offs)
2607	{
2608	tre_union_t *uni;
2609	tre_catenation_t *cat;
2610	tre_iteration_t *iter;
2611	reg_errcode_t errcode = REG_OK0;
2612
2613	/* XXX - recurse using a stack!. */
2614	switch (node->type)
2615	{
2616	case LITERAL:
2617	break;
2618	case UNION:
2619	uni = (tre_union_t *)node->obj;
2620	errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2621	if (errcode != REG_OK0)
2622	return errcode;
2623	errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2624	break;
2625
2626	case CATENATION:
2627	cat = (tre_catenation_t *)node->obj;
2628	/* Add a transition from each position in cat->left->lastpos
2629	to each position in cat->right->firstpos. */
2630	errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2631	transitions, counts, offs);
2632	if (errcode != REG_OK0)
2633	return errcode;
2634	errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2635	if (errcode != REG_OK0)
2636	return errcode;
2637	errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2638	break;
2639
2640	case ITERATION:
2641	iter = (tre_iteration_t *)node->obj;
2642	assert(iter->max == -1 \|\| iter->max == 1)(void)0;
2643
2644	if (iter->max == -1)
2645	{
2646	assert(iter->min == 0 \|\| iter->min == 1)(void)0;
2647	/* Add a transition from each last position in the iterated
2648	expression to each first position. */
2649	errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2650	transitions, counts, offs);
2651	if (errcode != REG_OK0)
2652	return errcode;
2653	}
2654	errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2655	break;
2656	}
2657	return errcode;
2658	}
2659
2660
2661	#define ERROR_EXIT(err)do { errcode = err; if ( 1) goto error_exit; } while ( 0) \
2662	do \
2663	{ \
2664	errcode = err; \
2665	if (/CONSTCOND/1) \
2666	goto error_exit; \
2667	} \
2668	while (/CONSTCOND/0)
2669
2670
2671	int
2672	regcomp(regex_t restrict preg, const char restrict regex, int cflags)
2673	{
2674	tre_stack_t *stack;
2675	tre_ast_node_t tree, tmp_ast_l, *tmp_ast_r;
2676	tre_pos_and_tags_t *p;
2677	int counts = NULL((void)0), offs = NULL((void)0);
2678	int i, add = 0;
2679	tre_tnfa_transition_t transitions, initial;
2680	tre_tnfa_t tnfa = NULL((void)0);
2681	tre_submatch_data_t *submatch_data;
2682	tre_tag_direction_t tag_directions = NULL((void)0);
2683	reg_errcode_t errcode;
2684	tre_mem_t mem;
2685
2686	/* Parse context. */
2687	tre_parse_ctx_t parse_ctx;
2688
2689	/* Allocate a stack used throughout the compilation process for various
2690	purposes. */
2691	stack = tre_stack_new(512, 1024000, 128);
2692	if (!stack)
2693	return REG_ESPACE12;
2694	/* Allocate a fast memory allocator. */
2695	mem = tre_mem_new()__tre_mem_new_impl(0, ((void*)0));
2696	if (!mem)
2697	{
2698	tre_stack_destroy(stack);
2699	return REG_ESPACE12;
2700	}
2701
2702	/* Parse the regexp. */
2703	memset(&parse_ctx, 0, sizeof(parse_ctx));
2704	parse_ctx.mem = mem;
2705	parse_ctx.stack = stack;
2706	parse_ctx.re = regex;
2707	parse_ctx.cflags = cflags;
2708	parse_ctx.max_backref = -1;
2709	errcode = tre_parse(&parse_ctx);
2710	if (errcode != REG_OK0)
2711	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2712	preg->re_nsub = parse_ctx.submatch_id - 1;
2713	tree = parse_ctx.n;
2714
2715	#ifdef TRE_DEBUG
2716	tre_ast_print(tree);
2717	#endif /* TRE_DEBUG */
2718
2719	/* Referring to nonexistent subexpressions is illegal. */
2720	if (parse_ctx.max_backref > (int)preg->re_nsub)
2721	ERROR_EXIT(REG_ESUBREG)do { errcode = 6; if ( 1) goto error_exit; } while ( 0);
2722
2723	/* Allocate the TNFA struct. */
2724	tnfa = xcalloccalloc(1, sizeof(tre_tnfa_t));
2725	if (tnfa == NULL((void*)0))
2726	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2727	tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2728	tnfa->have_approx = 0;
2729	tnfa->num_submatches = parse_ctx.submatch_id;
2730
2731	/* Set up tags for submatch addressing. If REG_NOSUB is set and the
2732	regexp does not have back references, this can be skipped. */
2733	if (tnfa->have_backrefs \|\| !(cflags & REG_NOSUB8))
2734	{
2735
2736	/* Figure out how many tags we will need. */
2737	errcode = tre_add_tags(NULL((void*)0), stack, tree, tnfa);
2738	if (errcode != REG_OK0)
2739	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2740
2741	if (tnfa->num_tags > 0)
2742	{
2743	tag_directions = xmallocmalloc(sizeof(*tag_directions)
2744	* (tnfa->num_tags + 1));
2745	if (tag_directions == NULL((void*)0))
2746	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2747	tnfa->tag_directions = tag_directions;
2748	memset(tag_directions, -1,
2749	sizeof(tag_directions) (tnfa->num_tags + 1));
2750	}
2751	tnfa->minimal_tags = xcalloccalloc((unsigned)tnfa->num_tags * 2 + 1,
2752	sizeof(*tnfa->minimal_tags));
2753	if (tnfa->minimal_tags == NULL((void*)0))
2754	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2755
2756	submatch_data = xcalloccalloc((unsigned)parse_ctx.submatch_id,
2757	sizeof(*submatch_data));
2758	if (submatch_data == NULL((void*)0))
2759	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2760	tnfa->submatch_data = submatch_data;
2761
2762	errcode = tre_add_tags(mem, stack, tree, tnfa);
2763	if (errcode != REG_OK0)
2764	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2765
2766	}
2767
2768	/* Expand iteration nodes. */
2769	errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2770	tag_directions);
2771	if (errcode != REG_OK0)
2772	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2773
2774	/* Add a dummy node for the final state.
2775	XXX - For certain patterns this dummy node can be optimized away,
2776	for example "a" or "ab". Figure out a simple way to detect
2777	this possibility. */
2778	tmp_ast_l = tree;
2779	tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2780	if (tmp_ast_r == NULL((void*)0))
2781	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2782
2783	tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2784	if (tree == NULL((void*)0))
2785	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2786
2787	errcode = tre_compute_nfl(mem, stack, tree);
2788	if (errcode != REG_OK0)
2789	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2790
2791	counts = xmallocmalloc(sizeof(int) * parse_ctx.position);
2792	if (counts == NULL((void*)0))
2793	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2794
2795	offs = xmallocmalloc(sizeof(int) * parse_ctx.position);
2796	if (offs == NULL((void*)0))
2797	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2798
2799	for (i = 0; i < parse_ctx.position; i++)
2800	counts[i] = 0;
2801	tre_ast_to_tnfa(tree, NULL((void)0), counts, NULL((void)0));
2802
2803	add = 0;
2804	for (i = 0; i < parse_ctx.position; i++)
2805	{
2806	offs[i] = add;
2807	add += counts[i] + 1;
2808	counts[i] = 0;
2809	}
2810	transitions = xcalloccalloc((unsigned)add + 1, sizeof(*transitions));
2811	if (transitions == NULL((void*)0))
2812	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2813	tnfa->transitions = transitions;
2814	tnfa->num_transitions = add;
2815
2816	errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2817	if (errcode != REG_OK0)
2818	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2819
2820	tnfa->firstpos_chars = NULL((void*)0);
2821
2822	p = tree->firstpos;
2823	i = 0;
2824	while (p->position >= 0)
2825	{
2826	i++;
2827	p++;
2828	}
2829
2830	initial = xcalloccalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2831	if (initial == NULL((void*)0))
2832	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2833	tnfa->initial = initial;
2834
2835	i = 0;
2836	for (p = tree->firstpos; p->position >= 0; p++)
2837	{
2838	initial[i].state = transitions + offs[p->position];
2839	initial[i].state_id = p->position;
2840	initial[i].tags = NULL((void*)0);
2841	/* Copy the arrays p->tags, and p->params, they are allocated
2842	from a tre_mem object. */
2843	if (p->tags)
2844	{
2845	int j;
2846	for (j = 0; p->tags[j] >= 0; j++);
2847	initial[i].tags = xmallocmalloc(sizeof(p->tags) (j + 1));
2848	if (!initial[i].tags)
2849	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2850	memcpy(initial[i].tags, p->tags, sizeof(p->tags) (j + 1));
2851	}
2852	initial[i].assertions = p->assertions;
2853	i++;
2854	}
2855	initial[i].state = NULL((void*)0);
2856
2857	tnfa->num_transitions = add;
2858	tnfa->final = transitions + offs[tree->lastpos[0].position];
2859	tnfa->num_states = parse_ctx.position;
2860	tnfa->cflags = cflags;
2861
2862	tre_mem_destroy__tre_mem_destroy(mem);
2863	tre_stack_destroy(stack);
2864	xfreefree(counts);
2865	xfreefree(offs);
2866
2867	preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2868	return REG_OK0;
2869
2870	error_exit:
2871	/* Free everything that was allocated and return the error code. */
2872	tre_mem_destroy__tre_mem_destroy(mem);
2873	if (stack != NULL((void*)0))
2874	tre_stack_destroy(stack);
2875	if (counts != NULL((void*)0))
2876	xfreefree(counts);
2877	if (offs != NULL((void*)0))
2878	xfreefree(offs);
2879	preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2880	regfree(preg);
2881	return errcode;
2882	}
2883
2884
2885
2886
2887	void
2888	regfree(regex_t *preg)
2889	{
2890	tre_tnfa_t *tnfa;
2891	unsigned int i;
2892	tre_tnfa_transition_t *trans;
2893
2894	tnfa = (void *)preg->TRE_REGEX_T_FIELD__opaque;
2895	if (!tnfa)
2896	return;
2897
2898	for (i = 0; i < tnfa->num_transitions; i++)
2899	if (tnfa->transitions[i].state)
2900	{
2901	if (tnfa->transitions[i].tags)
2902	xfreefree(tnfa->transitions[i].tags);
2903	if (tnfa->transitions[i].neg_classes)
2904	xfreefree(tnfa->transitions[i].neg_classes);
2905	}
2906	if (tnfa->transitions)
2907	xfreefree(tnfa->transitions);
2908
2909	if (tnfa->initial)
2910	{
2911	for (trans = tnfa->initial; trans->state; trans++)
2912	{
2913	if (trans->tags)
2914	xfreefree(trans->tags);
2915	}
2916	xfreefree(tnfa->initial);
2917	}
2918
2919	if (tnfa->submatch_data)
2920	{
2921	for (i = 0; i < tnfa->num_submatches; i++)
2922	if (tnfa->submatch_data[i].parents)
2923	xfreefree(tnfa->submatch_data[i].parents);
2924	xfreefree(tnfa->submatch_data);
2925	}
2926
2927	if (tnfa->tag_directions)
2928	xfreefree(tnfa->tag_directions);
2929	if (tnfa->firstpos_chars)
2930	xfreefree(tnfa->firstpos_chars);
2931	if (tnfa->minimal_tags)
2932	xfreefree(tnfa->minimal_tags);
2933	xfreefree(tnfa);
2934	}