src/regex/regcomp.c

Bug Summary

File:	regex/regcomp.c
Location:	line 1585, column 7
Description:	Value stored to 'status' is never read

Annotated Source Code

1	/*
2	regcomp.c - TRE POSIX compatible regex compilation functions.
3
4	Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5	All rights reserved.
6
7	Redistribution and use in source and binary forms, with or without
8	modification, are permitted provided that the following conditions
9	are met:
10
11	1. Redistributions of source code must retain the above copyright
12	notice, this list of conditions and the following disclaimer.
13
14	2. Redistributions in binary form must reproduce the above copyright
15	notice, this list of conditions and the following disclaimer in the
16	documentation and/or other materials provided with the distribution.
17
18	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22	HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30	*/
31
32	#include <string.h>
33	#include <stdlib.h>
34	#include <regex.h>
35	#include <limits.h>
36	#include <stdint.h>
37	#include <ctype.h>
38
39	#include "tre.h"
40
41	#include <assert.h>
42
43	/***********************************************************************
44	from tre-compile.h
45	***********************************************************************/
46
47	typedef struct {
48	int position;
49	int code_min;
50	int code_max;
51	int *tags;
52	int assertions;
53	tre_ctype_t class;
54	tre_ctype_t *neg_classes;
55	int backref;
56	} tre_pos_and_tags_t;
57
58
59	/***********************************************************************
60	from tre-ast.c and tre-ast.h
61	***********************************************************************/
62
63	/* The different AST node types. */
64	typedef enum {
65	LITERAL,
66	CATENATION,
67	ITERATION,
68	UNION
69	} tre_ast_type_t;
70
71	/* Special subtypes of TRE_LITERAL. */
72	#define EMPTY-1 -1 /* Empty leaf (denotes empty string). */
73	#define ASSERTION-2 -2 /* Assertion leaf. */
74	#define TAG-3 -3 /* Tag leaf. */
75	#define BACKREF-4 -4 /* Back reference leaf. */
76
77	#define IS_SPECIAL(x)((x)->code_min < 0) ((x)->code_min < 0)
78	#define IS_EMPTY(x)((x)->code_min == -1) ((x)->code_min == EMPTY-1)
79	#define IS_ASSERTION(x)((x)->code_min == -2) ((x)->code_min == ASSERTION-2)
80	#define IS_TAG(x)((x)->code_min == -3) ((x)->code_min == TAG-3)
81	#define IS_BACKREF(x)((x)->code_min == -4) ((x)->code_min == BACKREF-4)
82
83
84	/* A generic AST node. All AST nodes consist of this node on the top
85	level with `obj' pointing to the actual content. */
86	typedef struct {
87	tre_ast_type_t type; /* Type of the node. */
88	void obj; / Pointer to actual node. */
89	int nullable;
90	int submatch_id;
91	int num_submatches;
92	int num_tags;
93	tre_pos_and_tags_t *firstpos;
94	tre_pos_and_tags_t *lastpos;
95	} tre_ast_node_t;
96
97
98	/* A "literal" node. These are created for assertions, back references,
99	tags, matching parameter settings, and all expressions that match one
100	character. */
101	typedef struct {
102	long code_min;
103	long code_max;
104	int position;
105	tre_ctype_t class;
106	tre_ctype_t *neg_classes;
107	} tre_literal_t;
108
109	/* A "catenation" node. These are created when two regexps are concatenated.
110	If there are more than one subexpressions in sequence, the `left' part
111	holds all but the last, and `right' part holds the last subexpression
112	(catenation is left associative). */
113	typedef struct {
114	tre_ast_node_t *left;
115	tre_ast_node_t *right;
116	} tre_catenation_t;
117
118	/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
119	operators. */
120	typedef struct {
121	/* Subexpression to match. */
122	tre_ast_node_t *arg;
123	/* Minimum number of consecutive matches. */
124	int min;
125	/* Maximum number of consecutive matches. */
126	int max;
127	/* If 0, match as many characters as possible, if 1 match as few as
128	possible. Note that this does not always mean the same thing as
129	matching as many/few repetitions as possible. */
130	unsigned int minimal:1;
131	} tre_iteration_t;
132
133	/* An "union" node. These are created for the "\|" operator. */
134	typedef struct {
135	tre_ast_node_t *left;
136	tre_ast_node_t *right;
137	} tre_union_t;
138
139
140	static tre_ast_node_t *
141	tre_ast_new_node(tre_mem_t mem, int type, void *obj)
142	{
143	tre_ast_node_t node = tre_mem_calloc(mem, sizeof node)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof node);
144	if (!node \|\| !obj)
145	return 0;
146	node->obj = obj;
147	node->type = type;
148	node->nullable = -1;
149	node->submatch_id = -1;
150	return node;
151	}
152
153	static tre_ast_node_t *
154	tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
155	{
156	tre_ast_node_t *node;
157	tre_literal_t *lit;
158
159	lit = tre_mem_calloc(mem, sizeof lit)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *lit);
160	node = tre_ast_new_node(mem, LITERAL, lit);
161	if (!node)
162	return 0;
163	lit->code_min = code_min;
164	lit->code_max = code_max;
165	lit->position = position;
166	return node;
167	}
168
169	static tre_ast_node_t *
170	tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
171	{
172	tre_ast_node_t *node;
173	tre_iteration_t *iter;
174
175	iter = tre_mem_calloc(mem, sizeof iter)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *iter);
176	node = tre_ast_new_node(mem, ITERATION, iter);
177	if (!node)
178	return 0;
179	iter->arg = arg;
180	iter->min = min;
181	iter->max = max;
182	iter->minimal = minimal;
183	node->num_submatches = arg->num_submatches;
184	return node;
185	}
186
187	static tre_ast_node_t *
188	tre_ast_new_union(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t right)
189	{
190	tre_ast_node_t *node;
191	tre_union_t *un;
192
193	if (!left)
194	return right;
195	un = tre_mem_calloc(mem, sizeof un)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *un);
196	node = tre_ast_new_node(mem, UNION, un);
197	if (!node \|\| !right)
198	return 0;
199	un->left = left;
200	un->right = right;
201	node->num_submatches = left->num_submatches + right->num_submatches;
202	return node;
203	}
204
205	static tre_ast_node_t *
206	tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t left, tre_ast_node_t right)
207	{
208	tre_ast_node_t *node;
209	tre_catenation_t *cat;
210
211	if (!left)
212	return right;
213	cat = tre_mem_calloc(mem, sizeof cat)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof *cat);
214	node = tre_ast_new_node(mem, CATENATION, cat);
215	if (!node)
216	return 0;
217	cat->left = left;
218	cat->right = right;
219	node->num_submatches = left->num_submatches + right->num_submatches;
220	return node;
221	}
222
223
224	/***********************************************************************
225	from tre-stack.c and tre-stack.h
226	***********************************************************************/
227
228	typedef struct tre_stack_rec tre_stack_t;
229
230	/* Creates a new stack object. `size' is initial size in bytes, `max_size'
231	is maximum size, and `increment' specifies how much more space will be
232	allocated with realloc() if all space gets used up. Returns the stack
233	object or NULL if out of memory. */
234	static tre_stack_t *
235	tre_stack_new(int size, int max_size, int increment);
236
237	/* Frees the stack object. */
238	static void
239	tre_stack_destroy(tre_stack_t *s);
240
241	/* Returns the current number of objects in the stack. */
242	static int
243	tre_stack_num_objects(tre_stack_t *s);
244
245	/* Each tre_stack_push_(tre_stack_t s, <type> value) function pushes
246	`value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247	This tries to realloc() more space before failing if maximum size
248	has not yet been reached. Returns REG_OK if successful. */
249	#define declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type value) \
250	static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
251
252	declare_pushf(voidptr, void )static reg_errcode_t tre_stack_push_voidptr(tre_stack_t s, void * value);
253	declare_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value );
254
255	/* Each tre_stack_pop_(tre_stack_t s) function pops the topmost
256	element off of stack `s' and returns it. The stack must not be
257	empty. */
258	#define declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) \
259	static type tre_stack_pop_ ## typetag(tre_stack_t *s)
260
261	declare_popf(voidptr, void )static void tre_stack_pop_voidptr(tre_stack_t *s);
262	declare_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s);
263
264	/* Just to save some typing. */
265	#define STACK_PUSH(s, typetag, value)do { status = tre_stack_push_typetag(s, value); } while ( 0) \
266	do \
267	{ \
268	status = tre_stack_push_ ## typetag(s, value); \
269	} \
270	while (/CONSTCOND/0)
271
272	#define STACK_PUSHX(s, typetag, value){ status = tre_stack_push_typetag(s, value); if (status != 0) break; } \
273	{ \
274	status = tre_stack_push_ ## typetag(s, value); \
275	if (status != REG_OK0) \
276	break; \
277	}
278
279	#define STACK_PUSHR(s, typetag, value){ reg_errcode_t _status; _status = tre_stack_push_typetag(s, value ); if (_status != 0) return _status; } \
280	{ \
281	reg_errcode_t _status; \
282	_status = tre_stack_push_ ## typetag(s, value); \
283	if (_status != REG_OK0) \
284	return _status; \
285	}
286
287	union tre_stack_item {
288	void *voidptr_value;
289	int int_value;
290	};
291
292	struct tre_stack_rec {
293	int size;
294	int max_size;
295	int increment;
296	int ptr;
297	union tre_stack_item *stack;
298	};
299
300
301	static tre_stack_t *
302	tre_stack_new(int size, int max_size, int increment)
303	{
304	tre_stack_t *s;
305
306	s = xmallocmalloc(sizeof(*s));
307	if (s != NULL((void*)0))
308	{
309	s->stack = xmallocmalloc(sizeof(s->stack) size);
310	if (s->stack == NULL((void*)0))
311	{
312	xfreefree(s);
313	return NULL((void*)0);
314	}
315	s->size = size;
316	s->max_size = max_size;
317	s->increment = increment;
318	s->ptr = 0;
319	}
320	return s;
321	}
322
323	static void
324	tre_stack_destroy(tre_stack_t *s)
325	{
326	xfreefree(s->stack);
327	xfreefree(s);
328	}
329
330	static int
331	tre_stack_num_objects(tre_stack_t *s)
332	{
333	return s->ptr;
334	}
335
336	static reg_errcode_t
337	tre_stack_push(tre_stack_t *s, union tre_stack_item value)
338	{
339	if (s->ptr < s->size)
340	{
341	s->stack[s->ptr] = value;
342	s->ptr++;
343	}
344	else
345	{
346	if (s->size >= s->max_size)
347	{
348	return REG_ESPACE12;
349	}
350	else
351	{
352	union tre_stack_item *new_buffer;
353	int new_size;
354	new_size = s->size + s->increment;
355	if (new_size > s->max_size)
356	new_size = s->max_size;
357	new_buffer = xreallocrealloc(s->stack, sizeof(new_buffer) new_size);
358	if (new_buffer == NULL((void*)0))
359	{
360	return REG_ESPACE12;
361	}
362	assert(new_size > s->size)(void)0;
363	s->size = new_size;
364	s->stack = new_buffer;
365	tre_stack_push(s, value);
366	}
367	}
368	return REG_OK0;
369	}
370
371	#define define_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type value) { union tre_stack_item item; item.typetag_value = value ; return tre_stack_push(s, item); } \
372	declare_pushf(typetag, type)static reg_errcode_t tre_stack_push_typetag(tre_stack_t *s, type value) { \
373	union tre_stack_item item; \
374	item.typetag ## _value = value; \
375	return tre_stack_push(s, item); \
376	}
377
378	define_pushf(int, int)static reg_errcode_t tre_stack_push_int(tre_stack_t *s, int value ) { union tre_stack_item item; item.int_value = value; return tre_stack_push(s, item); }
379	define_pushf(voidptr, void )static reg_errcode_t tre_stack_push_voidptr(tre_stack_t s, void * value) { union tre_stack_item item; item.voidptr_value = value ; return tre_stack_push(s, item); }
380
381	#define define_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { return s-> stack[--s->ptr].typetag_value; } \
382	declare_popf(typetag, type)static type tre_stack_pop_typetag(tre_stack_t *s) { \
383	return s->stack[--s->ptr].typetag ## _value; \
384	}
385
386	define_popf(int, int)static int tre_stack_pop_int(tre_stack_t *s) { return s->stack [--s->ptr].int_value; }
387	define_popf(voidptr, void )static void tre_stack_pop_voidptr(tre_stack_t *s) { return s ->stack[--s->ptr].voidptr_value; }
388
389
390	/***********************************************************************
391	from tre-parse.c and tre-parse.h
392	***********************************************************************/
393
394	/* Parse context. */
395	typedef struct {
396	/* Memory allocator. The AST is allocated using this. */
397	tre_mem_t mem;
398	/* Stack used for keeping track of regexp syntax. */
399	tre_stack_t *stack;
400	/* The parsed node after a parse function returns. */
401	tre_ast_node_t *n;
402	/* Position in the regexp pattern after a parse function returns. */
403	const char *s;
404	/* The first character of the regexp. */
405	const char *re;
406	/* Current submatch ID. */
407	int submatch_id;
408	/* Current position (number of literal). */
409	int position;
410	/* The highest back reference or -1 if none seen so far. */
411	int max_backref;
412	/* Compilation flags. */
413	int cflags;
414	} tre_parse_ctx_t;
415
416	/* Some macros for expanding \w, \s, etc. */
417	static const struct {
418	char c;
419	const char *expansion;
420	} tre_macros[] = {
421	{'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422	{'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423	{'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424	{'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
425	{ 0, 0 }
426	};
427
428	/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429	must have at least `len' items. Sets buf[0] to zero if the there
430	is no match in `tre_macros'. */
431	static const char tre_expand_macro(const char s)
432	{
433	int i;
434	for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435	return tre_macros[i].expansion;
436	}
437
438	static int
439	tre_compare_lit(const void a, const void b)
440	{
441	const tre_literal_t const la = a;
442	const tre_literal_t const lb = b;
443	/* assumes the range of valid code_min is < INT_MAX */
444	return la[0]->code_min - lb[0]->code_min;
445	}
446
447	struct literals {
448	tre_mem_t mem;
449	tre_literal_t **a;
450	int len;
451	int cap;
452	};
453
454	static tre_literal_t tre_new_lit(struct literals p)
455	{
456	tre_literal_t **a;
457	if (p->len >= p->cap) {
458	if (p->cap >= 1<<15)
459	return 0;
460	p->cap *= 2;
461	a = xreallocrealloc(p->a, p->cap * sizeof *p->a);
462	if (!a)
463	return 0;
464	p->a = a;
465	}
466	a = p->a + p->len++;
467	a = tre_mem_calloc(p->mem, sizeof a)__tre_mem_alloc_impl(p->mem, 0, ((void)0), 1, sizeof **a);
468	return *a;
469	}
470
471	static int add_icase_literals(struct literals *ls, int min, int max)
472	{
473	tre_literal_t *lit;
474	int b, e, c;
475	for (c=min; c<=max; ) {
476	/* assumes islower(c) and isupper(c) are exclusive
477	and toupper(c)!=c if islower(c).
478	multiple opposite case characters are not supported */
479	if (tre_isloweriswlower(c)) {
480	b = e = tre_touppertowupper(c);
481	for (c++, e++; c<=max; c++, e++)
482	if (tre_touppertowupper(c) != e) break;
483	} else if (tre_isupperiswupper(c)) {
484	b = e = tre_tolowertowlower(c);
485	for (c++, e++; c<=max; c++, e++)
486	if (tre_tolowertowlower(c) != e) break;
487	} else {
488	c++;
489	continue;
490	}
491	lit = tre_new_lit(ls);
492	if (!lit)
493	return -1;
494	lit->code_min = b;
495	lit->code_max = e-1;
496	lit->position = -1;
497	}
498	return 0;
499	}
500
501
502	/* Maximum number of character classes in a negated bracket expression. */
503	#define MAX_NEG_CLASSES64 64
504
505	struct neg {
506	int negate;
507	int len;
508	tre_ctype_t a[MAX_NEG_CLASSES64];
509	};
510
511	// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
512
513	/*
514	bracket grammar:
515	Bracket = '[' List ']' \| '[^' List ']'
516	List = Term \| List Term
517	Term = Char \| Range \| Chclass \| Eqclass
518	Range = Char '-' Char \| Char '-' '-'
519	Char = Coll \| coll_single
520	Meta = ']' \| '-'
521	Coll = '[.' coll_single '.]' \| '[.' coll_multi '.]' \| '[.' Meta '.]'
522	Eqclass = '[=' coll_single '=]' \| '[=' coll_multi '=]'
523	Chclass = '[:' class ':]'
524
525	coll_single is a single char collating element but it can be
526	'-' only at the beginning or end of a List and
527	']' only at the beginning of a List and
528	'^' anywhere except after the openning '['
529	*/
530
531	static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t ctx, const char s, struct literals ls, struct neg neg)
532	{
533	const char *start = s;
534	tre_ctype_t class;
535	int min, max;
536	wchar_t wc;
537	int len;
538
539	for (;;) {
540	class = 0;
541	len = mbtowc(&wc, s, -1);
542	if (len <= 0)
543	return *s ? REG_BADPAT2 : REG_EBRACK7;
544	if (*s == ']' && s != start) {
545	ctx->s = s+1;
546	return REG_OK0;
547	}
548	if (*s == '-' && s != start && s[1] != ']' &&
549	/* extension: [a-z--@] is accepted as [a-z]\|[--@] */
550	(s[1] != '-' \|\| s[2] == ']'))
551	return REG_ERANGE11;
552	if (*s == '[' && (s[1] == '.' \|\| s[1] == '='))
553	/* collating symbols and equivalence classes are not supported */
554	return REG_ECOLLATE3;
555	if (*s == '[' && s[1] == ':') {
556	char tmp[CHARCLASS_NAME_MAX14+1];
557	s += 2;
558	for (len=0; len < CHARCLASS_NAME_MAX14 && s[len]; len++) {
559	if (s[len] == ':') {
560	memcpy(tmp, s, len);
561	tmp[len] = 0;
562	class = tre_ctypewctype(tmp);
563	break;
564	}
565	}
566	if (!class \|\| s[len+1] != ']')
567	return REG_ECTYPE4;
568	min = 0;
569	max = TRE_CHAR_MAX0x10ffff;
570	s += len+2;
571	} else {
572	min = max = wc;
573	s += len;
574	if (*s == '-' && s[1] != ']') {
575	s++;
576	len = mbtowc(&wc, s, -1);
577	max = wc;
578	/* XXX - Should use collation order instead of
579	encoding values in character ranges. */
580	if (len <= 0 \|\| min > max)
581	return REG_ERANGE11;
582	s += len;
583	}
584	}
585
586	if (class && neg->negate) {
587	if (neg->len >= MAX_NEG_CLASSES64)
588	return REG_ESPACE12;
589	neg->a[neg->len++] = class;
590	} else {
591	tre_literal_t *lit = tre_new_lit(ls);
592	if (!lit)
593	return REG_ESPACE12;
594	lit->code_min = min;
595	lit->code_max = max;
596	lit->class = class;
597	lit->position = -1;
598
599	/* Add opposite-case codepoints if REG_ICASE is present.
600	It seems that POSIX requires that bracket negation
601	should happen before case-folding, but most practical
602	implementations do it the other way around. Changing
603	the order would need efficient representation of
604	case-fold ranges and bracket range sets even with
605	simple patterns so this is ok for now. */
606	if (ctx->cflags & REG_ICASE2 && !class)
607	if (add_icase_literals(ls, min, max))
608	return REG_ESPACE12;
609	}
610	}
611	}
612
613	static reg_errcode_t parse_bracket(tre_parse_ctx_t ctx, const char s)
614	{
615	int i, max, min, negmax, negmin;
616	tre_ast_node_t node = 0, n;
617	tre_ctype_t *nc = 0;
618	tre_literal_t *lit;
619	struct literals ls;
620	struct neg neg;
621	reg_errcode_t err;
622
623	ls.mem = ctx->mem;
624	ls.len = 0;
625	ls.cap = 32;
626	ls.a = xmallocmalloc(ls.cap * sizeof *ls.a);
627	if (!ls.a)
628	return REG_ESPACE12;
629	neg.len = 0;
630	neg.negate = *s == '^';
631	if (neg.negate)
632	s++;
633
634	err = parse_bracket_terms(ctx, s, &ls, &neg);
635	if (err != REG_OK0)
636	goto parse_bracket_done;
637
638	if (neg.negate) {
639	/* Sort the array if we need to negate it. */
640	qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641	/* extra lit for the last negated range */
642	lit = tre_new_lit(&ls);
643	if (!lit) {
644	err = REG_ESPACE12;
645	goto parse_bracket_done;
646	}
647	lit->code_min = TRE_CHAR_MAX0x10ffff+1;
648	lit->code_max = TRE_CHAR_MAX0x10ffff+1;
649	lit->position = -1;
650	/* negated classes */
651	if (neg.len) {
652	nc = tre_mem_alloc(ctx->mem, (neg.len+1)sizeof neg.a)__tre_mem_alloc_impl(ctx->mem, 0, ((void)0), 0, (neg.len+ 1)sizeof *neg.a);
653	if (!nc) {
654	err = REG_ESPACE12;
655	goto parse_bracket_done;
656	}
657	memcpy(nc, neg.a, neg.lensizeof neg.a);
658	nc[neg.len] = 0;
659	}
660	}
661
662	/* Build a union of the items in the array, negated if necessary. */
663	negmax = negmin = 0;
664	for (i = 0; i < ls.len; i++) {
665	lit = ls.a[i];
666	min = lit->code_min;
667	max = lit->code_max;
668	if (neg.negate) {
669	if (min <= negmin) {
670	/* Overlap. */
671	negmin = MAX(max + 1, negmin)(((max + 1) >= (negmin)) ? (max + 1) : (negmin));
672	continue;
673	}
674	negmax = min - 1;
675	lit->code_min = negmin;
676	lit->code_max = negmax;
677	negmin = max + 1;
678	}
679	lit->position = ctx->position;
680	lit->neg_classes = nc;
681	n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682	node = tre_ast_new_union(ctx->mem, node, n);
683	if (!node) {
684	err = REG_ESPACE12;
685	break;
686	}
687	}
688
689	parse_bracket_done:
690	xfreefree(ls.a);
691	ctx->position++;
692	ctx->n = node;
693	return err;
694	}
695
696	static const char parse_dup_count(const char s, int *n)
697	{
698	*n = -1;
699	if (!isdigit(s)(0 ? isdigit(s) : ((unsigned)(*s)-'0') < 10))
700	return s;
701	*n = 0;
702	for (;;) {
703	n = 10 n + (s - '0');
704	s++;
705	if (!isdigit(s)(0 ? isdigit(s) : ((unsigned)(s)-'0') < 10) \|\| n > RE_DUP_MAX255)
706	break;
707	}
708	return s;
709	}
710
711	static reg_errcode_t parse_dup(tre_parse_ctx_t ctx, const char s)
712	{
713	int min, max;
714
715	s = parse_dup_count(s, &min);
716	if (*s == ',')
717	s = parse_dup_count(s+1, &max);
718	else
719	max = min;
720
721	if (
722	(max < min && max >= 0) \|\|
723	max > RE_DUP_MAX255 \|\|
724	min > RE_DUP_MAX255 \|\|
725	min < 0 \|\|
726	(!(ctx->cflags & REG_EXTENDED1) && *s++ != '\\') \|\|
727	*s++ != '}'
728	)
729	return REG_BADBR10;
730
731	if (min == 0 && max == 0)
732	ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
733	else
734	ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
735	if (!ctx->n)
736	return REG_ESPACE12;
737	ctx->s = s;
738	return REG_OK0;
739	}
740
741	static int hexval(unsigned c)
742	{
743	if (c-'0'<10) return c-'0';
744	c \|= 32;
745	if (c-'a'<6) return c-'a'+10;
746	return -1;
747	}
748
749	static reg_errcode_t marksub(tre_parse_ctx_t ctx, tre_ast_node_t node, int subid)
750	{
751	if (node->submatch_id >= 0) {
752	tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
753	if (!n)
754	return REG_ESPACE12;
755	n = tre_ast_new_catenation(ctx->mem, n, node);
756	if (!n)
757	return REG_ESPACE12;
758	n->num_submatches = node->num_submatches;
759	node = n;
760	}
761	node->submatch_id = subid;
762	node->num_submatches++;
763	ctx->n = node;
764	return REG_OK0;
765	}
766
767	/*
768	BRE grammar:
769	Regex = Branch \| '^' \| '$' \| '^$' \| '^' Branch \| Branch '$' \| '^' Branch '$'
770	Branch = Atom \| Branch Atom
771	Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '$' Branch '$' \| back_ref
772	Dup = '*' \| '\{' Count '\}' \| '\{' Count ',\}' \| '\{' Count ',' Count '\}'
773
774	(leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
775
776	ERE grammar:
777	Regex = Branch \| Regex '\|' Branch
778	Branch = Atom \| Branch Atom
779	Atom = char \| quoted_char \| '.' \| Bracket \| Atom Dup \| '(' Regex ')' \| '^' \| '$'
780	Dup = '*' \| '+' \| '?' \| '{' Count '}' \| '{' Count ',}' \| '{' Count ',' Count '}'
781
782	(a+?, ^, $+, \X, {, (\|a) are unspecified)
783	*/
784
785	static reg_errcode_t parse_atom(tre_parse_ctx_t ctx, const char s)
786	{
787	int len, ere = ctx->cflags & REG_EXTENDED1;
788	const char *p;
789	tre_ast_node_t *node;
790	wchar_t wc;
791	switch (*s) {
792	case '[':
793	return parse_bracket(ctx, s+1);
794	case '\\':
795	p = tre_expand_macro(s+1);
796	if (p) {
797	/* assume \X expansion is a single atom */
798	reg_errcode_t err = parse_atom(ctx, p);
799	ctx->s = s+2;
800	return err;
801	}
802	/* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
803	switch (*++s) {
804	case 0:
805	return REG_EESCAPE5;
806	case 'b':
807	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB64, -1);
808	break;
809	case 'B':
810	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_WB_NEG128, -1);
811	break;
812	case '<':
813	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOW16, -1);
814	break;
815	case '>':
816	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOW32, -1);
817	break;
818	case 'x':
819	s++;
820	int i, v = 0, c;
821	len = 2;
822	if (*s == '{') {
823	len = 8;
824	s++;
825	}
826	for (i=0; i<len && v<0x110000; i++) {
827	c = hexval(s[i]);
828	if (c < 0) break;
829	v = 16*v + c;
830	}
831	s += i;
832	if (len == 8) {
833	if (*s != '}')
834	return REG_EBRACE9;
835	s++;
836	}
837	node = tre_ast_new_literal(ctx->mem, v, v, ctx->position);
838	ctx->position++;
839	s--;
840	break;
841	default:
842	if (!ere && (unsigned)*s-'1' < 9) {
843	/* back reference */
844	int val = *s - '0';
845	node = tre_ast_new_literal(ctx->mem, BACKREF-4, val, ctx->position);
846	ctx->max_backref = MAX(val, ctx->max_backref)(((val) >= (ctx->max_backref)) ? (val) : (ctx->max_backref ));
847	} else {
848	/* extension: accept unknown escaped char
849	as a literal */
850	goto parse_literal;
851	}
852	ctx->position++;
853	}
854	s++;
855	break;
856	case '.':
857	if (ctx->cflags & REG_NEWLINE4) {
858	tre_ast_node_t tmp1, tmp2;
859	tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
860	tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX0x10ffff, ctx->position++);
861	if (tmp1 && tmp2)
862	node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
863	else
864	node = 0;
865	} else {
866	node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX0x10ffff, ctx->position++);
867	}
868	s++;
869	break;
870	case '^':
871	/* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
872	if (!ere && s != ctx->re)
873	goto parse_literal;
874	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_BOL1, -1);
875	s++;
876	break;
877	case '$':
878	/* '$' is special everywhere in EREs, and in the end of the string in BREs. */
879	if (!ere && s[1])
880	goto parse_literal;
881	node = tre_ast_new_literal(ctx->mem, ASSERTION-2, ASSERT_AT_EOL2, -1);
882	s++;
883	break;
884	case '*':
885	case '\|':
886	case '{':
887	case '+':
888	case '?':
889	if (!ere)
890	goto parse_literal;
891	case 0:
892	node = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
893	break;
894	default:
895	parse_literal:
896	len = mbtowc(&wc, s, -1);
897	if (len < 0)
898	return REG_BADPAT2;
899	if (ctx->cflags & REG_ICASE2 && (tre_isupperiswupper(wc) \|\| tre_isloweriswlower(wc))) {
900	tre_ast_node_t tmp1, tmp2;
901	/* multiple opposite case characters are not supported */
902	tmp1 = tre_ast_new_literal(ctx->mem, tre_touppertowupper(wc), tre_touppertowupper(wc), ctx->position);
903	tmp2 = tre_ast_new_literal(ctx->mem, tre_tolowertowlower(wc), tre_tolowertowlower(wc), ctx->position);
904	if (tmp1 && tmp2)
905	node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
906	else
907	node = 0;
908	} else {
909	node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
910	}
911	ctx->position++;
912	s += len;
913	break;
914	}
915	if (!node)
916	return REG_ESPACE12;
917	ctx->n = node;
918	ctx->s = s;
919	return REG_OK0;
920	}
921
922	#define PUSHPTR(err, s, v)do { if ((err = tre_stack_push_voidptr(s, v)) != 0) return err ; } while(0) do { \
923	if ((err = tre_stack_push_voidptr(s, v)) != REG_OK0) \
924	return err; \
925	} while(0)
926
927	#define PUSHINT(err, s, v)do { if ((err = tre_stack_push_int(s, v)) != 0) return err; } while(0) do { \
928	if ((err = tre_stack_push_int(s, v)) != REG_OK0) \
929	return err; \
930	} while(0)
931
932	static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
933	{
934	tre_ast_node_t nbranch=0, nunion=0;
935	int ere = ctx->cflags & REG_EXTENDED1;
936	const char *s = ctx->re;
937	int subid = 0;
938	int depth = 0;
939	reg_errcode_t err;
940	tre_stack_t *stack = ctx->stack;
941
942	PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return err; } while(0);
943	for (;;) {
944	if ((!ere && *s == '\\' && s[1] == '(') \|\|
945	(ere && *s == '(')) {
946	PUSHPTR(err, stack, nunion)do { if ((err = tre_stack_push_voidptr(stack, nunion)) != 0) return err; } while(0);
947	PUSHPTR(err, stack, nbranch)do { if ((err = tre_stack_push_voidptr(stack, nbranch)) != 0) return err; } while(0);
948	PUSHINT(err, stack, subid++)do { if ((err = tre_stack_push_int(stack, subid++)) != 0) return err; } while(0);
949	s++;
950	if (!ere)
951	s++;
952	depth++;
953	nbranch = nunion = 0;
954	continue;
955	}
956	if ((!ere && *s == '\\' && s[1] == ')') \|\|
957	(ere && *s == ')' && depth)) {
958	ctx->n = tre_ast_new_literal(ctx->mem, EMPTY-1, -1, -1);
959	if (!ctx->n)
960	return REG_ESPACE12;
961	} else {
962	err = parse_atom(ctx, s);
963	if (err != REG_OK0)
964	return err;
965	s = ctx->s;
966	}
967
968	parse_iter:
969	/* extension: repetitions are accepted after an empty node
970	eg. (+), ^, a$?, a\|{2} /
971	switch (*s) {
972	case '+':
973	case '?':
974	if (!ere)
975	break;
976	/* fallthrough */
977	case '*':;
978	int min=0, max=-1;
979	if (*s == '+')
980	min = 1;
981	if (*s == '?')
982	max = 1;
983	s++;
984	ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
985	if (!ctx->n)
986	return REG_ESPACE12;
987	/* extension: multiple consecutive *+?{,} is unspecified,
988	but (a+)+ has to be supported so accepting a++ makes
989	sense, note however that the RE_DUP_MAX limit can be
990	circumvented: (a{255}){255} uses a lot of memory.. */
991	goto parse_iter;
992	case '\\':
993	if (ere \|\| s[1] != '{')
994	break;
995	s++;
996	goto parse_brace;
997	case '{':
998	if (!ere)
999	break;
1000	parse_brace:
1001	err = parse_dup(ctx, s+1);
1002	if (err != REG_OK0)
1003	return err;
1004	s = ctx->s;
1005	goto parse_iter;
1006	}
1007
1008	nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1009	if ((ere && *s == '\|') \|\|
1010	(ere && *s == ')' && depth) \|\|
1011	(!ere && *s == '\\' && s[1] == ')') \|\|
1012	!*s) {
1013	/* extension: empty branch is unspecified (), (\|a), (a\|)
1014	here they are not rejected but match on empty string */
1015	int c = *s;
1016	nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1017	nbranch = 0;
1018	if (c != '\|') {
1019	if (c == '\\') {
1020	if (!depth) return REG_EPAREN8;
1021	s+=2;
1022	} else if (c == ')')
1023	s++;
1024	depth--;
1025	err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1026	if (err != REG_OK0)
1027	return err;
1028	if (!c && depth<0) {
1029	ctx->submatch_id = subid;
1030	return REG_OK0;
1031	}
1032	if (!c \|\| depth<0)
1033	return REG_EPAREN8;
1034	nbranch = tre_stack_pop_voidptr(stack);
1035	nunion = tre_stack_pop_voidptr(stack);
1036	goto parse_iter;
1037	}
1038	s++;
1039	}
1040	}
1041	}
1042
1043
1044	/***********************************************************************
1045	from tre-compile.c
1046	***********************************************************************/
1047
1048
1049	/*
1050	TODO:
1051	- Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1052	function calls.
1053	*/
1054
1055	/*
1056	Algorithms to setup tags so that submatch addressing can be done.
1057	*/
1058
1059
1060	/* Inserts a catenation node to the root of the tree given in `node'.
1061	As the left child a new tag with number `tag_id' to `node' is added,
1062	and the right child is the old root. */
1063	static reg_errcode_t
1064	tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1065	{
1066	tre_catenation_t *c;
1067
1068	c = tre_mem_alloc(mem, sizeof(c))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, sizeof(*c));
1069	if (c == NULL((void*)0))
1070	return REG_ESPACE12;
1071	c->left = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1072	if (c->left == NULL((void*)0))
1073	return REG_ESPACE12;
1074	c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t ));
1075	if (c->right == NULL((void*)0))
1076	return REG_ESPACE12;
1077
1078	c->right->obj = node->obj;
1079	c->right->type = node->type;
1080	c->right->nullable = -1;
1081	c->right->submatch_id = -1;
1082	c->right->firstpos = NULL((void*)0);
1083	c->right->lastpos = NULL((void*)0);
1084	c->right->num_tags = 0;
1085	node->obj = c;
1086	node->type = CATENATION;
1087	return REG_OK0;
1088	}
1089
1090	/* Inserts a catenation node to the root of the tree given in `node'.
1091	As the right child a new tag with number `tag_id' to `node' is added,
1092	and the left child is the old root. */
1093	static reg_errcode_t
1094	tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1095	{
1096	tre_catenation_t *c;
1097
1098	c = tre_mem_alloc(mem, sizeof(c))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, sizeof(*c));
1099	if (c == NULL((void*)0))
1100	return REG_ESPACE12;
1101	c->right = tre_ast_new_literal(mem, TAG-3, tag_id, -1);
1102	if (c->right == NULL((void*)0))
1103	return REG_ESPACE12;
1104	c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(tre_ast_node_t ));
1105	if (c->left == NULL((void*)0))
1106	return REG_ESPACE12;
1107
1108	c->left->obj = node->obj;
1109	c->left->type = node->type;
1110	c->left->nullable = -1;
1111	c->left->submatch_id = -1;
1112	c->left->firstpos = NULL((void*)0);
1113	c->left->lastpos = NULL((void*)0);
1114	c->left->num_tags = 0;
1115	node->obj = c;
1116	node->type = CATENATION;
1117	return REG_OK0;
1118	}
1119
1120	typedef enum {
1121	ADDTAGS_RECURSE,
1122	ADDTAGS_AFTER_ITERATION,
1123	ADDTAGS_AFTER_UNION_LEFT,
1124	ADDTAGS_AFTER_UNION_RIGHT,
1125	ADDTAGS_AFTER_CAT_LEFT,
1126	ADDTAGS_AFTER_CAT_RIGHT,
1127	ADDTAGS_SET_SUBMATCH_END
1128	} tre_addtags_symbol_t;
1129
1130
1131	typedef struct {
1132	int tag;
1133	int next_tag;
1134	} tre_tag_states_t;
1135
1136
1137	/* Go through `regset' and set submatch data for submatches that are
1138	using this tag. */
1139	static void
1140	tre_purge_regset(int regset, tre_tnfa_t tnfa, int tag)
1141	{
1142	int i;
1143
1144	for (i = 0; regset[i] >= 0; i++)
1145	{
1146	int id = regset[i] / 2;
1147	int start = !(regset[i] % 2);
1148	if (start)
1149	tnfa->submatch_data[id].so_tag = tag;
1150	else
1151	tnfa->submatch_data[id].eo_tag = tag;
1152	}
1153	regset[0] = -1;
1154	}
1155
1156
1157	/* Adds tags to appropriate locations in the parse tree in `tree', so that
1158	subexpressions marked for submatch addressing can be traced. */
1159	static reg_errcode_t
1160	tre_add_tags(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree,
1161	tre_tnfa_t *tnfa)
1162	{
1163	reg_errcode_t status = REG_OK0;
1164	tre_addtags_symbol_t symbol;
1165	tre_ast_node_t node = tree; / Tree node we are currently looking at. */
1166	int bottom = tre_stack_num_objects(stack);
1167	/* True for first pass (counting number of needed tags) */
1168	int first_pass = (mem == NULL((void)0) \|\| tnfa == NULL((void)0));
1169	int regset, orig_regset;
1170	int num_tags = 0; /* Total number of tags. */
1171	int num_minimals = 0; /* Number of special minimal tags. */
1172	int tag = 0; /* The tag that is to be added next. */
1173	int next_tag = 1; /* Next tag to use after this one. */
1174	int parents; / Stack of submatches the current submatch is
1175	contained in. */
1176	int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1177	tre_tag_states_t *saved_states;
1178
1179	tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1180	if (!first_pass)
1181	{
1182	tnfa->end_tag = 0;
1183	tnfa->minimal_tags[0] = -1;
1184	}
1185
1186	regset = xmallocmalloc(sizeof(regset) ((tnfa->num_submatches + 1) * 2));
1187	if (regset == NULL((void*)0))
1188	return REG_ESPACE12;
1189	regset[0] = -1;
1190	orig_regset = regset;
1191
1192	parents = xmallocmalloc(sizeof(parents) (tnfa->num_submatches + 1));
1193	if (parents == NULL((void*)0))
1194	{
1195	xfreefree(regset);
1196	return REG_ESPACE12;
1197	}
1198	parents[0] = -1;
1199
1200	saved_states = xmallocmalloc(sizeof(saved_states) (tnfa->num_submatches + 1));
1201	if (saved_states == NULL((void*)0))
1202	{
1203	xfreefree(regset);
1204	xfreefree(parents);
1205	return REG_ESPACE12;
1206	}
1207	else
1208	{
1209	unsigned int i;
1210	for (i = 0; i <= tnfa->num_submatches; i++)
1211	saved_states[i].tag = -1;
1212	}
1213
1214	STACK_PUSH(stack, voidptr, node)do { status = tre_stack_push_voidptr(stack, node); } while ( 0 );
1215	STACK_PUSH(stack, int, ADDTAGS_RECURSE)do { status = tre_stack_push_int(stack, ADDTAGS_RECURSE); } while ( 0);
1216
1217	while (tre_stack_num_objects(stack) > bottom)
1218	{
1219	if (status != REG_OK0)
1220	break;
1221
1222	symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1223	switch (symbol)
1224	{
1225
1226	case ADDTAGS_SET_SUBMATCH_END:
1227	{
1228	int id = tre_stack_pop_int(stack);
1229	int i;
1230
1231	/* Add end of this submatch to regset. */
1232	for (i = 0; regset[i] >= 0; i++);
1233	regset[i] = id * 2 + 1;
1234	regset[i + 1] = -1;
1235
1236	/* Pop this submatch from the parents stack. */
1237	for (i = 0; parents[i] >= 0; i++);
1238	parents[i - 1] = -1;
1239	break;
1240	}
1241
1242	case ADDTAGS_RECURSE:
1243	node = tre_stack_pop_voidptr(stack);
1244
1245	if (node->submatch_id >= 0)
1246	{
1247	int id = node->submatch_id;
1248	int i;
1249
1250
1251	/* Add start of this submatch to regset. */
1252	for (i = 0; regset[i] >= 0; i++);
1253	regset[i] = id * 2;
1254	regset[i + 1] = -1;
1255
1256	if (!first_pass)
1257	{
1258	for (i = 0; parents[i] >= 0; i++);
1259	tnfa->submatch_data[id].parents = NULL((void*)0);
1260	if (i > 0)
1261	{
1262	int p = xmallocmalloc(sizeof(p) * (i + 1));
1263	if (p == NULL((void*)0))
1264	{
1265	status = REG_ESPACE12;
1266	break;
1267	}
1268	assert(tnfa->submatch_data[id].parents == NULL)(void)0;
1269	tnfa->submatch_data[id].parents = p;
1270	for (i = 0; parents[i] >= 0; i++)
1271	p[i] = parents[i];
1272	p[i] = -1;
1273	}
1274	}
1275
1276	/* Add end of this submatch to regset after processing this
1277	node. */
1278	STACK_PUSHX(stack, int, node->submatch_id){ status = tre_stack_push_int(stack, node->submatch_id); if (status != 0) break; };
1279	STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END){ status = tre_stack_push_int(stack, ADDTAGS_SET_SUBMATCH_END ); if (status != 0) break; };
1280	}
1281
1282	switch (node->type)
1283	{
1284	case LITERAL:
1285	{
1286	tre_literal_t *lit = node->obj;
1287
1288	if (!IS_SPECIAL(lit)((lit)->code_min < 0) \|\| IS_BACKREF(lit)((lit)->code_min == -4))
1289	{
1290	int i;
1291	if (regset[0] >= 0)
1292	{
1293	/* Regset is not empty, so add a tag before the
1294	literal or backref. */
1295	if (!first_pass)
1296	{
1297	status = tre_add_tag_left(mem, node, tag);
1298	tnfa->tag_directions[tag] = direction;
1299	if (minimal_tag >= 0)
1300	{
1301	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1302	tnfa->minimal_tags[i] = tag;
1303	tnfa->minimal_tags[i + 1] = minimal_tag;
1304	tnfa->minimal_tags[i + 2] = -1;
1305	minimal_tag = -1;
1306	num_minimals++;
1307	}
1308	tre_purge_regset(regset, tnfa, tag);
1309	}
1310	else
1311	{
1312	node->num_tags = 1;
1313	}
1314
1315	regset[0] = -1;
1316	tag = next_tag;
1317	num_tags++;
1318	next_tag++;
1319	}
1320	}
1321	else
1322	{
1323	assert(!IS_TAG(lit))(void)0;
1324	}
1325	break;
1326	}
1327	case CATENATION:
1328	{
1329	tre_catenation_t *cat = node->obj;
1330	tre_ast_node_t *left = cat->left;
1331	tre_ast_node_t *right = cat->right;
1332	int reserved_tag = -1;
1333
1334
1335	/* After processing right child. */
1336	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1337	STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_RIGHT) ; if (status != 0) break; };
1338
1339	/* Process right child. */
1340	STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status != 0) break; };
1341	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1342
1343	/* After processing left child. */
1344	STACK_PUSHX(stack, int, next_tag + left->num_tags){ status = tre_stack_push_int(stack, next_tag + left->num_tags ); if (status != 0) break; };
1345	if (left->num_tags > 0 && right->num_tags > 0)
1346	{
1347	/* Reserve the next tag to the right child. */
1348	reserved_tag = next_tag;
1349	next_tag++;
1350	}
1351	STACK_PUSHX(stack, int, reserved_tag){ status = tre_stack_push_int(stack, reserved_tag); if (status != 0) break; };
1352	STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_CAT_LEFT); if (status != 0) break; };
1353
1354	/* Process left child. */
1355	STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status != 0) break; };
1356	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1357
1358	}
1359	break;
1360	case ITERATION:
1361	{
1362	tre_iteration_t *iter = node->obj;
1363
1364	if (first_pass)
1365	{
1366	STACK_PUSHX(stack, int, regset[0] >= 0 \|\| iter->minimal){ status = tre_stack_push_int(stack, regset[0] >= 0 \|\| iter ->minimal); if (status != 0) break; };
1367	}
1368	else
1369	{
1370	STACK_PUSHX(stack, int, tag){ status = tre_stack_push_int(stack, tag); if (status != 0) break ; };
1371	STACK_PUSHX(stack, int, iter->minimal){ status = tre_stack_push_int(stack, iter->minimal); if (status != 0) break; };
1372	}
1373	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1374	STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_ITERATION) ; if (status != 0) break; };
1375
1376	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
1377	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1378
1379	/* Regset is not empty, so add a tag here. */
1380	if (regset[0] >= 0 \|\| iter->minimal)
1381	{
1382	if (!first_pass)
1383	{
1384	int i;
1385	status = tre_add_tag_left(mem, node, tag);
1386	if (iter->minimal)
1387	tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1388	else
1389	tnfa->tag_directions[tag] = direction;
1390	if (minimal_tag >= 0)
1391	{
1392	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1393	tnfa->minimal_tags[i] = tag;
1394	tnfa->minimal_tags[i + 1] = minimal_tag;
1395	tnfa->minimal_tags[i + 2] = -1;
1396	minimal_tag = -1;
1397	num_minimals++;
1398	}
1399	tre_purge_regset(regset, tnfa, tag);
1400	}
1401
1402	regset[0] = -1;
1403	tag = next_tag;
1404	num_tags++;
1405	next_tag++;
1406	}
1407	direction = TRE_TAG_MINIMIZE;
1408	}
1409	break;
1410	case UNION:
1411	{
1412	tre_union_t *uni = node->obj;
1413	tre_ast_node_t *left = uni->left;
1414	tre_ast_node_t *right = uni->right;
1415	int left_tag;
1416	int right_tag;
1417
1418	if (regset[0] >= 0)
1419	{
1420	left_tag = next_tag;
1421	right_tag = next_tag + 1;
1422	}
1423	else
1424	{
1425	left_tag = tag;
1426	right_tag = next_tag;
1427	}
1428
1429	/* After processing right child. */
1430	STACK_PUSHX(stack, int, right_tag){ status = tre_stack_push_int(stack, right_tag); if (status != 0) break; };
1431	STACK_PUSHX(stack, int, left_tag){ status = tre_stack_push_int(stack, left_tag); if (status != 0) break; };
1432	STACK_PUSHX(stack, voidptr, regset){ status = tre_stack_push_voidptr(stack, regset); if (status != 0) break; };
1433	STACK_PUSHX(stack, int, regset[0] >= 0){ status = tre_stack_push_int(stack, regset[0] >= 0); if ( status != 0) break; };
1434	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1435	STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status != 0) break; };
1436	STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status != 0) break; };
1437	STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_RIGHT ); if (status != 0) break; };
1438
1439	/* Process right child. */
1440	STACK_PUSHX(stack, voidptr, right){ status = tre_stack_push_voidptr(stack, right); if (status != 0) break; };
1441	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1442
1443	/* After processing left child. */
1444	STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT){ status = tre_stack_push_int(stack, ADDTAGS_AFTER_UNION_LEFT ); if (status != 0) break; };
1445
1446	/* Process left child. */
1447	STACK_PUSHX(stack, voidptr, left){ status = tre_stack_push_voidptr(stack, left); if (status != 0) break; };
1448	STACK_PUSHX(stack, int, ADDTAGS_RECURSE){ status = tre_stack_push_int(stack, ADDTAGS_RECURSE); if (status != 0) break; };
1449
1450	/* Regset is not empty, so add a tag here. */
1451	if (regset[0] >= 0)
1452	{
1453	if (!first_pass)
1454	{
1455	int i;
1456	status = tre_add_tag_left(mem, node, tag);
1457	tnfa->tag_directions[tag] = direction;
1458	if (minimal_tag >= 0)
1459	{
1460	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1461	tnfa->minimal_tags[i] = tag;
1462	tnfa->minimal_tags[i + 1] = minimal_tag;
1463	tnfa->minimal_tags[i + 2] = -1;
1464	minimal_tag = -1;
1465	num_minimals++;
1466	}
1467	tre_purge_regset(regset, tnfa, tag);
1468	}
1469
1470	regset[0] = -1;
1471	tag = next_tag;
1472	num_tags++;
1473	next_tag++;
1474	}
1475
1476	if (node->num_submatches > 0)
1477	{
1478	/* The next two tags are reserved for markers. */
1479	next_tag++;
1480	tag = next_tag;
1481	next_tag++;
1482	}
1483
1484	break;
1485	}
1486	}
1487
1488	if (node->submatch_id >= 0)
1489	{
1490	int i;
1491	/* Push this submatch on the parents stack. */
1492	for (i = 0; parents[i] >= 0; i++);
1493	parents[i] = node->submatch_id;
1494	parents[i + 1] = -1;
1495	}
1496
1497	break; /* end case: ADDTAGS_RECURSE */
1498
1499	case ADDTAGS_AFTER_ITERATION:
1500	{
1501	int minimal = 0;
1502	int enter_tag;
1503	node = tre_stack_pop_voidptr(stack);
1504	if (first_pass)
1505	{
1506	node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1507	+ tre_stack_pop_int(stack);
1508	minimal_tag = -1;
1509	}
1510	else
1511	{
1512	minimal = tre_stack_pop_int(stack);
1513	enter_tag = tre_stack_pop_int(stack);
1514	if (minimal)
1515	minimal_tag = enter_tag;
1516	}
1517
1518	if (!first_pass)
1519	{
1520	if (minimal)
1521	direction = TRE_TAG_MINIMIZE;
1522	else
1523	direction = TRE_TAG_MAXIMIZE;
1524	}
1525	break;
1526	}
1527
1528	case ADDTAGS_AFTER_CAT_LEFT:
1529	{
1530	int new_tag = tre_stack_pop_int(stack);
1531	next_tag = tre_stack_pop_int(stack);
1532	if (new_tag >= 0)
1533	{
1534	tag = new_tag;
1535	}
1536	break;
1537	}
1538
1539	case ADDTAGS_AFTER_CAT_RIGHT:
1540	node = tre_stack_pop_voidptr(stack);
1541	if (first_pass)
1542	node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1543	+ ((tre_catenation_t *)node->obj)->right->num_tags;
1544	break;
1545
1546	case ADDTAGS_AFTER_UNION_LEFT:
1547	/* Lift the bottom of the `regset' array so that when processing
1548	the right operand the items currently in the array are
1549	invisible. The original bottom was saved at ADDTAGS_UNION and
1550	will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1551	while (*regset >= 0)
1552	regset++;
1553	break;
1554
1555	case ADDTAGS_AFTER_UNION_RIGHT:
1556	{
1557	int added_tags, tag_left, tag_right;
1558	tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1559	tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1560	node = tre_stack_pop_voidptr(stack);
1561	added_tags = tre_stack_pop_int(stack);
1562	if (first_pass)
1563	{
1564	node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1565	+ ((tre_union_t *)node->obj)->right->num_tags + added_tags
1566	+ ((node->num_submatches > 0) ? 2 : 0);
1567	}
1568	regset = tre_stack_pop_voidptr(stack);
1569	tag_left = tre_stack_pop_int(stack);
1570	tag_right = tre_stack_pop_int(stack);
1571
1572	/* Add tags after both children, the left child gets a smaller
1573	tag than the right child. This guarantees that we prefer
1574	the left child over the right child. */
1575	/* XXX - This is not always necessary (if the children have
1576	tags which must be seen for every match of that child). */
1577	/* XXX - Check if this is the only place where tre_add_tag_right
1578	is used. If so, use tre_add_tag_left (putting the tag before
1579	the child as opposed after the child) and throw away
1580	tre_add_tag_right. */
1581	if (node->num_submatches > 0)
1582	{
1583	if (!first_pass)
1584	{
1585	status = tre_add_tag_right(mem, left, tag_left);
	Value stored to 'status' is never read
1586	tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1587	status = tre_add_tag_right(mem, right, tag_right);
1588	tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1589	}
1590	num_tags += 2;
1591	}
1592	direction = TRE_TAG_MAXIMIZE;
1593	break;
1594	}
1595
1596	default:
1597	assert(0)(void)0;
1598	break;
1599
1600	} /* end switch(symbol) */
1601	} /* end while(tre_stack_num_objects(stack) > bottom) */
1602
1603	if (!first_pass)
1604	tre_purge_regset(regset, tnfa, tag);
1605
1606	if (!first_pass && minimal_tag >= 0)
1607	{
1608	int i;
1609	for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1610	tnfa->minimal_tags[i] = tag;
1611	tnfa->minimal_tags[i + 1] = minimal_tag;
1612	tnfa->minimal_tags[i + 2] = -1;
1613	minimal_tag = -1;
1614	num_minimals++;
1615	}
1616
1617	assert(tree->num_tags == num_tags)(void)0;
1618	tnfa->end_tag = num_tags;
1619	tnfa->num_tags = num_tags;
1620	tnfa->num_minimals = num_minimals;
1621	xfreefree(orig_regset);
1622	xfreefree(parents);
1623	xfreefree(saved_states);
1624	return status;
1625	}
1626
1627
1628
1629	/*
1630	AST to TNFA compilation routines.
1631	*/
1632
1633	typedef enum {
1634	COPY_RECURSE,
1635	COPY_SET_RESULT_PTR
1636	} tre_copyast_symbol_t;
1637
1638	/* Flags for tre_copy_ast(). */
1639	#define COPY_REMOVE_TAGS1 1
1640	#define COPY_MAXIMIZE_FIRST_TAG2 2
1641
1642	static reg_errcode_t
1643	tre_copy_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,
1644	int flags, int pos_add, tre_tag_direction_t tag_directions,
1645	tre_ast_node_t *copy, int max_pos)
1646	{
1647	reg_errcode_t status = REG_OK0;
1648	int bottom = tre_stack_num_objects(stack);
1649	int num_copied = 0;
1650	int first_tag = 1;
1651	tre_ast_node_t **result = copy;
1652	tre_copyast_symbol_t symbol;
1653
1654	STACK_PUSH(stack, voidptr, ast)do { status = tre_stack_push_voidptr(stack, ast); } while ( 0 );
1655	STACK_PUSH(stack, int, COPY_RECURSE)do { status = tre_stack_push_int(stack, COPY_RECURSE); } while ( 0);
1656
1657	while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1658	{
1659	tre_ast_node_t *node;
1660	if (status != REG_OK0)
1661	break;
1662
1663	symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1664	switch (symbol)
1665	{
1666	case COPY_SET_RESULT_PTR:
1667	result = tre_stack_pop_voidptr(stack);
1668	break;
1669	case COPY_RECURSE:
1670	node = tre_stack_pop_voidptr(stack);
1671	switch (node->type)
1672	{
1673	case LITERAL:
1674	{
1675	tre_literal_t *lit = node->obj;
1676	int pos = lit->position;
1677	int min = lit->code_min;
1678	int max = lit->code_max;
1679	if (!IS_SPECIAL(lit)((lit)->code_min < 0) \|\| IS_BACKREF(lit)((lit)->code_min == -4))
1680	{
1681	/* XXX - e.g. [ab] has only one position but two
1682	nodes, so we are creating holes in the state space
1683	here. Not fatal, just wastes memory. */
1684	pos += *pos_add;
1685	num_copied++;
1686	}
1687	else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_REMOVE_TAGS1))
1688	{
1689	/* Change this tag to empty. */
1690	min = EMPTY-1;
1691	max = pos = -1;
1692	}
1693	else if (IS_TAG(lit)((lit)->code_min == -3) && (flags & COPY_MAXIMIZE_FIRST_TAG2)
1694	&& first_tag)
1695	{
1696	/* Maximize the first tag. */
1697	tag_directions[max] = TRE_TAG_MAXIMIZE;
1698	first_tag = 0;
1699	}
1700	*result = tre_ast_new_literal(mem, min, max, pos);
1701	if (result == NULL((void)0))
1702	status = REG_ESPACE12;
1703	else {
1704	tre_literal_t p = (result)->obj;
1705	p->class = lit->class;
1706	p->neg_classes = lit->neg_classes;
1707	}
1708
1709	if (pos > *max_pos)
1710	*max_pos = pos;
1711	break;
1712	}
1713	case UNION:
1714	{
1715	tre_union_t *uni = node->obj;
1716	tre_union_t *tmp;
1717	*result = tre_ast_new_union(mem, uni->left, uni->right);
1718	if (result == NULL((void)0))
1719	{
1720	status = REG_ESPACE12;
1721	break;
1722	}
1723	tmp = (*result)->obj;
1724	result = &tmp->left;
1725	STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if ( status != 0) break; };
1726	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1727	STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right); if (status != 0) break; };
1728	STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if (status != 0) break; };
1729	STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status != 0) break; };
1730	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1731	break;
1732	}
1733	case CATENATION:
1734	{
1735	tre_catenation_t *cat = node->obj;
1736	tre_catenation_t *tmp;
1737	*result = tre_ast_new_catenation(mem, cat->left, cat->right);
1738	if (result == NULL((void)0))
1739	{
1740	status = REG_ESPACE12;
1741	break;
1742	}
1743	tmp = (*result)->obj;
1744	tmp->left = NULL((void*)0);
1745	tmp->right = NULL((void*)0);
1746	result = &tmp->left;
1747
1748	STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if ( status != 0) break; };
1749	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1750	STACK_PUSHX(stack, voidptr, &tmp->right){ status = tre_stack_push_voidptr(stack, &tmp->right); if (status != 0) break; };
1751	STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR){ status = tre_stack_push_int(stack, COPY_SET_RESULT_PTR); if (status != 0) break; };
1752	STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status != 0) break; };
1753	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1754	break;
1755	}
1756	case ITERATION:
1757	{
1758	tre_iteration_t *iter = node->obj;
1759	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
1760	STACK_PUSHX(stack, int, COPY_RECURSE){ status = tre_stack_push_int(stack, COPY_RECURSE); if (status != 0) break; };
1761	*result = tre_ast_new_iter(mem, iter->arg, iter->min,
1762	iter->max, iter->minimal);
1763	if (result == NULL((void)0))
1764	{
1765	status = REG_ESPACE12;
1766	break;
1767	}
1768	iter = (*result)->obj;
1769	result = &iter->arg;
1770	break;
1771	}
1772	default:
1773	assert(0)(void)0;
1774	break;
1775	}
1776	break;
1777	}
1778	}
1779	*pos_add += num_copied;
1780	return status;
1781	}
1782
1783	typedef enum {
1784	EXPAND_RECURSE,
1785	EXPAND_AFTER_ITER
1786	} tre_expand_ast_symbol_t;
1787
1788	/* Expands each iteration node that has a finite nonzero minimum or maximum
1789	iteration count to a catenated sequence of copies of the node. */
1790	static reg_errcode_t
1791	tre_expand_ast(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t ast,
1792	int position, tre_tag_direction_t tag_directions)
1793	{
1794	reg_errcode_t status = REG_OK0;
1795	int bottom = tre_stack_num_objects(stack);
1796	int pos_add = 0;
1797	int pos_add_total = 0;
1798	int max_pos = 0;
1799	int iter_depth = 0;
1800
1801	STACK_PUSHR(stack, voidptr, ast){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ast); if (_status != 0) return _status; };
1802	STACK_PUSHR(stack, int, EXPAND_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, EXPAND_RECURSE ); if (_status != 0) return _status; };
1803	while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
1804	{
1805	tre_ast_node_t *node;
1806	tre_expand_ast_symbol_t symbol;
1807
1808	if (status != REG_OK0)
1809	break;
1810
1811	symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1812	node = tre_stack_pop_voidptr(stack);
1813	switch (symbol)
1814	{
1815	case EXPAND_RECURSE:
1816	switch (node->type)
1817	{
1818	case LITERAL:
1819	{
1820	tre_literal_t *lit= node->obj;
1821	if (!IS_SPECIAL(lit)((lit)->code_min < 0) \|\| IS_BACKREF(lit)((lit)->code_min == -4))
1822	{
1823	lit->position += pos_add;
1824	if (lit->position > max_pos)
1825	max_pos = lit->position;
1826	}
1827	break;
1828	}
1829	case UNION:
1830	{
1831	tre_union_t *uni = node->obj;
1832	STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if ( status != 0) break; };
1833	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1834	STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status != 0) break; };
1835	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1836	break;
1837	}
1838	case CATENATION:
1839	{
1840	tre_catenation_t *cat = node->obj;
1841	STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if ( status != 0) break; };
1842	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1843	STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status != 0) break; };
1844	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1845	break;
1846	}
1847	case ITERATION:
1848	{
1849	tre_iteration_t *iter = node->obj;
1850	STACK_PUSHX(stack, int, pos_add){ status = tre_stack_push_int(stack, pos_add); if (status != 0 ) break; };
1851	STACK_PUSHX(stack, voidptr, node){ status = tre_stack_push_voidptr(stack, node); if (status != 0) break; };
1852	STACK_PUSHX(stack, int, EXPAND_AFTER_ITER){ status = tre_stack_push_int(stack, EXPAND_AFTER_ITER); if ( status != 0) break; };
1853	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
1854	STACK_PUSHX(stack, int, EXPAND_RECURSE){ status = tre_stack_push_int(stack, EXPAND_RECURSE); if (status != 0) break; };
1855	/* If we are going to expand this node at EXPAND_AFTER_ITER
1856	then don't increase the `pos' fields of the nodes now, it
1857	will get done when expanding. */
1858	if (iter->min > 1 \|\| iter->max > 1)
1859	pos_add = 0;
1860	iter_depth++;
1861	break;
1862	}
1863	default:
1864	assert(0)(void)0;
1865	break;
1866	}
1867	break;
1868	case EXPAND_AFTER_ITER:
1869	{
1870	tre_iteration_t *iter = node->obj;
1871	int pos_add_last;
1872	pos_add = tre_stack_pop_int(stack);
1873	pos_add_last = pos_add;
1874	if (iter->min > 1 \|\| iter->max > 1)
1875	{
1876	tre_ast_node_t seq1 = NULL((void)0), seq2 = NULL((void)0);
1877	int j;
1878	int pos_add_save = pos_add;
1879
1880	/* Create a catenated sequence of copies of the node. */
1881	for (j = 0; j < iter->min; j++)
1882	{
1883	tre_ast_node_t *copy;
1884	/* Remove tags from all but the last copy. */
1885	int flags = ((j + 1 < iter->min)
1886	? COPY_REMOVE_TAGS1
1887	: COPY_MAXIMIZE_FIRST_TAG2);
1888	pos_add_save = pos_add;
1889	status = tre_copy_ast(mem, stack, iter->arg, flags,
1890	&pos_add, tag_directions, &copy,
1891	&max_pos);
1892	if (status != REG_OK0)
1893	return status;
1894	if (seq1 != NULL((void*)0))
1895	seq1 = tre_ast_new_catenation(mem, seq1, copy);
1896	else
1897	seq1 = copy;
1898	if (seq1 == NULL((void*)0))
1899	return REG_ESPACE12;
1900	}
1901
1902	if (iter->max == -1)
1903	{
1904	/* No upper limit. */
1905	pos_add_save = pos_add;
1906	status = tre_copy_ast(mem, stack, iter->arg, 0,
1907	&pos_add, NULL((void*)0), &seq2, &max_pos);
1908	if (status != REG_OK0)
1909	return status;
1910	seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1911	if (seq2 == NULL((void*)0))
1912	return REG_ESPACE12;
1913	}
1914	else
1915	{
1916	for (j = iter->min; j < iter->max; j++)
1917	{
1918	tre_ast_node_t tmp, copy;
1919	pos_add_save = pos_add;
1920	status = tre_copy_ast(mem, stack, iter->arg, 0,
1921	&pos_add, NULL((void*)0), &copy, &max_pos);
1922	if (status != REG_OK0)
1923	return status;
1924	if (seq2 != NULL((void*)0))
1925	seq2 = tre_ast_new_catenation(mem, copy, seq2);
1926	else
1927	seq2 = copy;
1928	if (seq2 == NULL((void*)0))
1929	return REG_ESPACE12;
1930	tmp = tre_ast_new_literal(mem, EMPTY-1, -1, -1);
1931	if (tmp == NULL((void*)0))
1932	return REG_ESPACE12;
1933	seq2 = tre_ast_new_union(mem, tmp, seq2);
1934	if (seq2 == NULL((void*)0))
1935	return REG_ESPACE12;
1936	}
1937	}
1938
1939	pos_add = pos_add_save;
1940	if (seq1 == NULL((void*)0))
1941	seq1 = seq2;
1942	else if (seq2 != NULL((void*)0))
1943	seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1944	if (seq1 == NULL((void*)0))
1945	return REG_ESPACE12;
1946	node->obj = seq1->obj;
1947	node->type = seq1->type;
1948	}
1949
1950	iter_depth--;
1951	pos_add_total += pos_add - pos_add_last;
1952	if (iter_depth == 0)
1953	pos_add = pos_add_total;
1954
1955	break;
1956	}
1957	default:
1958	assert(0)(void)0;
1959	break;
1960	}
1961	}
1962
1963	*position += pos_add_total;
1964
1965	/* `max_pos' should never be larger than `*position' if the above
1966	code works, but just an extra safeguard let's make sure
1967	`*position' is set large enough so enough memory will be
1968	allocated for the transition table. */
1969	if (max_pos > *position)
1970	*position = max_pos;
1971
1972	return status;
1973	}
1974
1975	static tre_pos_and_tags_t *
1976	tre_set_empty(tre_mem_t mem)
1977	{
1978	tre_pos_and_tags_t *new_set;
1979
1980	new_set = tre_mem_calloc(mem, sizeof(new_set))__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof(*new_set));
1981	if (new_set == NULL((void*)0))
1982	return NULL((void*)0);
1983
1984	new_set[0].position = -1;
1985	new_set[0].code_min = -1;
1986	new_set[0].code_max = -1;
1987
1988	return new_set;
1989	}
1990
1991	static tre_pos_and_tags_t *
1992	tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1993	tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1994	{
1995	tre_pos_and_tags_t *new_set;
1996
1997	new_set = tre_mem_calloc(mem, sizeof(new_set) 2)__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof(new_set) * 2);
1998	if (new_set == NULL((void*)0))
1999	return NULL((void*)0);
2000
2001	new_set[0].position = position;
2002	new_set[0].code_min = code_min;
2003	new_set[0].code_max = code_max;
2004	new_set[0].class = class;
2005	new_set[0].neg_classes = neg_classes;
2006	new_set[0].backref = backref;
2007	new_set[1].position = -1;
2008	new_set[1].code_min = -1;
2009	new_set[1].code_max = -1;
2010
2011	return new_set;
2012	}
2013
2014	static tre_pos_and_tags_t *
2015	tre_set_union(tre_mem_t mem, tre_pos_and_tags_t set1, tre_pos_and_tags_t set2,
2016	int *tags, int assertions)
2017	{
2018	int s1, s2, i, j;
2019	tre_pos_and_tags_t *new_set;
2020	int *new_tags;
2021	int num_tags;
2022
2023	for (num_tags = 0; tags != NULL((void*)0) && tags[num_tags] >= 0; num_tags++);
2024	for (s1 = 0; set1[s1].position >= 0; s1++);
2025	for (s2 = 0; set2[s2].position >= 0; s2++);
2026	new_set = tre_mem_calloc(mem, sizeof(new_set) (s1 + s2 + 1))__tre_mem_alloc_impl(mem, 0, ((void)0), 1, sizeof(new_set) * (s1 + s2 + 1));
2027	if (!new_set )
2028	return NULL((void*)0);
2029
2030	for (s1 = 0; set1[s1].position >= 0; s1++)
2031	{
2032	new_set[s1].position = set1[s1].position;
2033	new_set[s1].code_min = set1[s1].code_min;
2034	new_set[s1].code_max = set1[s1].code_max;
2035	new_set[s1].assertions = set1[s1].assertions \| assertions;
2036	new_set[s1].class = set1[s1].class;
2037	new_set[s1].neg_classes = set1[s1].neg_classes;
2038	new_set[s1].backref = set1[s1].backref;
2039	if (set1[s1].tags == NULL((void)0) && tags == NULL((void)0))
2040	new_set[s1].tags = NULL((void*)0);
2041	else
2042	{
2043	for (i = 0; set1[s1].tags != NULL((void*)0) && set1[s1].tags[i] >= 0; i++);
2044	new_tags = tre_mem_alloc(mem, (sizeof(new_tags)__tre_mem_alloc_impl(mem, 0, ((void)0), 0, (sizeof(new_tags ) (i + num_tags + 1)))
2045	* (i + num_tags + 1)))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, (sizeof(new_tags ) * (i + num_tags + 1)));
2046	if (new_tags == NULL((void*)0))
2047	return NULL((void*)0);
2048	for (j = 0; j < i; j++)
2049	new_tags[j] = set1[s1].tags[j];
2050	for (i = 0; i < num_tags; i++)
2051	new_tags[j + i] = tags[i];
2052	new_tags[j + i] = -1;
2053	new_set[s1].tags = new_tags;
2054	}
2055	}
2056
2057	for (s2 = 0; set2[s2].position >= 0; s2++)
2058	{
2059	new_set[s1 + s2].position = set2[s2].position;
2060	new_set[s1 + s2].code_min = set2[s2].code_min;
2061	new_set[s1 + s2].code_max = set2[s2].code_max;
2062	/* XXX - why not \| assertions here as well? */
2063	new_set[s1 + s2].assertions = set2[s2].assertions;
2064	new_set[s1 + s2].class = set2[s2].class;
2065	new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2066	new_set[s1 + s2].backref = set2[s2].backref;
2067	if (set2[s2].tags == NULL((void*)0))
2068	new_set[s1 + s2].tags = NULL((void*)0);
2069	else
2070	{
2071	for (i = 0; set2[s2].tags[i] >= 0; i++);
2072	new_tags = tre_mem_alloc(mem, sizeof(new_tags) (i + 1))__tre_mem_alloc_impl(mem, 0, ((void)0), 0, sizeof(new_tags) * (i + 1));
2073	if (new_tags == NULL((void*)0))
2074	return NULL((void*)0);
2075	for (j = 0; j < i; j++)
2076	new_tags[j] = set2[s2].tags[j];
2077	new_tags[j] = -1;
2078	new_set[s1 + s2].tags = new_tags;
2079	}
2080	}
2081	new_set[s1 + s2].position = -1;
2082	return new_set;
2083	}
2084
2085	/* Finds the empty path through `node' which is the one that should be
2086	taken according to POSIX.2 rules, and adds the tags on that path to
2087	`tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2088	set to the number of tags seen on the path. */
2089	static reg_errcode_t
2090	tre_match_empty(tre_stack_t stack, tre_ast_node_t node, int *tags,
2091	int assertions, int num_tags_seen)
2092	{
2093	tre_literal_t *lit;
2094	tre_union_t *uni;
2095	tre_catenation_t *cat;
2096	tre_iteration_t *iter;
2097	int i;
2098	int bottom = tre_stack_num_objects(stack);
2099	reg_errcode_t status = REG_OK0;
2100	if (num_tags_seen)
2101	*num_tags_seen = 0;
2102
2103	status = tre_stack_push_voidptr(stack, node);
2104
2105	/* Walk through the tree recursively. */
2106	while (status == REG_OK0 && tre_stack_num_objects(stack) > bottom)
2107	{
2108	node = tre_stack_pop_voidptr(stack);
2109
2110	switch (node->type)
2111	{
2112	case LITERAL:
2113	lit = (tre_literal_t *)node->obj;
2114	switch (lit->code_min)
2115	{
2116	case TAG-3:
2117	if (lit->code_max >= 0)
2118	{
2119	if (tags != NULL((void*)0))
2120	{
2121	/* Add the tag to `tags'. */
2122	for (i = 0; tags[i] >= 0; i++)
2123	if (tags[i] == lit->code_max)
2124	break;
2125	if (tags[i] < 0)
2126	{
2127	tags[i] = lit->code_max;
2128	tags[i + 1] = -1;
2129	}
2130	}
2131	if (num_tags_seen)
2132	(*num_tags_seen)++;
2133	}
2134	break;
2135	case ASSERTION-2:
2136	assert(lit->code_max >= 1(void)0
2137	\|\| lit->code_max <= ASSERT_LAST)(void)0;
2138	if (assertions != NULL((void*)0))
2139	*assertions \|= lit->code_max;
2140	break;
2141	case EMPTY-1:
2142	break;
2143	default:
2144	assert(0)(void)0;
2145	break;
2146	}
2147	break;
2148
2149	case UNION:
2150	/* Subexpressions starting earlier take priority over ones
2151	starting later, so we prefer the left subexpression over the
2152	right subexpression. */
2153	uni = (tre_union_t *)node->obj;
2154	if (uni->left->nullable)
2155	STACK_PUSHX(stack, voidptr, uni->left){ status = tre_stack_push_voidptr(stack, uni->left); if (status != 0) break; }
2156	else if (uni->right->nullable)
2157	STACK_PUSHX(stack, voidptr, uni->right){ status = tre_stack_push_voidptr(stack, uni->right); if ( status != 0) break; }
2158	else
2159	assert(0)(void)0;
2160	break;
2161
2162	case CATENATION:
2163	/* The path must go through both children. */
2164	cat = (tre_catenation_t *)node->obj;
2165	assert(cat->left->nullable)(void)0;
2166	assert(cat->right->nullable)(void)0;
2167	STACK_PUSHX(stack, voidptr, cat->left){ status = tre_stack_push_voidptr(stack, cat->left); if (status != 0) break; };
2168	STACK_PUSHX(stack, voidptr, cat->right){ status = tre_stack_push_voidptr(stack, cat->right); if ( status != 0) break; };
2169	break;
2170
2171	case ITERATION:
2172	/* A match with an empty string is preferred over no match at
2173	all, so we go through the argument if possible. */
2174	iter = (tre_iteration_t *)node->obj;
2175	if (iter->arg->nullable)
2176	STACK_PUSHX(stack, voidptr, iter->arg){ status = tre_stack_push_voidptr(stack, iter->arg); if (status != 0) break; };
2177	break;
2178
2179	default:
2180	assert(0)(void)0;
2181	break;
2182	}
2183	}
2184
2185	return status;
2186	}
2187
2188
2189	typedef enum {
2190	NFL_RECURSE,
2191	NFL_POST_UNION,
2192	NFL_POST_CATENATION,
2193	NFL_POST_ITERATION
2194	} tre_nfl_stack_symbol_t;
2195
2196
2197	/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2198	the nodes of the AST `tree'. */
2199	static reg_errcode_t
2200	tre_compute_nfl(tre_mem_t mem, tre_stack_t stack, tre_ast_node_t tree)
2201	{
2202	int bottom = tre_stack_num_objects(stack);
2203
2204	STACK_PUSHR(stack, voidptr, tree){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , tree); if (_status != 0) return _status; };
2205	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2206
2207	while (tre_stack_num_objects(stack) > bottom)
2208	{
2209	tre_nfl_stack_symbol_t symbol;
2210	tre_ast_node_t *node;
2211
2212	symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2213	node = tre_stack_pop_voidptr(stack);
2214	switch (symbol)
2215	{
2216	case NFL_RECURSE:
2217	switch (node->type)
2218	{
2219	case LITERAL:
2220	{
2221	tre_literal_t lit = (tre_literal_t )node->obj;
2222	if (IS_BACKREF(lit)((lit)->code_min == -4))
2223	{
2224	/* Back references: nullable = false, firstpos = {i},
2225	lastpos = {i}. */
2226	node->nullable = 0;
2227	node->firstpos = tre_set_one(mem, lit->position, 0,
2228	TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0), -1);
2229	if (!node->firstpos)
2230	return REG_ESPACE12;
2231	node->lastpos = tre_set_one(mem, lit->position, 0,
2232	TRE_CHAR_MAX0x10ffff, 0, NULL((void*)0),
2233	(int)lit->code_max);
2234	if (!node->lastpos)
2235	return REG_ESPACE12;
2236	}
2237	else if (lit->code_min < 0)
2238	{
2239	/* Tags, empty strings, params, and zero width assertions:
2240	nullable = true, firstpos = {}, and lastpos = {}. */
2241	node->nullable = 1;
2242	node->firstpos = tre_set_empty(mem);
2243	if (!node->firstpos)
2244	return REG_ESPACE12;
2245	node->lastpos = tre_set_empty(mem);
2246	if (!node->lastpos)
2247	return REG_ESPACE12;
2248	}
2249	else
2250	{
2251	/* Literal at position i: nullable = false, firstpos = {i},
2252	lastpos = {i}. */
2253	node->nullable = 0;
2254	node->firstpos =
2255	tre_set_one(mem, lit->position, (int)lit->code_min,
2256	(int)lit->code_max, 0, NULL((void*)0), -1);
2257	if (!node->firstpos)
2258	return REG_ESPACE12;
2259	node->lastpos = tre_set_one(mem, lit->position,
2260	(int)lit->code_min,
2261	(int)lit->code_max,
2262	lit->class, lit->neg_classes,
2263	-1);
2264	if (!node->lastpos)
2265	return REG_ESPACE12;
2266	}
2267	break;
2268	}
2269
2270	case UNION:
2271	/* Compute the attributes for the two subtrees, and after that
2272	for this node. */
2273	STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , node); if (_status != 0) return _status; };
2274	STACK_PUSHR(stack, int, NFL_POST_UNION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_UNION ); if (_status != 0) return _status; };
2275	STACK_PUSHR(stack, voidptr, ((tre_union_t )node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_union_t )node->obj)->right); if (_status != 0) return _status; };
2276	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2277	STACK_PUSHR(stack, voidptr, ((tre_union_t )node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_union_t )node->obj)->left); if (_status != 0) return _status; };
2278	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2279	break;
2280
2281	case CATENATION:
2282	/* Compute the attributes for the two subtrees, and after that
2283	for this node. */
2284	STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , node); if (_status != 0) return _status; };
2285	STACK_PUSHR(stack, int, NFL_POST_CATENATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_CATENATION ); if (_status != 0) return _status; };
2286	STACK_PUSHR(stack, voidptr, ((tre_catenation_t )node->obj)->right){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_catenation_t )node->obj)->right); if (_status != 0) return _status; };
2287	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2288	STACK_PUSHR(stack, voidptr, ((tre_catenation_t )node->obj)->left){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_catenation_t )node->obj)->left); if (_status != 0) return _status; };
2289	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2290	break;
2291
2292	case ITERATION:
2293	/* Compute the attributes for the subtree, and after that for
2294	this node. */
2295	STACK_PUSHR(stack, voidptr, node){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , node); if (_status != 0) return _status; };
2296	STACK_PUSHR(stack, int, NFL_POST_ITERATION){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_POST_ITERATION ); if (_status != 0) return _status; };
2297	STACK_PUSHR(stack, voidptr, ((tre_iteration_t )node->obj)->arg){ reg_errcode_t _status; _status = tre_stack_push_voidptr(stack , ((tre_iteration_t )node->obj)->arg); if (_status != 0 ) return _status; };
2298	STACK_PUSHR(stack, int, NFL_RECURSE){ reg_errcode_t _status; _status = tre_stack_push_int(stack, NFL_RECURSE ); if (_status != 0) return _status; };
2299	break;
2300	}
2301	break; /* end case: NFL_RECURSE */
2302
2303	case NFL_POST_UNION:
2304	{
2305	tre_union_t uni = (tre_union_t )node->obj;
2306	node->nullable = uni->left->nullable \|\| uni->right->nullable;
2307	node->firstpos = tre_set_union(mem, uni->left->firstpos,
2308	uni->right->firstpos, NULL((void*)0), 0);
2309	if (!node->firstpos)
2310	return REG_ESPACE12;
2311	node->lastpos = tre_set_union(mem, uni->left->lastpos,
2312	uni->right->lastpos, NULL((void*)0), 0);
2313	if (!node->lastpos)
2314	return REG_ESPACE12;
2315	break;
2316	}
2317
2318	case NFL_POST_ITERATION:
2319	{
2320	tre_iteration_t iter = (tre_iteration_t )node->obj;
2321
2322	if (iter->min == 0 \|\| iter->arg->nullable)
2323	node->nullable = 1;
2324	else
2325	node->nullable = 0;
2326	node->firstpos = iter->arg->firstpos;
2327	node->lastpos = iter->arg->lastpos;
2328	break;
2329	}
2330
2331	case NFL_POST_CATENATION:
2332	{
2333	int num_tags, *tags, assertions;
2334	reg_errcode_t status;
2335	tre_catenation_t *cat = node->obj;
2336	node->nullable = cat->left->nullable && cat->right->nullable;
2337
2338	/* Compute firstpos. */
2339	if (cat->left->nullable)
2340	{
2341	/* The left side matches the empty string. Make a first pass
2342	with tre_match_empty() to get the number of tags and
2343	parameters. */
2344	status = tre_match_empty(stack, cat->left,
2345	NULL((void)0), NULL((void)0), &num_tags);
2346	if (status != REG_OK0)
2347	return status;
2348	/* Allocate arrays for the tags and parameters. */
2349	tags = xmallocmalloc(sizeof(tags) (num_tags + 1));
2350	if (!tags)
2351	return REG_ESPACE12;
2352	tags[0] = -1;
2353	assertions = 0;
2354	/* Second pass with tre_mach_empty() to get the list of
2355	tags and parameters. */
2356	status = tre_match_empty(stack, cat->left, tags,
2357	&assertions, NULL((void*)0));
2358	if (status != REG_OK0)
2359	{
2360	xfreefree(tags);
2361	return status;
2362	}
2363	node->firstpos =
2364	tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2365	tags, assertions);
2366	xfreefree(tags);
2367	if (!node->firstpos)
2368	return REG_ESPACE12;
2369	}
2370	else
2371	{
2372	node->firstpos = cat->left->firstpos;
2373	}
2374
2375	/* Compute lastpos. */
2376	if (cat->right->nullable)
2377	{
2378	/* The right side matches the empty string. Make a first pass
2379	with tre_match_empty() to get the number of tags and
2380	parameters. */
2381	status = tre_match_empty(stack, cat->right,
2382	NULL((void)0), NULL((void)0), &num_tags);
2383	if (status != REG_OK0)
2384	return status;
2385	/* Allocate arrays for the tags and parameters. */
2386	tags = xmallocmalloc(sizeof(int) * (num_tags + 1));
2387	if (!tags)
2388	return REG_ESPACE12;
2389	tags[0] = -1;
2390	assertions = 0;
2391	/* Second pass with tre_mach_empty() to get the list of
2392	tags and parameters. */
2393	status = tre_match_empty(stack, cat->right, tags,
2394	&assertions, NULL((void*)0));
2395	if (status != REG_OK0)
2396	{
2397	xfreefree(tags);
2398	return status;
2399	}
2400	node->lastpos =
2401	tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2402	tags, assertions);
2403	xfreefree(tags);
2404	if (!node->lastpos)
2405	return REG_ESPACE12;
2406	}
2407	else
2408	{
2409	node->lastpos = cat->right->lastpos;
2410	}
2411	break;
2412	}
2413
2414	default:
2415	assert(0)(void)0;
2416	break;
2417	}
2418	}
2419
2420	return REG_OK0;
2421	}
2422
2423
2424	/* Adds a transition from each position in `p1' to each position in `p2'. */
2425	static reg_errcode_t
2426	tre_make_trans(tre_pos_and_tags_t p1, tre_pos_and_tags_t p2,
2427	tre_tnfa_transition_t *transitions,
2428	int counts, int offs)
2429	{
2430	tre_pos_and_tags_t *orig_p2 = p2;
2431	tre_tnfa_transition_t *trans;
2432	int i, j, k, l, dup, prev_p2_pos;
2433
2434	if (transitions != NULL((void*)0))
2435	while (p1->position >= 0)
2436	{
2437	p2 = orig_p2;
2438	prev_p2_pos = -1;
2439	while (p2->position >= 0)
2440	{
2441	/* Optimization: if this position was already handled, skip it. */
2442	if (p2->position == prev_p2_pos)
2443	{
2444	p2++;
2445	continue;
2446	}
2447	prev_p2_pos = p2->position;
2448	/* Set `trans' to point to the next unused transition from
2449	position `p1->position'. */
2450	trans = transitions + offs[p1->position];
2451	while (trans->state != NULL((void*)0))
2452	{
2453	#if 0
2454	/* If we find a previous transition from `p1->position' to
2455	`p2->position', it is overwritten. This can happen only
2456	if there are nested loops in the regexp, like in "((a))".
2457	In POSIX.2 repetition using the outer loop is always
2458	preferred over using the inner loop. Therefore the
2459	transition for the inner loop is useless and can be thrown
2460	away. */
2461	/* XXX - The same position is used for all nodes in a bracket
2462	expression, so this optimization cannot be used (it will
2463	break bracket expressions) unless I figure out a way to
2464	detect it here. */
2465	if (trans->state_id == p2->position)
2466	{
2467	break;
2468	}
2469	#endif
2470	trans++;
2471	}
2472
2473	if (trans->state == NULL((void*)0))
2474	(trans + 1)->state = NULL((void*)0);
2475	/* Use the character ranges, assertions, etc. from `p1' for
2476	the transition from `p1' to `p2'. */
2477	trans->code_min = p1->code_min;
2478	trans->code_max = p1->code_max;
2479	trans->state = transitions + offs[p2->position];
2480	trans->state_id = p2->position;
2481	trans->assertions = p1->assertions \| p2->assertions
2482	\| (p1->class ? ASSERT_CHAR_CLASS4 : 0)
2483	\| (p1->neg_classes != NULL((void*)0) ? ASSERT_CHAR_CLASS_NEG8 : 0);
2484	if (p1->backref >= 0)
2485	{
2486	assert((trans->assertions & ASSERT_CHAR_CLASS) == 0)(void)0;
2487	assert(p2->backref < 0)(void)0;
2488	trans->u.backref = p1->backref;
2489	trans->assertions \|= ASSERT_BACKREF256;
2490	}
2491	else
2492	trans->u.class = p1->class;
2493	if (p1->neg_classes != NULL((void*)0))
2494	{
2495	for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2496	trans->neg_classes =
2497	xmallocmalloc(sizeof(trans->neg_classes) (i + 1));
2498	if (trans->neg_classes == NULL((void*)0))
2499	return REG_ESPACE12;
2500	for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2501	trans->neg_classes[i] = p1->neg_classes[i];
2502	trans->neg_classes[i] = (tre_ctype_t)0;
2503	}
2504	else
2505	trans->neg_classes = NULL((void*)0);
2506
2507	/* Find out how many tags this transition has. */
2508	i = 0;
2509	if (p1->tags != NULL((void*)0))
2510	while(p1->tags[i] >= 0)
2511	i++;
2512	j = 0;
2513	if (p2->tags != NULL((void*)0))
2514	while(p2->tags[j] >= 0)
2515	j++;
2516
2517	/* If we are overwriting a transition, free the old tag array. */
2518	if (trans->tags != NULL((void*)0))
2519	xfreefree(trans->tags);
2520	trans->tags = NULL((void*)0);
2521
2522	/* If there were any tags, allocate an array and fill it. */
2523	if (i + j > 0)
2524	{
2525	trans->tags = xmallocmalloc(sizeof(trans->tags) (i + j + 1));
2526	if (!trans->tags)
2527	return REG_ESPACE12;
2528	i = 0;
2529	if (p1->tags != NULL((void*)0))
2530	while(p1->tags[i] >= 0)
2531	{
2532	trans->tags[i] = p1->tags[i];
2533	i++;
2534	}
2535	l = i;
2536	j = 0;
2537	if (p2->tags != NULL((void*)0))
2538	while (p2->tags[j] >= 0)
2539	{
2540	/* Don't add duplicates. */
2541	dup = 0;
2542	for (k = 0; k < i; k++)
2543	if (trans->tags[k] == p2->tags[j])
2544	{
2545	dup = 1;
2546	break;
2547	}
2548	if (!dup)
2549	trans->tags[l++] = p2->tags[j];
2550	j++;
2551	}
2552	trans->tags[l] = -1;
2553	}
2554
2555	p2++;
2556	}
2557	p1++;
2558	}
2559	else
2560	/* Compute a maximum limit for the number of transitions leaving
2561	from each state. */
2562	while (p1->position >= 0)
2563	{
2564	p2 = orig_p2;
2565	while (p2->position >= 0)
2566	{
2567	counts[p1->position]++;
2568	p2++;
2569	}
2570	p1++;
2571	}
2572	return REG_OK0;
2573	}
2574
2575	/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2576	labelled with one character range (there are no transitions on empty
2577	strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2578	the regexp. */
2579	static reg_errcode_t
2580	tre_ast_to_tnfa(tre_ast_node_t node, tre_tnfa_transition_t transitions,
2581	int counts, int offs)
2582	{
2583	tre_union_t *uni;
2584	tre_catenation_t *cat;
2585	tre_iteration_t *iter;
2586	reg_errcode_t errcode = REG_OK0;
2587
2588	/* XXX - recurse using a stack!. */
2589	switch (node->type)
2590	{
2591	case LITERAL:
2592	break;
2593	case UNION:
2594	uni = (tre_union_t *)node->obj;
2595	errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2596	if (errcode != REG_OK0)
2597	return errcode;
2598	errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2599	break;
2600
2601	case CATENATION:
2602	cat = (tre_catenation_t *)node->obj;
2603	/* Add a transition from each position in cat->left->lastpos
2604	to each position in cat->right->firstpos. */
2605	errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2606	transitions, counts, offs);
2607	if (errcode != REG_OK0)
2608	return errcode;
2609	errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2610	if (errcode != REG_OK0)
2611	return errcode;
2612	errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2613	break;
2614
2615	case ITERATION:
2616	iter = (tre_iteration_t *)node->obj;
2617	assert(iter->max == -1 \|\| iter->max == 1)(void)0;
2618
2619	if (iter->max == -1)
2620	{
2621	assert(iter->min == 0 \|\| iter->min == 1)(void)0;
2622	/* Add a transition from each last position in the iterated
2623	expression to each first position. */
2624	errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2625	transitions, counts, offs);
2626	if (errcode != REG_OK0)
2627	return errcode;
2628	}
2629	errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2630	break;
2631	}
2632	return errcode;
2633	}
2634
2635
2636	#define ERROR_EXIT(err)do { errcode = err; if ( 1) goto error_exit; } while ( 0) \
2637	do \
2638	{ \
2639	errcode = err; \
2640	if (/CONSTCOND/1) \
2641	goto error_exit; \
2642	} \
2643	while (/CONSTCOND/0)
2644
2645
2646	int
2647	regcomp(regex_t restrict preg, const char restrict regex, int cflags)
2648	{
2649	tre_stack_t *stack;
2650	tre_ast_node_t tree, tmp_ast_l, *tmp_ast_r;
2651	tre_pos_and_tags_t *p;
2652	int counts = NULL((void)0), offs = NULL((void)0);
2653	int i, add = 0;
2654	tre_tnfa_transition_t transitions, initial;
2655	tre_tnfa_t tnfa = NULL((void)0);
2656	tre_submatch_data_t *submatch_data;
2657	tre_tag_direction_t tag_directions = NULL((void)0);
2658	reg_errcode_t errcode;
2659	tre_mem_t mem;
2660
2661	/* Parse context. */
2662	tre_parse_ctx_t parse_ctx;
2663
2664	/* Allocate a stack used throughout the compilation process for various
2665	purposes. */
2666	stack = tre_stack_new(512, 10240, 128);
2667	if (!stack)
2668	return REG_ESPACE12;
2669	/* Allocate a fast memory allocator. */
2670	mem = tre_mem_new()__tre_mem_new_impl(0, ((void*)0));
2671	if (!mem)
2672	{
2673	tre_stack_destroy(stack);
2674	return REG_ESPACE12;
2675	}
2676
2677	/* Parse the regexp. */
2678	memset(&parse_ctx, 0, sizeof(parse_ctx));
2679	parse_ctx.mem = mem;
2680	parse_ctx.stack = stack;
2681	parse_ctx.re = regex;
2682	parse_ctx.cflags = cflags;
2683	parse_ctx.max_backref = -1;
2684	errcode = tre_parse(&parse_ctx);
2685	if (errcode != REG_OK0)
2686	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2687	preg->re_nsub = parse_ctx.submatch_id - 1;
2688	tree = parse_ctx.n;
2689
2690	#ifdef TRE_DEBUG
2691	tre_ast_print(tree);
2692	#endif /* TRE_DEBUG */
2693
2694	/* Referring to nonexistent subexpressions is illegal. */
2695	if (parse_ctx.max_backref > (int)preg->re_nsub)
2696	ERROR_EXIT(REG_ESUBREG)do { errcode = 6; if ( 1) goto error_exit; } while ( 0);
2697
2698	/* Allocate the TNFA struct. */
2699	tnfa = xcalloccalloc(1, sizeof(tre_tnfa_t));
2700	if (tnfa == NULL((void*)0))
2701	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2702	tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2703	tnfa->have_approx = 0;
2704	tnfa->num_submatches = parse_ctx.submatch_id;
2705
2706	/* Set up tags for submatch addressing. If REG_NOSUB is set and the
2707	regexp does not have back references, this can be skipped. */
2708	if (tnfa->have_backrefs \|\| !(cflags & REG_NOSUB8))
2709	{
2710
2711	/* Figure out how many tags we will need. */
2712	errcode = tre_add_tags(NULL((void*)0), stack, tree, tnfa);
2713	if (errcode != REG_OK0)
2714	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2715
2716	if (tnfa->num_tags > 0)
2717	{
2718	tag_directions = xmallocmalloc(sizeof(*tag_directions)
2719	* (tnfa->num_tags + 1));
2720	if (tag_directions == NULL((void*)0))
2721	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2722	tnfa->tag_directions = tag_directions;
2723	memset(tag_directions, -1,
2724	sizeof(tag_directions) (tnfa->num_tags + 1));
2725	}
2726	tnfa->minimal_tags = xcalloccalloc((unsigned)tnfa->num_tags * 2 + 1,
2727	sizeof(*tnfa->minimal_tags));
2728	if (tnfa->minimal_tags == NULL((void*)0))
2729	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2730
2731	submatch_data = xcalloccalloc((unsigned)parse_ctx.submatch_id,
2732	sizeof(*submatch_data));
2733	if (submatch_data == NULL((void*)0))
2734	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2735	tnfa->submatch_data = submatch_data;
2736
2737	errcode = tre_add_tags(mem, stack, tree, tnfa);
2738	if (errcode != REG_OK0)
2739	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2740
2741	}
2742
2743	/* Expand iteration nodes. */
2744	errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2745	tag_directions);
2746	if (errcode != REG_OK0)
2747	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2748
2749	/* Add a dummy node for the final state.
2750	XXX - For certain patterns this dummy node can be optimized away,
2751	for example "a" or "ab". Figure out a simple way to detect
2752	this possibility. */
2753	tmp_ast_l = tree;
2754	tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2755	if (tmp_ast_r == NULL((void*)0))
2756	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2757
2758	tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2759	if (tree == NULL((void*)0))
2760	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2761
2762	errcode = tre_compute_nfl(mem, stack, tree);
2763	if (errcode != REG_OK0)
2764	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2765
2766	counts = xmallocmalloc(sizeof(int) * parse_ctx.position);
2767	if (counts == NULL((void*)0))
2768	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2769
2770	offs = xmallocmalloc(sizeof(int) * parse_ctx.position);
2771	if (offs == NULL((void*)0))
2772	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2773
2774	for (i = 0; i < parse_ctx.position; i++)
2775	counts[i] = 0;
2776	tre_ast_to_tnfa(tree, NULL((void)0), counts, NULL((void)0));
2777
2778	add = 0;
2779	for (i = 0; i < parse_ctx.position; i++)
2780	{
2781	offs[i] = add;
2782	add += counts[i] + 1;
2783	counts[i] = 0;
2784	}
2785	transitions = xcalloccalloc((unsigned)add + 1, sizeof(*transitions));
2786	if (transitions == NULL((void*)0))
2787	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2788	tnfa->transitions = transitions;
2789	tnfa->num_transitions = add;
2790
2791	errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2792	if (errcode != REG_OK0)
2793	ERROR_EXIT(errcode)do { errcode = errcode; if ( 1) goto error_exit; } while ( 0);
2794
2795	tnfa->firstpos_chars = NULL((void*)0);
2796
2797	p = tree->firstpos;
2798	i = 0;
2799	while (p->position >= 0)
2800	{
2801	i++;
2802	p++;
2803	}
2804
2805	initial = xcalloccalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2806	if (initial == NULL((void*)0))
2807	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2808	tnfa->initial = initial;
2809
2810	i = 0;
2811	for (p = tree->firstpos; p->position >= 0; p++)
2812	{
2813	initial[i].state = transitions + offs[p->position];
2814	initial[i].state_id = p->position;
2815	initial[i].tags = NULL((void*)0);
2816	/* Copy the arrays p->tags, and p->params, they are allocated
2817	from a tre_mem object. */
2818	if (p->tags)
2819	{
2820	int j;
2821	for (j = 0; p->tags[j] >= 0; j++);
2822	initial[i].tags = xmallocmalloc(sizeof(p->tags) (j + 1));
2823	if (!initial[i].tags)
2824	ERROR_EXIT(REG_ESPACE)do { errcode = 12; if ( 1) goto error_exit; } while ( 0);
2825	memcpy(initial[i].tags, p->tags, sizeof(p->tags) (j + 1));
2826	}
2827	initial[i].assertions = p->assertions;
2828	i++;
2829	}
2830	initial[i].state = NULL((void*)0);
2831
2832	tnfa->num_transitions = add;
2833	tnfa->final = transitions + offs[tree->lastpos[0].position];
2834	tnfa->num_states = parse_ctx.position;
2835	tnfa->cflags = cflags;
2836
2837	tre_mem_destroy__tre_mem_destroy(mem);
2838	tre_stack_destroy(stack);
2839	xfreefree(counts);
2840	xfreefree(offs);
2841
2842	preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2843	return REG_OK0;
2844
2845	error_exit:
2846	/* Free everything that was allocated and return the error code. */
2847	tre_mem_destroy__tre_mem_destroy(mem);
2848	if (stack != NULL((void*)0))
2849	tre_stack_destroy(stack);
2850	if (counts != NULL((void*)0))
2851	xfreefree(counts);
2852	if (offs != NULL((void*)0))
2853	xfreefree(offs);
2854	preg->TRE_REGEX_T_FIELD__opaque = (void *)tnfa;
2855	regfree(preg);
2856	return errcode;
2857	}
2858
2859
2860
2861
2862	void
2863	regfree(regex_t *preg)
2864	{
2865	tre_tnfa_t *tnfa;
2866	unsigned int i;
2867	tre_tnfa_transition_t *trans;
2868
2869	tnfa = (void *)preg->TRE_REGEX_T_FIELD__opaque;
2870	if (!tnfa)
2871	return;
2872
2873	for (i = 0; i < tnfa->num_transitions; i++)
2874	if (tnfa->transitions[i].state)
2875	{
2876	if (tnfa->transitions[i].tags)
2877	xfreefree(tnfa->transitions[i].tags);
2878	if (tnfa->transitions[i].neg_classes)
2879	xfreefree(tnfa->transitions[i].neg_classes);
2880	}
2881	if (tnfa->transitions)
2882	xfreefree(tnfa->transitions);
2883
2884	if (tnfa->initial)
2885	{
2886	for (trans = tnfa->initial; trans->state; trans++)
2887	{
2888	if (trans->tags)
2889	xfreefree(trans->tags);
2890	}
2891	xfreefree(tnfa->initial);
2892	}
2893
2894	if (tnfa->submatch_data)
2895	{
2896	for (i = 0; i < tnfa->num_submatches; i++)
2897	if (tnfa->submatch_data[i].parents)
2898	xfreefree(tnfa->submatch_data[i].parents);
2899	xfreefree(tnfa->submatch_data);
2900	}
2901
2902	if (tnfa->tag_directions)
2903	xfreefree(tnfa->tag_directions);
2904	if (tnfa->firstpos_chars)
2905	xfreefree(tnfa->firstpos_chars);
2906	if (tnfa->minimal_tags)
2907	xfreefree(tnfa->minimal_tags);
2908	xfreefree(tnfa);
2909	}