File: | regex/regexec.c |
Location: | line 854, column 27 |
Description: | Array access (from variable 'tags') results in a null pointer dereference |
1 | /* | |||
2 | regexec.c - TRE POSIX compatible matching functions (and more). | |||
3 | ||||
4 | Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> | |||
5 | All rights reserved. | |||
6 | ||||
7 | Redistribution and use in source and binary forms, with or without | |||
8 | modification, are permitted provided that the following conditions | |||
9 | are met: | |||
10 | ||||
11 | 1. Redistributions of source code must retain the above copyright | |||
12 | notice, this list of conditions and the following disclaimer. | |||
13 | ||||
14 | 2. Redistributions in binary form must reproduce the above copyright | |||
15 | notice, this list of conditions and the following disclaimer in the | |||
16 | documentation and/or other materials provided with the distribution. | |||
17 | ||||
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS | |||
19 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
29 | ||||
30 | */ | |||
31 | ||||
32 | #include <stdlib.h> | |||
33 | #include <string.h> | |||
34 | #include <wchar.h> | |||
35 | #include <wctype.h> | |||
36 | #include <limits.h> | |||
37 | ||||
38 | #include <regex.h> | |||
39 | ||||
40 | #include "tre.h" | |||
41 | ||||
42 | #include <assert.h> | |||
43 | ||||
44 | static void | |||
45 | tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, | |||
46 | const tre_tnfa_t *tnfa, int *tags, int match_eo); | |||
47 | ||||
48 | /*********************************************************************** | |||
49 | from tre-match-utils.h | |||
50 | ***********************************************************************/ | |||
51 | ||||
52 | #define GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0) do { \ | |||
53 | prev_c = next_c; pos += pos_add_next; \ | |||
54 | if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX4)) <= 0) { \ | |||
55 | if (pos_add_next < 0) { ret = REG_NOMATCH1; goto error_exit; } \ | |||
56 | else pos_add_next++; \ | |||
57 | } \ | |||
58 | str_byte += pos_add_next; \ | |||
59 | } while (0) | |||
60 | ||||
61 | #define IS_WORD_CHAR(c)((c) == L'_' || iswalnum(c)) ((c) == L'_' || tre_isalnumiswalnum(c)) | |||
62 | ||||
63 | #define CHECK_ASSERTIONS(assertions)(((assertions & 1) && (pos > 0 || reg_notbol) && (prev_c != L'\n' || !reg_newline)) || ((assertions & 2) && (next_c != L'\0' || reg_noteol) && (next_c != L'\n' || !reg_newline)) || ((assertions & 16) && (((prev_c ) == L'_' || iswalnum(prev_c)) || !((next_c) == L'_' || iswalnum (next_c)))) || ((assertions & 32) && (!((prev_c) == L'_' || iswalnum(prev_c)) || ((next_c) == L'_' || iswalnum(next_c )))) || ((assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c )) == ((next_c) == L'_' || iswalnum(next_c)))) || ((assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c))))) \ | |||
64 | (((assertions & ASSERT_AT_BOL1) \ | |||
65 | && (pos > 0 || reg_notbol) \ | |||
66 | && (prev_c != L'\n' || !reg_newline)) \ | |||
67 | || ((assertions & ASSERT_AT_EOL2) \ | |||
68 | && (next_c != L'\0' || reg_noteol) \ | |||
69 | && (next_c != L'\n' || !reg_newline)) \ | |||
70 | || ((assertions & ASSERT_AT_BOW16) \ | |||
71 | && (IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) || !IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c)))) \ | |||
72 | || ((assertions & ASSERT_AT_EOW32) \ | |||
73 | && (!IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) || IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c)))) \ | |||
74 | || ((assertions & ASSERT_AT_WB64) \ | |||
75 | && (pos != 0 && next_c != L'\0' \ | |||
76 | && IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) == IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c)))) \ | |||
77 | || ((assertions & ASSERT_AT_WB_NEG128) \ | |||
78 | && (pos == 0 || next_c == L'\0' \ | |||
79 | || IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) != IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c))))) | |||
80 | ||||
81 | #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)(((trans_i->assertions & 4) && !(tnfa->cflags & 2) && !iswctype((tre_cint_t)prev_c, trans_i-> u.class)) || ((trans_i->assertions & 4) && (tnfa ->cflags & 2) && !iswctype(towlower((tre_cint_t )prev_c),trans_i->u.class) && !iswctype(towupper(( tre_cint_t)prev_c),trans_i->u.class)) || ((trans_i->assertions & 8) && tre_neg_char_classes_match(trans_i->neg_classes ,(tre_cint_t)prev_c, tnfa->cflags & 2))) \ | |||
82 | (((trans_i->assertions & ASSERT_CHAR_CLASS4) \ | |||
83 | && !(tnfa->cflags & REG_ICASE2) \ | |||
84 | && !tre_isctypeiswctype((tre_cint_t)prev_c, trans_i->u.class)) \ | |||
85 | || ((trans_i->assertions & ASSERT_CHAR_CLASS4) \ | |||
86 | && (tnfa->cflags & REG_ICASE2) \ | |||
87 | && !tre_isctypeiswctype(tre_tolowertowlower((tre_cint_t)prev_c),trans_i->u.class) \ | |||
88 | && !tre_isctypeiswctype(tre_touppertowupper((tre_cint_t)prev_c),trans_i->u.class)) \ | |||
89 | || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG8) \ | |||
90 | && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ | |||
91 | tnfa->cflags & REG_ICASE2))) | |||
92 | ||||
93 | ||||
94 | ||||
95 | ||||
96 | /* Returns 1 if `t1' wins `t2', 0 otherwise. */ | |||
97 | static int | |||
98 | tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, | |||
99 | int *t1, int *t2) | |||
100 | { | |||
101 | int i; | |||
102 | for (i = 0; i < num_tags; i++) | |||
103 | { | |||
104 | if (tag_directions[i] == TRE_TAG_MINIMIZE) | |||
105 | { | |||
106 | if (t1[i] < t2[i]) | |||
107 | return 1; | |||
108 | if (t1[i] > t2[i]) | |||
109 | return 0; | |||
110 | } | |||
111 | else | |||
112 | { | |||
113 | if (t1[i] > t2[i]) | |||
114 | return 1; | |||
115 | if (t1[i] < t2[i]) | |||
116 | return 0; | |||
117 | } | |||
118 | } | |||
119 | /* assert(0);*/ | |||
120 | return 0; | |||
121 | } | |||
122 | ||||
123 | static int | |||
124 | tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) | |||
125 | { | |||
126 | while (*classes != (tre_ctype_t)0) | |||
127 | if ((!icase && tre_isctypeiswctype(wc, *classes)) | |||
128 | || (icase && (tre_isctypeiswctype(tre_touppertowupper(wc), *classes) | |||
129 | || tre_isctypeiswctype(tre_tolowertowlower(wc), *classes)))) | |||
130 | return 1; /* Match. */ | |||
131 | else | |||
132 | classes++; | |||
133 | return 0; /* No match. */ | |||
134 | } | |||
135 | ||||
136 | ||||
137 | /*********************************************************************** | |||
138 | from tre-match-parallel.c | |||
139 | ***********************************************************************/ | |||
140 | ||||
141 | /* | |||
142 | This algorithm searches for matches basically by reading characters | |||
143 | in the searched string one by one, starting at the beginning. All | |||
144 | matching paths in the TNFA are traversed in parallel. When two or | |||
145 | more paths reach the same state, exactly one is chosen according to | |||
146 | tag ordering rules; if returning submatches is not required it does | |||
147 | not matter which path is chosen. | |||
148 | ||||
149 | The worst case time required for finding the leftmost and longest | |||
150 | match, or determining that there is no match, is always linearly | |||
151 | dependent on the length of the text being searched. | |||
152 | ||||
153 | This algorithm cannot handle TNFAs with back referencing nodes. | |||
154 | See `tre-match-backtrack.c'. | |||
155 | */ | |||
156 | ||||
157 | typedef struct { | |||
158 | tre_tnfa_transition_t *state; | |||
159 | int *tags; | |||
160 | } tre_tnfa_reach_t; | |||
161 | ||||
162 | typedef struct { | |||
163 | int pos; | |||
164 | int **tags; | |||
165 | } tre_reach_pos_t; | |||
166 | ||||
167 | ||||
168 | static reg_errcode_t | |||
169 | tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, | |||
170 | int *match_tags, int eflags, | |||
171 | int *match_end_ofs) | |||
172 | { | |||
173 | /* State variables required by GET_NEXT_WCHAR. */ | |||
174 | tre_char_t prev_c = 0, next_c = 0; | |||
175 | const char *str_byte = string; | |||
176 | int pos = -1; | |||
177 | int pos_add_next = 1; | |||
178 | #ifdef TRE_MBSTATE | |||
179 | mbstate_t mbstate; | |||
180 | #endif /* TRE_MBSTATE */ | |||
181 | int reg_notbol = eflags & REG_NOTBOL1; | |||
182 | int reg_noteol = eflags & REG_NOTEOL2; | |||
183 | int reg_newline = tnfa->cflags & REG_NEWLINE4; | |||
184 | reg_errcode_t ret; | |||
185 | ||||
186 | char *buf; | |||
187 | tre_tnfa_transition_t *trans_i; | |||
188 | tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; | |||
189 | tre_reach_pos_t *reach_pos; | |||
190 | int *tag_i; | |||
191 | int num_tags, i; | |||
192 | ||||
193 | int match_eo = -1; /* end offset of match (-1 if no match found yet) */ | |||
194 | int new_match = 0; | |||
195 | int *tmp_tags = NULL((void*)0); | |||
196 | int *tmp_iptr; | |||
197 | ||||
198 | #ifdef TRE_MBSTATE | |||
199 | memset(&mbstate, '\0', sizeof(mbstate)); | |||
200 | #endif /* TRE_MBSTATE */ | |||
201 | ||||
202 | if (!match_tags) | |||
203 | num_tags = 0; | |||
204 | else | |||
205 | num_tags = tnfa->num_tags; | |||
206 | ||||
207 | /* Allocate memory for temporary data required for matching. This needs to | |||
208 | be done for every matching operation to be thread safe. This allocates | |||
209 | everything in a single large block from the stack frame using alloca() | |||
210 | or with malloc() if alloca is unavailable. */ | |||
211 | { | |||
212 | int tbytes, rbytes, pbytes, xbytes, total_bytes; | |||
213 | char *tmp_buf; | |||
214 | /* Compute the length of the block we need. */ | |||
215 | tbytes = sizeof(*tmp_tags) * num_tags; | |||
216 | rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); | |||
217 | pbytes = sizeof(*reach_pos) * tnfa->num_states; | |||
218 | xbytes = sizeof(int) * num_tags; | |||
219 | total_bytes = | |||
220 | (sizeof(long) - 1) * 4 /* for alignment paddings */ | |||
221 | + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes; | |||
222 | ||||
223 | /* Allocate the memory. */ | |||
224 | buf = xmallocmalloc((unsigned)total_bytes); | |||
225 | if (buf == NULL((void*)0)) | |||
226 | return REG_ESPACE12; | |||
227 | memset(buf, 0, (size_t)total_bytes); | |||
228 | ||||
229 | /* Get the various pointers within tmp_buf (properly aligned). */ | |||
230 | tmp_tags = (void *)buf; | |||
231 | tmp_buf = buf + tbytes; | |||
232 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
233 | reach_next = (void *)tmp_buf; | |||
234 | tmp_buf += rbytes; | |||
235 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
236 | reach = (void *)tmp_buf; | |||
237 | tmp_buf += rbytes; | |||
238 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
239 | reach_pos = (void *)tmp_buf; | |||
240 | tmp_buf += pbytes; | |||
241 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
242 | for (i = 0; i < tnfa->num_states; i++) | |||
243 | { | |||
244 | reach[i].tags = (void *)tmp_buf; | |||
245 | tmp_buf += xbytes; | |||
246 | reach_next[i].tags = (void *)tmp_buf; | |||
247 | tmp_buf += xbytes; | |||
248 | } | |||
249 | } | |||
250 | ||||
251 | for (i = 0; i < tnfa->num_states; i++) | |||
252 | reach_pos[i].pos = -1; | |||
253 | ||||
254 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
255 | pos = 0; | |||
256 | ||||
257 | reach_next_i = reach_next; | |||
258 | while (1) | |||
259 | { | |||
260 | /* If no match found yet, add the initial states to `reach_next'. */ | |||
261 | if (match_eo < 0) | |||
262 | { | |||
263 | trans_i = tnfa->initial; | |||
264 | while (trans_i->state != NULL((void*)0)) | |||
265 | { | |||
266 | if (reach_pos[trans_i->state_id].pos < pos) | |||
267 | { | |||
268 | if (trans_i->assertions | |||
269 | && CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c)))))) | |||
270 | { | |||
271 | trans_i++; | |||
272 | continue; | |||
273 | } | |||
274 | ||||
275 | reach_next_i->state = trans_i->state; | |||
276 | for (i = 0; i < num_tags; i++) | |||
277 | reach_next_i->tags[i] = -1; | |||
278 | tag_i = trans_i->tags; | |||
279 | if (tag_i) | |||
280 | while (*tag_i >= 0) | |||
281 | { | |||
282 | if (*tag_i < num_tags) | |||
283 | reach_next_i->tags[*tag_i] = pos; | |||
284 | tag_i++; | |||
285 | } | |||
286 | if (reach_next_i->state == tnfa->final) | |||
287 | { | |||
288 | match_eo = pos; | |||
289 | new_match = 1; | |||
290 | for (i = 0; i < num_tags; i++) | |||
291 | match_tags[i] = reach_next_i->tags[i]; | |||
292 | } | |||
293 | reach_pos[trans_i->state_id].pos = pos; | |||
294 | reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | |||
295 | reach_next_i++; | |||
296 | } | |||
297 | trans_i++; | |||
298 | } | |||
299 | reach_next_i->state = NULL((void*)0); | |||
300 | } | |||
301 | else | |||
302 | { | |||
303 | if (num_tags == 0 || reach_next_i == reach_next) | |||
304 | /* We have found a match. */ | |||
305 | break; | |||
306 | } | |||
307 | ||||
308 | /* Check for end of string. */ | |||
309 | if (!next_c) break; | |||
310 | ||||
311 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
312 | ||||
313 | /* Swap `reach' and `reach_next'. */ | |||
314 | reach_i = reach; | |||
315 | reach = reach_next; | |||
316 | reach_next = reach_i; | |||
317 | ||||
318 | /* For each state in `reach', weed out states that don't fulfill the | |||
319 | minimal matching conditions. */ | |||
320 | if (tnfa->num_minimals && new_match) | |||
321 | { | |||
322 | new_match = 0; | |||
323 | reach_next_i = reach_next; | |||
324 | for (reach_i = reach; reach_i->state; reach_i++) | |||
325 | { | |||
326 | int skip = 0; | |||
327 | for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) | |||
328 | { | |||
329 | int end = tnfa->minimal_tags[i]; | |||
330 | int start = tnfa->minimal_tags[i + 1]; | |||
331 | if (end >= num_tags) | |||
332 | { | |||
333 | skip = 1; | |||
334 | break; | |||
335 | } | |||
336 | else if (reach_i->tags[start] == match_tags[start] | |||
337 | && reach_i->tags[end] < match_tags[end]) | |||
338 | { | |||
339 | skip = 1; | |||
340 | break; | |||
341 | } | |||
342 | } | |||
343 | if (!skip) | |||
344 | { | |||
345 | reach_next_i->state = reach_i->state; | |||
346 | tmp_iptr = reach_next_i->tags; | |||
347 | reach_next_i->tags = reach_i->tags; | |||
348 | reach_i->tags = tmp_iptr; | |||
349 | reach_next_i++; | |||
350 | } | |||
351 | } | |||
352 | reach_next_i->state = NULL((void*)0); | |||
353 | ||||
354 | /* Swap `reach' and `reach_next'. */ | |||
355 | reach_i = reach; | |||
356 | reach = reach_next; | |||
357 | reach_next = reach_i; | |||
358 | } | |||
359 | ||||
360 | /* For each state in `reach' see if there is a transition leaving with | |||
361 | the current input symbol to a state not yet in `reach_next', and | |||
362 | add the destination states to `reach_next'. */ | |||
363 | reach_next_i = reach_next; | |||
364 | for (reach_i = reach; reach_i->state; reach_i++) | |||
365 | { | |||
366 | for (trans_i = reach_i->state; trans_i->state; trans_i++) | |||
367 | { | |||
368 | /* Does this transition match the input symbol? */ | |||
369 | if (trans_i->code_min <= (tre_cint_t)prev_c && | |||
370 | trans_i->code_max >= (tre_cint_t)prev_c) | |||
371 | { | |||
372 | if (trans_i->assertions | |||
373 | && (CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c))))) | |||
374 | || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)(((trans_i->assertions & 4) && !(tnfa->cflags & 2) && !iswctype((tre_cint_t)prev_c, trans_i-> u.class)) || ((trans_i->assertions & 4) && (tnfa ->cflags & 2) && !iswctype(towlower((tre_cint_t )prev_c),trans_i->u.class) && !iswctype(towupper(( tre_cint_t)prev_c),trans_i->u.class)) || ((trans_i->assertions & 8) && tre_neg_char_classes_match(trans_i->neg_classes ,(tre_cint_t)prev_c, tnfa->cflags & 2))))) | |||
375 | { | |||
376 | continue; | |||
377 | } | |||
378 | ||||
379 | /* Compute the tags after this transition. */ | |||
380 | for (i = 0; i < num_tags; i++) | |||
381 | tmp_tags[i] = reach_i->tags[i]; | |||
382 | tag_i = trans_i->tags; | |||
383 | if (tag_i != NULL((void*)0)) | |||
384 | while (*tag_i >= 0) | |||
385 | { | |||
386 | if (*tag_i < num_tags) | |||
387 | tmp_tags[*tag_i] = pos; | |||
388 | tag_i++; | |||
389 | } | |||
390 | ||||
391 | if (reach_pos[trans_i->state_id].pos < pos) | |||
392 | { | |||
393 | /* Found an unvisited node. */ | |||
394 | reach_next_i->state = trans_i->state; | |||
395 | tmp_iptr = reach_next_i->tags; | |||
396 | reach_next_i->tags = tmp_tags; | |||
397 | tmp_tags = tmp_iptr; | |||
398 | reach_pos[trans_i->state_id].pos = pos; | |||
399 | reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | |||
400 | ||||
401 | if (reach_next_i->state == tnfa->final | |||
402 | && (match_eo == -1 | |||
403 | || (num_tags > 0 | |||
404 | && reach_next_i->tags[0] <= match_tags[0]))) | |||
405 | { | |||
406 | match_eo = pos; | |||
407 | new_match = 1; | |||
408 | for (i = 0; i < num_tags; i++) | |||
409 | match_tags[i] = reach_next_i->tags[i]; | |||
410 | } | |||
411 | reach_next_i++; | |||
412 | ||||
413 | } | |||
414 | else | |||
415 | { | |||
416 | assert(reach_pos[trans_i->state_id].pos == pos)(void)0; | |||
417 | /* Another path has also reached this state. We choose | |||
418 | the winner by examining the tag values for both | |||
419 | paths. */ | |||
420 | if (tre_tag_order(num_tags, tnfa->tag_directions, | |||
421 | tmp_tags, | |||
422 | *reach_pos[trans_i->state_id].tags)) | |||
423 | { | |||
424 | /* The new path wins. */ | |||
425 | tmp_iptr = *reach_pos[trans_i->state_id].tags; | |||
426 | *reach_pos[trans_i->state_id].tags = tmp_tags; | |||
427 | if (trans_i->state == tnfa->final) | |||
428 | { | |||
429 | match_eo = pos; | |||
430 | new_match = 1; | |||
431 | for (i = 0; i < num_tags; i++) | |||
432 | match_tags[i] = tmp_tags[i]; | |||
433 | } | |||
434 | tmp_tags = tmp_iptr; | |||
435 | } | |||
436 | } | |||
437 | } | |||
438 | } | |||
439 | } | |||
440 | reach_next_i->state = NULL((void*)0); | |||
441 | } | |||
442 | ||||
443 | *match_end_ofs = match_eo; | |||
444 | ret = match_eo >= 0 ? REG_OK0 : REG_NOMATCH1; | |||
445 | error_exit: | |||
446 | xfreefree(buf); | |||
447 | return ret; | |||
448 | } | |||
449 | ||||
450 | ||||
451 | ||||
452 | /*********************************************************************** | |||
453 | from tre-match-backtrack.c | |||
454 | ***********************************************************************/ | |||
455 | ||||
456 | /* | |||
457 | This matcher is for regexps that use back referencing. Regexp matching | |||
458 | with back referencing is an NP-complete problem on the number of back | |||
459 | references. The easiest way to match them is to use a backtracking | |||
460 | routine which basically goes through all possible paths in the TNFA | |||
461 | and chooses the one which results in the best (leftmost and longest) | |||
462 | match. This can be spectacularly expensive and may run out of stack | |||
463 | space, but there really is no better known generic algorithm. Quoting | |||
464 | Henry Spencer from comp.compilers: | |||
465 | <URL: http://compilers.iecc.com/comparch/article/93-03-102> | |||
466 | ||||
467 | POSIX.2 REs require longest match, which is really exciting to | |||
468 | implement since the obsolete ("basic") variant also includes | |||
469 | \<digit>. I haven't found a better way of tackling this than doing | |||
470 | a preliminary match using a DFA (or simulation) on a modified RE | |||
471 | that just replicates subREs for \<digit>, and then doing a | |||
472 | backtracking match to determine whether the subRE matches were | |||
473 | right. This can be rather slow, but I console myself with the | |||
474 | thought that people who use \<digit> deserve very slow execution. | |||
475 | (Pun unintentional but very appropriate.) | |||
476 | ||||
477 | */ | |||
478 | ||||
479 | typedef struct { | |||
480 | int pos; | |||
481 | const char *str_byte; | |||
482 | tre_tnfa_transition_t *state; | |||
483 | int state_id; | |||
484 | int next_c; | |||
485 | int *tags; | |||
486 | #ifdef TRE_MBSTATE | |||
487 | mbstate_t mbstate; | |||
488 | #endif /* TRE_MBSTATE */ | |||
489 | } tre_backtrack_item_t; | |||
490 | ||||
491 | typedef struct tre_backtrack_struct { | |||
492 | tre_backtrack_item_t item; | |||
493 | struct tre_backtrack_struct *prev; | |||
494 | struct tre_backtrack_struct *next; | |||
495 | } *tre_backtrack_t; | |||
496 | ||||
497 | #ifdef TRE_MBSTATE | |||
498 | #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) | |||
499 | #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate | |||
500 | #else /* !TRE_MBSTATE */ | |||
501 | #define BT_STACK_MBSTATE_IN | |||
502 | #define BT_STACK_MBSTATE_OUT | |||
503 | #endif /* !TRE_MBSTATE */ | |||
504 | ||||
505 | #define tre_bt_mem_newtre_mem_new tre_mem_new | |||
506 | #define tre_bt_mem_alloctre_mem_alloc tre_mem_alloc | |||
507 | #define tre_bt_mem_destroy__tre_mem_destroy tre_mem_destroy__tre_mem_destroy | |||
508 | ||||
509 | ||||
510 | #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate)do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (_pos); stack->item.str_byte = ( _str_byte); stack->item.state = (_state); stack->item.state_id = (_state_id); stack->item.next_c = (_next_c); for (i = 0 ; i < tnfa->num_tags; i++) stack->item.tags[i] = (_tags )[i]; ; } while (0) \ | |||
511 | do \ | |||
512 | { \ | |||
513 | int i; \ | |||
514 | if (!stack->next) \ | |||
515 | { \ | |||
516 | tre_backtrack_t s; \ | |||
517 | s = tre_bt_mem_alloc(mem, sizeof(*s))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*s)); \ | |||
518 | if (!s) \ | |||
519 | { \ | |||
520 | tre_bt_mem_destroy__tre_mem_destroy(mem); \ | |||
521 | if (tags) \ | |||
522 | xfreefree(tags); \ | |||
523 | if (pmatch) \ | |||
524 | xfreefree(pmatch); \ | |||
525 | if (states_seen) \ | |||
526 | xfreefree(states_seen); \ | |||
527 | return REG_ESPACE12; \ | |||
528 | } \ | |||
529 | s->prev = stack; \ | |||
530 | s->next = NULL((void*)0); \ | |||
531 | s->item.tags = tre_bt_mem_alloc(mem, \__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa ->num_tags) | |||
532 | sizeof(*tags) * tnfa->num_tags)__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa ->num_tags); \ | |||
533 | if (!s->item.tags) \ | |||
534 | { \ | |||
535 | tre_bt_mem_destroy__tre_mem_destroy(mem); \ | |||
536 | if (tags) \ | |||
537 | xfreefree(tags); \ | |||
538 | if (pmatch) \ | |||
539 | xfreefree(pmatch); \ | |||
540 | if (states_seen) \ | |||
541 | xfreefree(states_seen); \ | |||
542 | return REG_ESPACE12; \ | |||
543 | } \ | |||
544 | stack->next = s; \ | |||
545 | stack = s; \ | |||
546 | } \ | |||
547 | else \ | |||
548 | stack = stack->next; \ | |||
549 | stack->item.pos = (_pos); \ | |||
550 | stack->item.str_byte = (_str_byte); \ | |||
551 | stack->item.state = (_state); \ | |||
552 | stack->item.state_id = (_state_id); \ | |||
553 | stack->item.next_c = (_next_c); \ | |||
554 | for (i = 0; i < tnfa->num_tags; i++) \ | |||
555 | stack->item.tags[i] = (_tags)[i]; \ | |||
556 | BT_STACK_MBSTATE_IN; \ | |||
557 | } \ | |||
558 | while (0) | |||
559 | ||||
560 | #define BT_STACK_POP()do { int i; (void)0; pos = stack->item.pos; str_byte = stack ->item.str_byte; state = stack->item.state; next_c = stack ->item.next_c; for (i = 0; i < tnfa->num_tags; i++) tags [i] = stack->item.tags[i]; ; stack = stack->prev; } while (0) \ | |||
561 | do \ | |||
562 | { \ | |||
563 | int i; \ | |||
564 | assert(stack->prev)(void)0; \ | |||
565 | pos = stack->item.pos; \ | |||
566 | str_byte = stack->item.str_byte; \ | |||
567 | state = stack->item.state; \ | |||
568 | next_c = stack->item.next_c; \ | |||
569 | for (i = 0; i < tnfa->num_tags; i++) \ | |||
570 | tags[i] = stack->item.tags[i]; \ | |||
571 | BT_STACK_MBSTATE_OUT; \ | |||
572 | stack = stack->prev; \ | |||
573 | } \ | |||
574 | while (0) | |||
575 | ||||
576 | #undef MIN | |||
577 | #define MIN(a, b)((a) <= (b) ? (a) : (b)) ((a) <= (b) ? (a) : (b)) | |||
578 | ||||
579 | static reg_errcode_t | |||
580 | tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, | |||
581 | int *match_tags, int eflags, int *match_end_ofs) | |||
582 | { | |||
583 | /* State variables required by GET_NEXT_WCHAR. */ | |||
584 | tre_char_t prev_c = 0, next_c = 0; | |||
585 | const char *str_byte = string; | |||
586 | int pos = 0; | |||
587 | int pos_add_next = 1; | |||
588 | #ifdef TRE_MBSTATE | |||
589 | mbstate_t mbstate; | |||
590 | #endif /* TRE_MBSTATE */ | |||
591 | int reg_notbol = eflags & REG_NOTBOL1; | |||
592 | int reg_noteol = eflags & REG_NOTEOL2; | |||
593 | int reg_newline = tnfa->cflags & REG_NEWLINE4; | |||
594 | ||||
595 | /* These are used to remember the necessary values of the above | |||
596 | variables to return to the position where the current search | |||
597 | started from. */ | |||
598 | int next_c_start; | |||
599 | const char *str_byte_start; | |||
600 | int pos_start = -1; | |||
601 | #ifdef TRE_MBSTATE | |||
602 | mbstate_t mbstate_start; | |||
603 | #endif /* TRE_MBSTATE */ | |||
604 | ||||
605 | /* End offset of best match so far, or -1 if no match found yet. */ | |||
606 | int match_eo = -1; | |||
607 | /* Tag arrays. */ | |||
608 | int *next_tags, *tags = NULL((void*)0); | |||
| ||||
609 | /* Current TNFA state. */ | |||
610 | tre_tnfa_transition_t *state; | |||
611 | int *states_seen = NULL((void*)0); | |||
612 | ||||
613 | /* Memory allocator to for allocating the backtracking stack. */ | |||
614 | tre_mem_t mem = tre_bt_mem_new()__tre_mem_new_impl(0, ((void*)0)); | |||
615 | ||||
616 | /* The backtracking stack. */ | |||
617 | tre_backtrack_t stack; | |||
618 | ||||
619 | tre_tnfa_transition_t *trans_i; | |||
620 | regmatch_t *pmatch = NULL((void*)0); | |||
621 | int ret; | |||
622 | ||||
623 | #ifdef TRE_MBSTATE | |||
624 | memset(&mbstate, '\0', sizeof(mbstate)); | |||
625 | #endif /* TRE_MBSTATE */ | |||
626 | ||||
627 | if (!mem) | |||
628 | return REG_ESPACE12; | |||
629 | stack = tre_bt_mem_alloc(mem, sizeof(*stack))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*stack)); | |||
630 | if (!stack) | |||
631 | { | |||
632 | ret = REG_ESPACE12; | |||
633 | goto error_exit; | |||
634 | } | |||
635 | stack->prev = NULL((void*)0); | |||
636 | stack->next = NULL((void*)0); | |||
637 | ||||
638 | if (tnfa->num_tags) | |||
639 | { | |||
640 | tags = xmallocmalloc(sizeof(*tags) * tnfa->num_tags); | |||
641 | if (!tags) | |||
642 | { | |||
643 | ret = REG_ESPACE12; | |||
644 | goto error_exit; | |||
645 | } | |||
646 | } | |||
647 | if (tnfa->num_submatches) | |||
648 | { | |||
649 | pmatch = xmallocmalloc(sizeof(*pmatch) * tnfa->num_submatches); | |||
650 | if (!pmatch) | |||
651 | { | |||
652 | ret = REG_ESPACE12; | |||
653 | goto error_exit; | |||
654 | } | |||
655 | } | |||
656 | if (tnfa->num_states) | |||
657 | { | |||
658 | states_seen = xmallocmalloc(sizeof(*states_seen) * tnfa->num_states); | |||
659 | if (!states_seen) | |||
660 | { | |||
661 | ret = REG_ESPACE12; | |||
662 | goto error_exit; | |||
663 | } | |||
664 | } | |||
665 | ||||
666 | retry: | |||
667 | { | |||
668 | int i; | |||
669 | for (i = 0; i < tnfa->num_tags; i++) | |||
670 | { | |||
671 | tags[i] = -1; | |||
672 | if (match_tags) | |||
673 | match_tags[i] = -1; | |||
674 | } | |||
675 | for (i = 0; i < tnfa->num_states; i++) | |||
676 | states_seen[i] = 0; | |||
677 | } | |||
678 | ||||
679 | state = NULL((void*)0); | |||
680 | pos = pos_start; | |||
681 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
682 | pos_start = pos; | |||
683 | next_c_start = next_c; | |||
684 | str_byte_start = str_byte; | |||
685 | #ifdef TRE_MBSTATE | |||
686 | mbstate_start = mbstate; | |||
687 | #endif /* TRE_MBSTATE */ | |||
688 | ||||
689 | /* Handle initial states. */ | |||
690 | next_tags = NULL((void*)0); | |||
691 | for (trans_i = tnfa->initial; trans_i->state; trans_i++) | |||
692 | { | |||
693 | if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c)))))) | |||
694 | { | |||
695 | continue; | |||
696 | } | |||
697 | if (state == NULL((void*)0)) | |||
698 | { | |||
699 | /* Start from this state. */ | |||
700 | state = trans_i->state; | |||
701 | next_tags = trans_i->tags; | |||
702 | } | |||
703 | else | |||
704 | { | |||
705 | /* Backtrack to this state. */ | |||
706 | BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0) | |||
707 | trans_i->state_id, next_c, tags, mbstate)do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0); | |||
708 | { | |||
709 | int *tmp = trans_i->tags; | |||
710 | if (tmp) | |||
711 | while (*tmp >= 0) | |||
712 | stack->item.tags[*tmp++] = pos; | |||
713 | } | |||
714 | } | |||
715 | } | |||
716 | ||||
717 | if (next_tags) | |||
718 | for (; *next_tags >= 0; next_tags++) | |||
719 | tags[*next_tags] = pos; | |||
720 | ||||
721 | ||||
722 | if (state == NULL((void*)0)) | |||
723 | goto backtrack; | |||
724 | ||||
725 | while (1) | |||
726 | { | |||
727 | tre_tnfa_transition_t *next_state; | |||
728 | int empty_br_match; | |||
729 | ||||
730 | if (state == tnfa->final) | |||
731 | { | |||
732 | if (match_eo < pos | |||
733 | || (match_eo == pos | |||
734 | && match_tags | |||
735 | && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, | |||
736 | tags, match_tags))) | |||
737 | { | |||
738 | int i; | |||
739 | /* This match wins the previous match. */ | |||
740 | match_eo = pos; | |||
741 | if (match_tags) | |||
742 | for (i = 0; i < tnfa->num_tags; i++) | |||
743 | match_tags[i] = tags[i]; | |||
744 | } | |||
745 | /* Our TNFAs never have transitions leaving from the final state, | |||
746 | so we jump right to backtracking. */ | |||
747 | goto backtrack; | |||
748 | } | |||
749 | ||||
750 | /* Go to the next character in the input string. */ | |||
751 | empty_br_match = 0; | |||
752 | trans_i = state; | |||
753 | if (trans_i->state && trans_i->assertions & ASSERT_BACKREF256) | |||
754 | { | |||
755 | /* This is a back reference state. All transitions leaving from | |||
756 | this state have the same back reference "assertion". Instead | |||
757 | of reading the next character, we match the back reference. */ | |||
758 | int so, eo, bt = trans_i->u.backref; | |||
759 | int bt_len; | |||
760 | int result; | |||
761 | ||||
762 | /* Get the substring we need to match against. Remember to | |||
763 | turn off REG_NOSUB temporarily. */ | |||
764 | tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB8, | |||
765 | tnfa, tags, pos); | |||
766 | so = pmatch[bt].rm_so; | |||
767 | eo = pmatch[bt].rm_eo; | |||
768 | bt_len = eo - so; | |||
769 | ||||
770 | result = strncmp((const char*)string + so, str_byte - 1, | |||
771 | (size_t)bt_len); | |||
772 | ||||
773 | if (result == 0) | |||
774 | { | |||
775 | /* Back reference matched. Check for infinite loop. */ | |||
776 | if (bt_len == 0) | |||
777 | empty_br_match = 1; | |||
778 | if (empty_br_match && states_seen[trans_i->state_id]) | |||
779 | { | |||
780 | goto backtrack; | |||
781 | } | |||
782 | ||||
783 | states_seen[trans_i->state_id] = empty_br_match; | |||
784 | ||||
785 | /* Advance in input string and resync `prev_c', `next_c' | |||
786 | and pos. */ | |||
787 | str_byte += bt_len - 1; | |||
788 | pos += bt_len - 1; | |||
789 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
790 | } | |||
791 | else | |||
792 | { | |||
793 | goto backtrack; | |||
794 | } | |||
795 | } | |||
796 | else | |||
797 | { | |||
798 | /* Check for end of string. */ | |||
799 | if (next_c == L'\0') | |||
800 | goto backtrack; | |||
801 | ||||
802 | /* Read the next character. */ | |||
803 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
804 | } | |||
805 | ||||
806 | next_state = NULL((void*)0); | |||
807 | for (trans_i = state; trans_i->state; trans_i++) | |||
808 | { | |||
809 | if (trans_i->code_min <= (tre_cint_t)prev_c | |||
810 | && trans_i->code_max >= (tre_cint_t)prev_c) | |||
811 | { | |||
812 | if (trans_i->assertions | |||
813 | && (CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c))))) | |||
814 | || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)(((trans_i->assertions & 4) && !(tnfa->cflags & 2) && !iswctype((tre_cint_t)prev_c, trans_i-> u.class)) || ((trans_i->assertions & 4) && (tnfa ->cflags & 2) && !iswctype(towlower((tre_cint_t )prev_c),trans_i->u.class) && !iswctype(towupper(( tre_cint_t)prev_c),trans_i->u.class)) || ((trans_i->assertions & 8) && tre_neg_char_classes_match(trans_i->neg_classes ,(tre_cint_t)prev_c, tnfa->cflags & 2))))) | |||
815 | { | |||
816 | continue; | |||
817 | } | |||
818 | ||||
819 | if (next_state == NULL((void*)0)) | |||
820 | { | |||
821 | /* First matching transition. */ | |||
822 | next_state = trans_i->state; | |||
823 | next_tags = trans_i->tags; | |||
824 | } | |||
825 | else | |||
826 | { | |||
827 | /* Second matching transition. We may need to backtrack here | |||
828 | to take this transition instead of the first one, so we | |||
829 | push this transition in the backtracking stack so we can | |||
830 | jump back here if needed. */ | |||
831 | BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0) | |||
832 | trans_i->state_id, next_c, tags, mbstate)do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0); | |||
833 | { | |||
834 | int *tmp; | |||
835 | for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) | |||
836 | stack->item.tags[*tmp] = pos; | |||
837 | } | |||
838 | #if 0 /* XXX - it's important not to look at all transitions here to keep | |||
839 | the stack small! */ | |||
840 | break; | |||
841 | #endif | |||
842 | } | |||
843 | } | |||
844 | } | |||
845 | ||||
846 | if (next_state != NULL((void*)0)) | |||
847 | { | |||
848 | /* Matching transitions were found. Take the first one. */ | |||
849 | state = next_state; | |||
850 | ||||
851 | /* Update the tag values. */ | |||
852 | if (next_tags) | |||
853 | while (*next_tags >= 0) | |||
854 | tags[*next_tags++] = pos; | |||
| ||||
855 | } | |||
856 | else | |||
857 | { | |||
858 | backtrack: | |||
859 | /* A matching transition was not found. Try to backtrack. */ | |||
860 | if (stack->prev) | |||
861 | { | |||
862 | if (stack->item.state->assertions & ASSERT_BACKREF256) | |||
863 | { | |||
864 | states_seen[stack->item.state_id] = 0; | |||
865 | } | |||
866 | ||||
867 | BT_STACK_POP()do { int i; (void)0; pos = stack->item.pos; str_byte = stack ->item.str_byte; state = stack->item.state; next_c = stack ->item.next_c; for (i = 0; i < tnfa->num_tags; i++) tags [i] = stack->item.tags[i]; ; stack = stack->prev; } while (0); | |||
868 | } | |||
869 | else if (match_eo < 0) | |||
870 | { | |||
871 | /* Try starting from a later position in the input string. */ | |||
872 | /* Check for end of string. */ | |||
873 | if (next_c == L'\0') | |||
874 | { | |||
875 | break; | |||
876 | } | |||
877 | next_c = next_c_start; | |||
878 | #ifdef TRE_MBSTATE | |||
879 | mbstate = mbstate_start; | |||
880 | #endif /* TRE_MBSTATE */ | |||
881 | str_byte = str_byte_start; | |||
882 | goto retry; | |||
883 | } | |||
884 | else | |||
885 | { | |||
886 | break; | |||
887 | } | |||
888 | } | |||
889 | } | |||
890 | ||||
891 | ret = match_eo >= 0 ? REG_OK0 : REG_NOMATCH1; | |||
892 | *match_end_ofs = match_eo; | |||
893 | ||||
894 | error_exit: | |||
895 | tre_bt_mem_destroy__tre_mem_destroy(mem); | |||
896 | #ifndef TRE_USE_ALLOCA | |||
897 | if (tags) | |||
898 | xfreefree(tags); | |||
899 | if (pmatch) | |||
900 | xfreefree(pmatch); | |||
901 | if (states_seen) | |||
902 | xfreefree(states_seen); | |||
903 | #endif /* !TRE_USE_ALLOCA */ | |||
904 | ||||
905 | return ret; | |||
906 | } | |||
907 | ||||
908 | /*********************************************************************** | |||
909 | from regexec.c | |||
910 | ***********************************************************************/ | |||
911 | ||||
912 | /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match | |||
913 | endpoint values. */ | |||
914 | static void | |||
915 | tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, | |||
916 | const tre_tnfa_t *tnfa, int *tags, int match_eo) | |||
917 | { | |||
918 | tre_submatch_data_t *submatch_data; | |||
919 | unsigned int i, j; | |||
920 | int *parents; | |||
921 | ||||
922 | i = 0; | |||
923 | if (match_eo >= 0 && !(cflags & REG_NOSUB8)) | |||
924 | { | |||
925 | /* Construct submatch offsets from the tags. */ | |||
926 | submatch_data = tnfa->submatch_data; | |||
927 | while (i < tnfa->num_submatches && i < nmatch) | |||
928 | { | |||
929 | if (submatch_data[i].so_tag == tnfa->end_tag) | |||
930 | pmatch[i].rm_so = match_eo; | |||
931 | else | |||
932 | pmatch[i].rm_so = tags[submatch_data[i].so_tag]; | |||
933 | ||||
934 | if (submatch_data[i].eo_tag == tnfa->end_tag) | |||
935 | pmatch[i].rm_eo = match_eo; | |||
936 | else | |||
937 | pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; | |||
938 | ||||
939 | /* If either of the endpoints were not used, this submatch | |||
940 | was not part of the match. */ | |||
941 | if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) | |||
942 | pmatch[i].rm_so = pmatch[i].rm_eo = -1; | |||
943 | ||||
944 | i++; | |||
945 | } | |||
946 | /* Reset all submatches that are not within all of their parent | |||
947 | submatches. */ | |||
948 | i = 0; | |||
949 | while (i < tnfa->num_submatches && i < nmatch) | |||
950 | { | |||
951 | if (pmatch[i].rm_eo == -1) | |||
952 | assert(pmatch[i].rm_so == -1)(void)0; | |||
953 | assert(pmatch[i].rm_so <= pmatch[i].rm_eo)(void)0; | |||
954 | ||||
955 | parents = submatch_data[i].parents; | |||
956 | if (parents != NULL((void*)0)) | |||
957 | for (j = 0; parents[j] >= 0; j++) | |||
958 | { | |||
959 | if (pmatch[i].rm_so < pmatch[parents[j]].rm_so | |||
960 | || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) | |||
961 | pmatch[i].rm_so = pmatch[i].rm_eo = -1; | |||
962 | } | |||
963 | i++; | |||
964 | } | |||
965 | } | |||
966 | ||||
967 | while (i < nmatch) | |||
968 | { | |||
969 | pmatch[i].rm_so = -1; | |||
970 | pmatch[i].rm_eo = -1; | |||
971 | i++; | |||
972 | } | |||
973 | } | |||
974 | ||||
975 | ||||
976 | /* | |||
977 | Wrapper functions for POSIX compatible regexp matching. | |||
978 | */ | |||
979 | ||||
980 | int | |||
981 | regexec(const regex_t *restrict preg, const char *restrict string, | |||
982 | size_t nmatch, regmatch_t pmatch[restrict], int eflags) | |||
983 | { | |||
984 | tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD__opaque; | |||
985 | reg_errcode_t status; | |||
986 | int *tags = NULL((void*)0), eo; | |||
987 | if (tnfa->cflags & REG_NOSUB8) nmatch = 0; | |||
988 | if (tnfa->num_tags > 0 && nmatch > 0) | |||
989 | { | |||
990 | tags = xmallocmalloc(sizeof(*tags) * tnfa->num_tags); | |||
991 | if (tags == NULL((void*)0)) | |||
992 | return REG_ESPACE12; | |||
993 | } | |||
994 | ||||
995 | /* Dispatch to the appropriate matcher. */ | |||
996 | if (tnfa->have_backrefs) | |||
997 | { | |||
998 | /* The regex has back references, use the backtracking matcher. */ | |||
999 | status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); | |||
1000 | } | |||
1001 | else | |||
1002 | { | |||
1003 | /* Exact matching, no back references, use the parallel matcher. */ | |||
1004 | status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); | |||
1005 | } | |||
1006 | ||||
1007 | if (status == REG_OK0) | |||
1008 | /* A match was found, so fill the submatch registers. */ | |||
1009 | tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); | |||
1010 | if (tags) | |||
1011 | xfreefree(tags); | |||
1012 | return status; | |||
1013 | } |