modules/up/src/Core/gnu/regex.c
/* [<][>][^][v][top][bottom][index][help] */
FUNCTIONS
This source file includes following functions.
- bcmp
- bcopy
- bzero
- init_syntax_once
- SYNTAX
- isascii
- ISBLANK
- ISBLANK
- ISGRAPH
- ISGRAPH
- ISPRINT
- ISDIGIT
- ISALNUM
- ISALPHA
- ISCNTRL
- ISLOWER
- ISPUNCT
- ISSPACE
- ISUPPER
- ISXDIGIT
- SIGN_EXTEND_CHAR
- SIGN_EXTEND_CHAR
- REGEX_REALLOCATE
- REGEX_REALLOCATE
- FIRST_STRING_P
- TALLOC
- RETALLOC
- REGEX_TALLOC
- STREQ
- MAX
- MIN
- STORE_NUMBER
- STORE_NUMBER_AND_INCR
- EXTRACT_NUMBER
- extract_number
- EXTRACT_NUMBER
- EXTRACT_NUMBER_AND_INCR
- extract_number_and_incr
- EXTRACT_NUMBER_AND_INCR
- DEBUG_STATEMENT
- DEBUG_PRINT1
- DEBUG_PRINT2
- DEBUG_PRINT3
- DEBUG_PRINT4
- DEBUG_PRINT_COMPILED_PATTERN
- DEBUG_PRINT_DOUBLE_STRING
- printchar
- print_fastmap
- print_partial_compiled_pattern
- print_compiled_pattern
- print_double_string
- assert
- DEBUG_STATEMENT
- DEBUG_PRINT1
- DEBUG_PRINT2
- DEBUG_PRINT3
- DEBUG_PRINT4
- DEBUG_PRINT_COMPILED_PATTERN
- DEBUG_PRINT_DOUBLE_STRING
- re_set_syntax
- PATFETCH
- PATFETCH_RAW
- TRANSLATE
- GET_BUFFER_SPACE
- BUF_PUSH
- BUF_PUSH_2
- BUF_PUSH_3
- STORE_JUMP
- STORE_JUMP2
- INSERT_JUMP
- INSERT_JUMP2
- EXTEND_BUFFER
- SET_LIST_BIT
- GET_UNSIGNED_NUMBER
- IS_CHAR_CLASS
- regex_compile
- store_op1
- store_op2
- insert_op1
- insert_op2
- at_begline_loc_p
- at_endline_loc_p
- group_in_compile_stack
- compile_range
- FAIL_STACK_EMPTY
- FAIL_STACK_PTR_EMPTY
- FAIL_STACK_FULL
- FAIL_STACK_TOP
- INIT_FAIL_STACK
- DOUBLE_FAIL_STACK
- PUSH_PATTERN_OP
- PUSH_FAILURE_ITEM
- POP_FAILURE_ITEM
- DEBUG_POP
- DEBUG_PUSH
- DEBUG_POP
- PUSH_FAILURE_POINT
- POP_FAILURE_POINT
- re_compile_fastmap
- re_set_registers
- re_search
- re_search_2
- REG_MATCH_NULL_STRING_P
- IS_ACTIVE
- MATCHED_SOMETHING
- EVER_MATCHED_SOMETHING
- SET_REGS_MATCHED
- POINTER_TO_OFFSET
- REG_UNSET
- PREFETCH
- AT_STRINGS_BEG
- AT_STRINGS_END
- WORDCHAR_P
- AT_WORD_BOUNDARY
- FREE_VAR
- FREE_VARIABLES
- FREE_VARIABLES
- re_match
- re_match_2
- group_match_null_string_p
- alt_match_null_string_p
- common_op_match_null_string_p
- bcmp_translate
- re_compile_pattern
- re_comp
- re_exec
- regcomp
- regexec
- regerror
- regfree
1 /* Extended regular expression matching and search library,
2 version 0.12.
3 (Implements POSIX draft P10003.2/D11.2, except for
4 internationalization features.)
5
6 Copyright (C) 1993 Free Software Foundation, Inc.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
21
22 /* AIX requires this to be the first thing in the file. */
23 #if defined (_AIX) && !defined (REGEX_MALLOC)
24 #pragma alloca
25 #endif
26
27 #define _GNU_SOURCE
28
29 /* We need this for `regex.h', and perhaps for the Emacs include files. */
30 #include <sys/types.h>
31
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 /* The `emacs' switch turns on certain matching commands
37 that make sense only in Emacs. */
38 #ifdef emacs
39
40 #include "lisp.h"
41 #include "buffer.h"
42 #include "syntax.h"
43
44 /* Emacs uses `NULL' as a predicate. */
45 #undef NULL
46
47 #else /* not emacs */
48
49 /* We used to test for `BSTRING' here, but only GCC and Emacs define
50 `BSTRING', as far as I know, and neither of them use this code. */
51 #if HAVE_STRING_H || STDC_HEADERS
52 #include <string.h>
/* Map the BSD byte-string routines onto their ISO C equivalents when
   the system headers have not already supplied them as macros.  */
#ifndef bcmp
#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
#endif
#ifndef bcopy
/* bcopy is specified to cope with overlapping regions, so it has to be
   mapped to memmove; memcpy is undefined when source and destination
   overlap.  Note the swapped argument order (source first).  */
#define bcopy(s, d, n) memmove ((d), (s), (n))
#endif
#ifndef bzero
#define bzero(s, n) memset ((s), 0, (n))
#endif
62 #else
63 #include <strings.h>
64 #endif
65
66 #ifdef STDC_HEADERS
67 #include <stdlib.h>
68 #else
69 char *malloc ();
70 char *realloc ();
71 #endif
72
73
74 /* Define the syntax stuff for \<, \>, etc. */
75
76 /* This must be nonzero for the wordchar and notwordchar pattern
77 commands in re_match_2. */
78 #ifndef Sword
79 #define Sword 1
80 #endif
81
82 #ifdef SYNTAX_TABLE
83
84 extern char *re_syntax_table;
85
86 #else /* not SYNTAX_TABLE */
87
88 /* How many characters in the character set. */
89 #define CHAR_SET_SIZE 256
90
91 static char re_syntax_table[CHAR_SET_SIZE];
92
93 static void
94 init_syntax_once ()
/* [<][>][^][v][top][bottom][index][help] */
95 {
96 register int c;
97 static int done = 0;
98
99 if (done)
100 return;
101
102 bzero (re_syntax_table, sizeof re_syntax_table);
103
104 for (c = 'a'; c <= 'z'; c++)
105 re_syntax_table[c] = Sword;
106
107 for (c = 'A'; c <= 'Z'; c++)
108 re_syntax_table[c] = Sword;
109
110 for (c = '0'; c <= '9'; c++)
111 re_syntax_table[c] = Sword;
112
113 re_syntax_table['_'] = Sword;
114
115 done = 1;
116 }
117
118 #endif /* not SYNTAX_TABLE */
119
120 #define SYNTAX(c) re_syntax_table[c]
/* [<][>][^][v][top][bottom][index][help] */
121
122 #endif /* not emacs */
123
124 /* Get the interface, including the syntax bits. */
125 #include "regex.h"
126
127 /* isalpha etc. are used for the character classes. */
128 #include <ctype.h>
129
/* If <ctype.h> supplies no `isascii' macro, make the guard used by the
   wrappers below always succeed, so each wrapper reduces to the plain
   ctype test.  */
#ifndef isascii
#define isascii(c) 1
#endif

/* Character-class predicates.  Each one first applies isascii and only
   then the corresponding <ctype.h> classification.  */
#define ISALNUM(c)  (isascii (c) && isalnum (c))
#define ISALPHA(c)  (isascii (c) && isalpha (c))
#define ISCNTRL(c)  (isascii (c) && iscntrl (c))
#define ISDIGIT(c)  (isascii (c) && isdigit (c))
#define ISLOWER(c)  (isascii (c) && islower (c))
#define ISPRINT(c)  (isascii (c) && isprint (c))
#define ISPUNCT(c)  (isascii (c) && ispunct (c))
#define ISSPACE(c)  (isascii (c) && isspace (c))
#define ISUPPER(c)  (isascii (c) && isupper (c))
#define ISXDIGIT(c) (isascii (c) && isxdigit (c))

/* `isblank' is used only when it is available as a macro; otherwise
   blank means exactly space or horizontal tab.  */
#ifdef isblank
#define ISBLANK(c) (isascii (c) && isblank (c))
#else
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif

/* Likewise for `isgraph'; the fallback is "printable but not white
   space".  */
#ifdef isgraph
#define ISGRAPH(c) (isascii (c) && isgraph (c))
#else
#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
#endif
155
156 #ifndef NULL
157 #define NULL 0
158 #endif
159
160 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
161 since ours (we hope) works properly with all combinations of
162 machines, compilers, `char' and `unsigned char' argument types.
163 (Per Bothner suggested the basic approach.) */
/* Discard any earlier definition, then pick the form that suits the
   compiler: a plain cast under __STDC__, otherwise the arithmetic
   form from Harbison and Steele.  Both yield the value of the low
   eight bits of C read as a signed quantity.  */
#undef SIGN_EXTEND_CHAR
#if !__STDC__
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#else /* __STDC__ */
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#endif /* __STDC__ */
171
172 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
173 use `alloca' instead of `malloc'. This is because using malloc in
174 re_search* or re_match* could cause memory leaks when C-g is used in
175 Emacs; also, malloc is slower and causes storage fragmentation. On
176 the other hand, malloc is more portable, and easier to debug.
177
178 Because we sometimes use alloca, some routines have to be macros,
179 not functions -- `alloca'-allocated space disappears at the end of the
180 function it is called in. */
181
182 #ifdef REGEX_MALLOC
183
184 #define REGEX_ALLOCATE malloc
185 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
/* [<][>][^][v][top][bottom][index][help] */
186
187 #else /* not REGEX_MALLOC */
188
189 /* Emacs already defines alloca, sometimes. */
190 #ifndef alloca
191
192 /* Make alloca work the best possible way. */
193 #ifdef __GNUC__
194 #define alloca __builtin_alloca
195 #else /* not __GNUC__ */
196 #if HAVE_ALLOCA_H
197 #include <alloca.h>
198 #else /* not __GNUC__ or HAVE_ALLOCA_H */
199 #ifndef _AIX /* Already did AIX, up at the top. */
200 char *alloca ();
201 #endif /* not _AIX */
202 #endif /* not HAVE_ALLOCA_H */
203 #endif /* not __GNUC__ */
204
205 #endif /* not alloca */
206
207 #define REGEX_ALLOCATE alloca
208
209 /* Assumes a `char *destination' variable. */
210 #define REGEX_REALLOCATE(source, osize, nsize) \
/* [<][>][^][v][top][bottom][index][help] */
211 (destination = (char *) alloca (nsize), \
212 bcopy (source, destination, osize), \
213 destination)
214
215 #endif /* not REGEX_MALLOC */
216
217
218 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
219 `string1' or just past its end. This works if PTR is NULL, which is
220 a good thing. */
221 #define FIRST_STRING_P(ptr) \
/* [<][>][^][v][top][bottom][index][help] */
222 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
223
224 /* (Re)Allocate N items of type T using malloc, or fail. */
225 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
/* [<][>][^][v][top][bottom][index][help] */
226 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* [<][>][^][v][top][bottom][index][help] */
227 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
/* [<][>][^][v][top][bottom][index][help] */
228
229 #define BYTEWIDTH 8 /* In bits. */
230
/* Nonzero iff the two NUL-terminated strings compare equal.  */
#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0)

/* Larger / smaller of two values.  Each argument may be evaluated
   twice, so do not pass expressions with side effects.  */
#define MAX(a, b) ((b) < (a) ? (a) : (b))
#define MIN(a, b) ((b) > (a) ? (a) : (b))
235
236 typedef char boolean;
237 #define false 0
238 #define true 1
239
240 /* These are the command codes that appear in compiled regular
241 expressions. Some opcodes are followed by argument bytes. A
242 command code can specify any interpretation whatsoever for its
243 arguments. Zero bytes may appear in the compiled regular expression.
244
245 The value of `exactn' is needed in search.c (search_buffer) in Emacs.
246 So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
247 `exactn' we use here must also be 1. */
248
typedef enum
{
  /* Does nothing.  */
  no_op = 0,

  /* Literal text: one count byte N, then N literal bytes.  */
  exactn = 1,

  /* Matches any (more or less) single character.  */
  anychar,

  /* Matches one character from a set.  The first operand byte is the
     number of bitmap bytes that follow; the bitmap has one bit per
     character, low bit first within each byte, and a 1 bit means the
     character is a member.  A character too large to have a bit in
     the map is automatically not in the set.  */
  charset,

  /* Same operands as `charset', but matches any character that is
     NOT one of those specified.  */
  charset_not,

  /* Begin recording matched text for a register.  Operands: one byte
     with the register number (0 to one less than the pattern
     buffer's re_nsub field), then one byte with the number of groups
     nested inside this one.  (The inner-group count lives here only
     because the on_failure_jump handling in re_match_2 needs it.)  */
  start_memory,

  /* Finish recording matched text and store it in a register.
     Operands are the same two bytes as for `start_memory': register
     number, then number of inner groups.  (The count is repeated
     because there is no easy way to locate the matching
     start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a copy of what a register remembered.  One operand byte:
     the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds at beginning of buffer (if emacs) or at beginning of
     the string being matched (if not).  */
  begbuf,

  /* The same, for end of buffer/string.  */
  endbuf,

  /* Unconditional jump; two-byte relative address follows.  */
  jump,

  /* Like `jump', but marks the end of an alternative.  */
  jump_past_alt,

  /* Two-byte relative address of where to resume if matching fails
     from here on.  */
  on_failure_jump,

  /* Like on_failure_jump, except that when executed it pushes a
     placeholder instead of the current string position.  */
  on_failure_keep_string_jump,

  /* Discard the most recent failure point, then jump to the two-byte
     relative address that follows.  */
  pop_failure_jump,

  /* Used to jump back to the start of a repeat.  Followed by a
     two-byte address.  It is rewritten to pop_failure_jump when it
     is known that no backtracking will be needed to match -- that
     is, when what follows this jump clearly cannot match what the
     repeat matches, so backtracking out of repetitions already
     matched would be useless -- and to plain `jump' otherwise.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy
     failure point, which is thrown away if an attempt is ever made
     to use it for a failure.  A `+' construct emits this ahead of
     the first repeat; it is also an intermediate kind of jump while
     compiling an alternative.  */
  dummy_failure_jump,

  /* Push a dummy failure point and carry on.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Operands: two-byte relative address, then two-byte number N.
     After matching N times, jump to the address upon failure.  */
  succeed_n,

  /* Operands: two-byte relative address, then two-byte number N.
     Jump to the address N times, then fail.  */
  jump_n,

  /* Store the two-byte number that follows at the location given by
     the preceding two-byte relative address.  The address *includes*
     the two bytes of number.  */
  set_number_at,

  wordchar,	/* Matches any word-constituent character.  */
  notwordchar,	/* Matches any character that is not a word-constituent.  */

  wordbeg,	/* Succeeds if at the beginning of a word.  */
  wordend,	/* Succeeds if at the end of a word.  */

  wordbound,	/* Succeeds if at a word boundary.  */
  notwordbound	/* Succeeds if not at a word boundary.  */

#ifdef emacs
  ,before_dot,	/* Succeeds if before point.  */
  at_dot,	/* Succeeds if at point.  */
  after_dot,	/* Succeeds if after point.  */

  /* Matches any character having the given syntax.  One operand
     byte holding a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character NOT having the given syntax.  */
  notsyntaxspec
#endif /* emacs */
} re_opcode_t;
379
380 /* Common operations on the compiled pattern. */
381
382 /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
383
/* Write NUMBER into DESTINATION[0] and DESTINATION[1], low-order byte
   first.  NUMBER is evaluated twice.  */
#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0xff;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)

/* As STORE_NUMBER, then advance DESTINATION past the two bytes just
   written.  DESTINATION therefore has to be an lvalue.  */
#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += 2;							\
  } while (0)
399
400 /* Put into DESTINATION a number stored in two contiguous bytes starting
401 at SOURCE. */
402
/* Load into DESTINATION the signed 16-bit number stored low byte
   first at SOURCE[0] and SOURCE[1].  SOURCE is evaluated twice.

   The high byte is sign-extended with the portable XOR/subtract form
   (giving -128..127) and then scaled by multiplying with 256 instead
   of shifting left by 8: left-shifting a negative value is undefined
   behavior in ISO C.  */
#define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += ((((source)[1] & 0377) ^ 128) - 128) * 256;	\
  } while (0)
408
409 #ifdef DEBUG
/* Function form of EXTRACT_NUMBER, compiled only when debugging:
   store in *DEST the signed 16-bit number held low byte first in
   SOURCE[0] and SOURCE[1].  */
static void
extract_number (dest, source)
     int *dest;
     unsigned char *source;
{
  /* Sign-extend the high byte to -128..127 using the XOR/subtract
     form, which does not depend on how the compiler converts
     out-of-range values to a signed type.  */
  int high = (source[1] ^ 128) - 128;

  /* Scale by multiplication: `high << 8' would be undefined behavior
     whenever HIGH is negative, which is the case for every negative
     stored number.  */
  *dest = (source[0] & 0377) + high * 256;
}
419
420 #ifndef EXTRACT_MACROS /* To debug the macros. */
421 #undef EXTRACT_NUMBER
422 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
/* [<][>][^][v][top][bottom][index][help] */
423 #endif /* not EXTRACT_MACROS */
424
425 #endif /* DEBUG */
426
427 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
428 SOURCE must be an lvalue. */
429
/* Read a two-byte number from SOURCE into DESTINATION via
   EXTRACT_NUMBER, then step SOURCE over the two bytes consumed.
   SOURCE has to be an lvalue.  */
#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) = (source) + 2;						\
  } while (0)
435
436 #ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR, compiled only when
   debugging: read the two-byte number at *SOURCE into *DESTINATION
   and advance *SOURCE by two.  */
static void
extract_number_and_incr (destination, source)
     int *destination;
     unsigned char **source;
{
  unsigned char *cursor = *source;

  extract_number (destination, cursor);
  *source = cursor + 2;
}
445
446 #ifndef EXTRACT_MACROS
447 #undef EXTRACT_NUMBER_AND_INCR
448 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
/* [<][>][^][v][top][bottom][index][help] */
449 extract_number_and_incr (&dest, &src)
450 #endif /* not EXTRACT_MACROS */
451
452 #endif /* DEBUG */
453
454 /* If DEBUG is defined, Regex prints many voluminous messages about what
455 it is doing (if the variable `debug' is nonzero). If linked with the
456 main program in `iregex.c', you can enter patterns and strings
457 interactively. And if linked with the main program in `main.c' and
458 the other test files, you can run the already-written tests. */
459
460 #ifdef DEBUG
461
462 /* We use standard I/O for debugging. */
463 #include <stdio.h>
464
465 /* It is useful to test things that ``must'' be true when debugging. */
466 #include <assert.h>
467
/* Nonzero turns on the diagnostic output produced by the macros
   below.  */
static int debug = 0;

/* Compile E only in debugging builds.  */
#define DEBUG_STATEMENT(e) e

/* printf-style tracing, active only while `debug' is nonzero.  Each
   macro expands to a single `do { ... } while (0)' statement so that
   it can be followed by `else' without that `else' binding to the
   hidden `if (debug)'.  Every use must end with a semicolon.  */
#define DEBUG_PRINT1(x)							\
  do { if (debug) printf (x); } while (0)
#define DEBUG_PRINT2(x1, x2)						\
  do { if (debug) printf (x1, x2); } while (0)
#define DEBUG_PRINT3(x1, x2, x3)					\
  do { if (debug) printf (x1, x2, x3); } while (0)
#define DEBUG_PRINT4(x1, x2, x3, x4)					\
  do { if (debug) printf (x1, x2, x3, x4); } while (0)

/* Dump the compiled pattern bytes from S up to E.  The P argument is
   accepted but not used.  */
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)

/* Dump the two-part subject string starting at position W.  */
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
479
480 // Commented out by wlee@isi.edu
481 //extern void printchar ();
482
483 // Instead, put the following in
/* Write C to standard output.  Values in the range 040 up to but not
   including 0177 are written as themselves; anything else is written
   as a backslash followed by three octal digits.  */
static void
printchar (c)
     char c;
{
  if (c >= 040 && c < 0177)
    {
      putchar (c);
      return;
    }

  /* Escape form: two high bits, then two groups of three bits.  */
  putchar ('\\');
  putchar ('0' + ((c >> 6) & 3));
  putchar ('0' + ((c >> 3) & 7));
  putchar ('0' + (c & 7));
}
498
499
500 /* Print the fastmap in human-readable form. */
501
502 void
503 print_fastmap (fastmap)
/* [<][>][^][v][top][bottom][index][help] */
504 char *fastmap;
505 {
506 unsigned was_a_range = 0;
507 unsigned i = 0;
508
509 while (i < (1 << BYTEWIDTH))
510 {
511 if (fastmap[i++])
512 {
513 was_a_range = 0;
514 printchar (i - 1);
515 while (i < (1 << BYTEWIDTH) && fastmap[i])
516 {
517 was_a_range = 1;
518 i++;
519 }
520 if (was_a_range)
521 {
522 printf ("-");
523 printchar (i - 1);
524 }
525 }
526 }
527 putchar ('\n');
528 }
529
530
531 /* Print a compiled pattern string in human-readable form, starting at
532 the START pointer into it and ending just before the pointer END. */
533
534 void
535 print_partial_compiled_pattern (start, end)
/* [<][>][^][v][top][bottom][index][help] */
536 unsigned char *start;
537 unsigned char *end;
538 {
539 int mcnt, mcnt2;
540 unsigned char *p = start;
541 unsigned char *pend = end;
542
543 if (start == NULL)
544 {
545 printf ("(null)\n");
546 return;
547 }
548
549 /* Loop over pattern commands. */
550 while (p < pend)
551 {
552 switch ((re_opcode_t) *p++)
553 {
554 case no_op:
555 printf ("/no_op");
556 break;
557
558 case exactn:
559 mcnt = *p++;
560 printf ("/exactn/%d", mcnt);
561 do
562 {
563 putchar ('/');
564 printchar (*p++);
565 }
566 while (--mcnt);
567 break;
568
569 case start_memory:
570 mcnt = *p++;
571 printf ("/start_memory/%d/%d", mcnt, *p++);
572 break;
573
574 case stop_memory:
575 mcnt = *p++;
576 printf ("/stop_memory/%d/%d", mcnt, *p++);
577 break;
578
579 case duplicate:
580 printf ("/duplicate/%d", *p++);
581 break;
582
583 case anychar:
584 printf ("/anychar");
585 break;
586
587 case charset:
588 case charset_not:
589 {
590 register int c;
591
592 printf ("/charset%s",
593 (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
594
595 assert (p + *p < pend);
596
597 for (c = 0; c < *p; c++)
598 {
599 unsigned bit;
600 unsigned char map_byte = p[1 + c];
601
602 putchar ('/');
603
604 for (bit = 0; bit < BYTEWIDTH; bit++)
605 if (map_byte & (1 << bit))
606 printchar (c * BYTEWIDTH + bit);
607 }
608 p += 1 + *p;
609 break;
610 }
611
612 case begline:
613 printf ("/begline");
614 break;
615
616 case endline:
617 printf ("/endline");
618 break;
619
620 case on_failure_jump:
621 extract_number_and_incr (&mcnt, &p);
622 printf ("/on_failure_jump/0/%d", mcnt);
623 break;
624
625 case on_failure_keep_string_jump:
626 extract_number_and_incr (&mcnt, &p);
627 printf ("/on_failure_keep_string_jump/0/%d", mcnt);
628 break;
629
630 case dummy_failure_jump:
631 extract_number_and_incr (&mcnt, &p);
632 printf ("/dummy_failure_jump/0/%d", mcnt);
633 break;
634
635 case push_dummy_failure:
636 printf ("/push_dummy_failure");
637 break;
638
639 case maybe_pop_jump:
640 extract_number_and_incr (&mcnt, &p);
641 printf ("/maybe_pop_jump/0/%d", mcnt);
642 break;
643
644 case pop_failure_jump:
645 extract_number_and_incr (&mcnt, &p);
646 printf ("/pop_failure_jump/0/%d", mcnt);
647 break;
648
649 case jump_past_alt:
650 extract_number_and_incr (&mcnt, &p);
651 printf ("/jump_past_alt/0/%d", mcnt);
652 break;
653
654 case jump:
655 extract_number_and_incr (&mcnt, &p);
656 printf ("/jump/0/%d", mcnt);
657 break;
658
659 case succeed_n:
660 extract_number_and_incr (&mcnt, &p);
661 extract_number_and_incr (&mcnt2, &p);
662 printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
663 break;
664
665 case jump_n:
666 extract_number_and_incr (&mcnt, &p);
667 extract_number_and_incr (&mcnt2, &p);
668 printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
669 break;
670
671 case set_number_at:
672 extract_number_and_incr (&mcnt, &p);
673 extract_number_and_incr (&mcnt2, &p);
674 printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
675 break;
676
677 case wordbound:
678 printf ("/wordbound");
679 break;
680
681 case notwordbound:
682 printf ("/notwordbound");
683 break;
684
685 case wordbeg:
686 printf ("/wordbeg");
687 break;
688
689 case wordend:
690 printf ("/wordend");
691
692 #ifdef emacs
693 case before_dot:
694 printf ("/before_dot");
695 break;
696
697 case at_dot:
698 printf ("/at_dot");
699 break;
700
701 case after_dot:
702 printf ("/after_dot");
703 break;
704
705 case syntaxspec:
706 printf ("/syntaxspec");
707 mcnt = *p++;
708 printf ("/%d", mcnt);
709 break;
710
711 case notsyntaxspec:
712 printf ("/notsyntaxspec");
713 mcnt = *p++;
714 printf ("/%d", mcnt);
715 break;
716 #endif /* emacs */
717
718 case wordchar:
719 printf ("/wordchar");
720 break;
721
722 case notwordchar:
723 printf ("/notwordchar");
724 break;
725
726 case begbuf:
727 printf ("/begbuf");
728 break;
729
730 case endbuf:
731 printf ("/endbuf");
732 break;
733
734 default:
735 printf ("?%d", *(p-1));
736 }
737 }
738 printf ("/\n");
739 }
740
741
742 void
743 print_compiled_pattern (bufp)
/* [<][>][^][v][top][bottom][index][help] */
744 struct re_pattern_buffer *bufp;
745 {
746 unsigned char *buffer = bufp->buffer;
747
748 print_partial_compiled_pattern (buffer, buffer + bufp->used);
749 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
750
751 if (bufp->fastmap_accurate && bufp->fastmap)
752 {
753 printf ("fastmap: ");
754 print_fastmap (bufp->fastmap);
755 }
756
757 printf ("re_nsub: %d\t", bufp->re_nsub);
758 printf ("regs_alloc: %d\t", bufp->regs_allocated);
759 printf ("can_be_null: %d\t", bufp->can_be_null);
760 printf ("newline_anchor: %d\n", bufp->newline_anchor);
761 printf ("no_sub: %d\t", bufp->no_sub);
762 printf ("not_bol: %d\t", bufp->not_bol);
763 printf ("not_eol: %d\t", bufp->not_eol);
764 printf ("syntax: %d\n", bufp->syntax);
765 /* Perhaps we should print the translate table? */
766 }
767
768
/* Print the text from position WHERE to the end of the two-part
   string STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes).
   If WHERE lies in the first part (per FIRST_STRING_P), the rest of
   STRING1 is printed and then all of STRING2; otherwise WHERE is
   taken as a position inside STRING2.  A null WHERE prints
   "(null)".  */
void
print_double_string (where, string1, size1, string2, size2)
     const char *where;
     const char *string1;
     int size1;
     const char *string2;
     int size2;
{
  unsigned idx;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      idx = where - string1;
      while (idx < size1)
        {
          printchar (string1[idx]);
          idx++;
        }

      /* Continue from the very start of the second part.  */
      where = string2;
    }

  idx = where - string2;
  while (idx < size2)
    {
      printchar (string2[idx]);
      idx++;
    }
}
795
796 #else /* not DEBUG */
797
798 #undef assert
799 #define assert(e)
/* [<][>][^][v][top][bottom][index][help] */
800
801 #define DEBUG_STATEMENT(e)
/* [<][>][^][v][top][bottom][index][help] */
802 #define DEBUG_PRINT1(x)
/* [<][>][^][v][top][bottom][index][help] */
803 #define DEBUG_PRINT2(x1, x2)
/* [<][>][^][v][top][bottom][index][help] */
804 #define DEBUG_PRINT3(x1, x2, x3)
/* [<][>][^][v][top][bottom][index][help] */
805 #define DEBUG_PRINT4(x1, x2, x3, x4)
/* [<][>][^][v][top][bottom][index][help] */
806 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
/* [<][>][^][v][top][bottom][index][help] */
807 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
/* [<][>][^][v][top][bottom][index][help] */
808
809 #endif /* not DEBUG */
810
811 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
812 also be assigned to arbitrarily: each pattern buffer stores its own
813 syntax, so it can be changed between regex compilations. */
814 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
815
816
817 /* Specify the precise syntax of regexps for compilation. This provides
818 for compatibility for various utilities which historically have
819 different, incompatible syntaxes.
820
821 The argument SYNTAX is a bit mask comprised of the various bits
822 defined in regex.h. We return the old syntax. */
823
824 reg_syntax_t
825 re_set_syntax (syntax)
/* [<][>][^][v][top][bottom][index][help] */
826 reg_syntax_t syntax;
827 {
828 reg_syntax_t ret = re_syntax_options;
829
830 re_syntax_options = syntax;
831 return ret;
832 }
833
834 /* This table gives an error message for each of the error codes listed
835 in regex.h. Obviously the order here has to be same as there. */
836
/* Message text for each error code, indexed by the code.  The trailing
   comment on each entry names the code it is meant to line up with;
   the order has to stay in step with the codes in regex.h.  The first
   slot (no error) holds no message.  */
static const char *re_error_msg[] =
  {
    NULL,					/* REG_NOERROR */
    "No match",					/* REG_NOMATCH */
    "Invalid regular expression",		/* REG_BADPAT */
    "Invalid collation character",		/* REG_ECOLLATE */
    "Invalid character class name",		/* REG_ECTYPE */
    "Trailing backslash",			/* REG_EESCAPE */
    "Invalid back reference",			/* REG_ESUBREG */
    "Unmatched [ or [^",			/* REG_EBRACK */
    "Unmatched ( or \\(",			/* REG_EPAREN */
    "Unmatched \\{",				/* REG_EBRACE */
    "Invalid content of \\{\\}",		/* REG_BADBR */
    "Invalid range end",			/* REG_ERANGE */
    "Memory exhausted",				/* REG_ESPACE */
    "Invalid preceding regular expression",	/* REG_BADRPT */
    "Premature end of regular expression",	/* REG_EEND */
    "Regular expression too big",		/* REG_ESIZE */
    "Unmatched ) or \\)",			/* REG_ERPAREN */
  };
856
857 /* Subroutine declarations and macros for regex_compile. */
858
859 static void store_op1 (), store_op2 ();
860 static void insert_op1 (), insert_op2 ();
861 static boolean at_begline_loc_p (), at_endline_loc_p ();
862 static boolean group_in_compile_stack ();
863 static reg_errcode_t compile_range ();
864
865 /* Fetch the next character in the uncompiled pattern---translating it
866 if necessary. Also cast from a signed character in the constant
867 string passed to us by the user to an unsigned char that we can use
868 as an array index (in, e.g., `translate'). */
869 #define PATFETCH(c) \
/* [<][>][^][v][top][bottom][index][help] */
870 do {if (p == pend) return REG_EEND; \
871 c = (unsigned char) *p++; \
872 if (translate) c = translate[c]; \
873 } while (0)
874
875 /* Fetch the next character in the uncompiled pattern, with no
876 translation. */
877 #define PATFETCH_RAW(c) \
/* [<][>][^][v][top][bottom][index][help] */
878 do {if (p == pend) return REG_EEND; \
879 c = (unsigned char) *p++; \
880 } while (0)
881
882 /* Go backwards one character in the pattern. */
883 #define PATUNFETCH p--
884
885
886 /* If `translate' is non-null, return translate[D], else just D. We
887 cast the subscript to translate because some data is declared as
888 `char *', to avoid warnings when a string constant is passed. But
889 when we use a character as a subscript we must make it unsigned. */
890 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
/* [<][>][^][v][top][bottom][index][help] */
891
892
893 /* Macros for outputting the compiled pattern into `buffer'. */
894
895 /* If the buffer isn't allocated when it comes in, use this. */
896 #define INIT_BUF_SIZE 32
897
898 /* Make sure we have at least N more bytes of space in buffer. */
899 #define GET_BUFFER_SPACE(n) \
/* [<][>][^][v][top][bottom][index][help] */
900 while (b - bufp->buffer + (n) > bufp->allocated) \
901 EXTEND_BUFFER ()
902
903 /* Make sure we have one more byte of buffer space and then add C to it. */
904 #define BUF_PUSH(c) \
/* [<][>][^][v][top][bottom][index][help] */
905 do { \
906 GET_BUFFER_SPACE (1); \
907 *b++ = (unsigned char) (c); \
908 } while (0)
909
910
911 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */
912 #define BUF_PUSH_2(c1, c2) \
/* [<][>][^][v][top][bottom][index][help] */
913 do { \
914 GET_BUFFER_SPACE (2); \
915 *b++ = (unsigned char) (c1); \
916 *b++ = (unsigned char) (c2); \
917 } while (0)
918
919
920 /* As with BUF_PUSH_2, except for three bytes. */
921 #define BUF_PUSH_3(c1, c2, c3) \
/* [<][>][^][v][top][bottom][index][help] */
922 do { \
923 GET_BUFFER_SPACE (3); \
924 *b++ = (unsigned char) (c1); \
925 *b++ = (unsigned char) (c2); \
926 *b++ = (unsigned char) (c3); \
927 } while (0)
928
929
930 /* Store a jump with opcode OP at LOC to location TO. We store a
931 relative address offset by the three bytes the jump itself occupies.
   LOC and TO are pointers into the pattern buffer; the value handed to
   `store_op1' is the pointer difference `TO - LOC - 3'. */
932 #define STORE_JUMP(op, loc, to) \
/* [<][>][^][v][top][bottom][index][help] */
933 store_op1 (op, loc, (to) - (loc) - 3)
934
935 /* Likewise, for a two-argument jump: the same relative offset is passed
   to `store_op2', followed by ARG unchanged. */
936 #define STORE_JUMP2(op, loc, to, arg) \
/* [<][>][^][v][top][bottom][index][help] */
937 store_op2 (op, loc, (to) - (loc) - 3, arg)
938
939 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end.
   `b' is forwarded to `insert_op1' as its END argument; this macro does
   not advance `b' itself. */
940 #define INSERT_JUMP(op, loc, to) \
/* [<][>][^][v][top][bottom][index][help] */
941 insert_op1 (op, loc, (to) - (loc) - 3, b)
942
943 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end.
   `b' is forwarded to `insert_op2' as its END argument; this macro does
   not advance `b' itself. */
944 #define INSERT_JUMP2(op, loc, to, arg) \
/* [<][>][^][v][top][bottom][index][help] */
945 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
946
947
948 /* This is not an arbitrary limit: the arguments which represent offsets
949 into the pattern are two bytes long. So if 2^16 bytes turns out to
950 be too small, many things would have to change.
   EXTEND_BUFFER clamps the buffer size to this value and reports
   REG_ESIZE once the buffer is already this large. */
951 #define MAX_BUF_SIZE (1L << 16)
952
953
/* Grow the pattern buffer to twice its current size (clamped to
   MAX_BUF_SIZE) with realloc, then re-point every local pointer that
   referred into the old block to the same offset in the new one.

   This macro must be expanded inside `regex_compile': it uses that
   function's `bufp', `b', `begalt', `fixup_alt_jump', `laststart' and
   `pending_exact', and it `return's from the enclosing function:
     REG_ESIZE   if the buffer is already MAX_BUF_SIZE bytes long;
     REG_ESPACE  if realloc cannot supply the larger block.

   The offsets of the local pointers are recorded before realloc is
   called, so the old block is never referred to after it may have been
   released.  The result of realloc goes through a temporary, and
   `bufp->buffer' / `bufp->allocated' are updated only on success; on
   failure they still describe the original, still valid, block instead
   of dropping the only pointer to it.  */
#define EXTEND_BUFFER()							\
  do {									\
    unsigned char *old_buffer = bufp->buffer;				\
    unsigned char *new_buffer;						\
    unsigned long new_allocated;					\
    long b_off = b - old_buffer;					\
    long begalt_off = begalt - old_buffer;				\
    /* -1 marks a pointer that is currently null.  */			\
    long fixup_off = fixup_alt_jump ? fixup_alt_jump - old_buffer : -1;	\
    long laststart_off = laststart ? laststart - old_buffer : -1;	\
    long pending_off = pending_exact ? pending_exact - old_buffer : -1;	\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated << 1;				\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE;					\
    new_buffer = (unsigned char *) realloc (old_buffer, new_allocated);	\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    bufp->buffer = new_buffer;						\
    bufp->allocated = new_allocated;					\
    /* Rebase all the pointers into the (possibly moved) block.  */	\
    b = new_buffer + b_off;						\
    begalt = new_buffer + begalt_off;					\
    if (fixup_off >= 0)							\
      fixup_alt_jump = new_buffer + fixup_off;				\
    if (laststart_off >= 0)						\
      laststart = new_buffer + laststart_off;				\
    if (pending_off >= 0)						\
      pending_exact = new_buffer + pending_off;				\
  } while (0)
982
983
984 /* Since we have one byte reserved for the register number argument to
985 {start,stop}_memory, the maximum number of groups we can report
986 things about is what fits in that byte. */
987 #define MAX_REGNUM 255
988
989 /* But patterns can have more than `MAX_REGNUM' registers. We just
990 ignore the excess: `regex_compile' keeps counting groups in a `regnum_t'
   but emits start_memory/stop_memory only while the group number is
   <= MAX_REGNUM. */
991 typedef unsigned regnum_t;
992
993
994 /* Macros for the compile stack. */
995
996 /* Since offsets can go either forwards or backwards, this type needs to
997 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
998 typedef int pattern_offset_t;
999
/* One saved parser state, recorded by `regex_compile' when a group is
   opened and restored when the matching close is reached.  Positions are
   kept as offsets from `bufp->buffer' rather than as pointers, so they
   remain valid if the buffer is reallocated in between.  */
1000 typedef struct
1001 {
1002 pattern_offset_t begalt_offset; /* `begalt' at the open, as an offset. */
1003 pattern_offset_t fixup_alt_jump; /* Offset of `fixup_alt_jump' plus 1; 0 means there was none. */
1004 pattern_offset_t inner_group_offset; /* Offset of the third byte of this group's start_memory; assigned only when the group number is <= MAX_REGNUM. */
1005 pattern_offset_t laststart_offset; /* Offset of `b' at the open; becomes `laststart' again at the close. */
1006 regnum_t regnum; /* Number given to this group. */
1007 } compile_stack_elt_t;
1008
1009
/* The stack of currently open groups used by `regex_compile'.  */
1010 typedef struct
1011 {
1012 compile_stack_elt_t *stack; /* Array of saved states. */
1013 unsigned size; /* Number of elements `stack' has room for. */
1014 unsigned avail; /* Offset of next open position. */
1015 } compile_stack_type;
1016
1017
/* Number of elements the compile stack starts out with; `regex_compile'
   doubles the stack when it is full.  */
1018 #define INIT_COMPILE_STACK_SIZE 32
1019
/* Both tests refer to a variable named `compile_stack' in the expanding
   scope.  */
1020 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1021 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1022
1023 /* The next available element, i.e. the slot at index `avail'.  After
   `avail' has been decremented this names the element just popped. */
1024 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1025
1026
/* Set the bit for character C in the bit list that starts at `b'.
   C is narrowed to `unsigned char'; the byte index is that value divided
   by BYTEWIDTH and the bit within the byte is the remainder.  C is fully
   parenthesized in both places it appears, so an argument that is an
   expression (e.g. `base + 2') is narrowed as a whole rather than having
   the cast bind to its first operand only.  */
#define SET_LIST_BIT(c)							\
  (b[((unsigned char) (c)) / BYTEWIDTH]					\
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
1031
1032
/* Get the next unsigned number in the uncompiled pattern.

   If the pattern is not exhausted, fetch characters with PATFETCH into
   `c' and accumulate consecutive decimal digits into NUM.  NUM is left
   untouched when no digit is found (callers preset it to -1 to detect
   that); a negative NUM is reset to 0 before the first digit is added.
   On exit `c' holds the last character fetched: the first non-digit, or
   the final digit if the pattern ended.

   Accumulation stops growing NUM once it exceeds RE_DUP_MAX, so an
   arbitrarily long digit string cannot overflow NUM; the remaining digits
   are still consumed.  Any value above RE_DUP_MAX is out of range for an
   interval bound, so callers only need to know that it is too large.

   Uses `p', `pend' and `c' from the expanding scope.  Wrapped in
   `do ... while (0)' so it behaves as a single statement; write a
   semicolon after the invocation.  */
#define GET_UNSIGNED_NUMBER(num)					\
  do {									\
    if (p != pend)							\
      {									\
        PATFETCH (c);							\
        while (ISDIGIT (c))						\
          {								\
            if ((num) < 0)						\
              (num) = 0;						\
            if ((num) <= RE_DUP_MAX)					\
              (num) = (num) * 10 + c - '0';				\
            if (p == pend)						\
              break;							\
            PATFETCH (c);						\
          }								\
      }									\
  } while (0)
1049
/* Length of the longest character class name, namely `xdigit'.  */
#define CHAR_CLASS_MAX_LENGTH 6

/* Nonzero if STRING is exactly one of the twelve recognized character
   class names.  */
#define IS_CHAR_CLASS(string)						\
  (   STREQ (string, "alnum")						\
   || STREQ (string, "alpha")						\
   || STREQ (string, "blank")						\
   || STREQ (string, "cntrl")						\
   || STREQ (string, "digit")						\
   || STREQ (string, "graph")						\
   || STREQ (string, "lower")						\
   || STREQ (string, "print")						\
   || STREQ (string, "punct")						\
   || STREQ (string, "space")						\
   || STREQ (string, "upper")						\
   || STREQ (string, "xdigit"))
1059
1060 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1061 Returns one of error codes defined in `regex.h', or zero for success.
1062
1063 Assumes the `allocated' (and perhaps `buffer') and `translate'
1064 fields are set in BUFP on entry.
1065
1066 If it succeeds, results are put in BUFP (if it returns an error, the
1067 contents of BUFP are undefined):
1068 `buffer' is the compiled pattern;
1069 `syntax' is set to SYNTAX;
1070 `used' is set to the length of the compiled pattern;
1071 `fastmap_accurate' is zero;
1072 `re_nsub' is the number of subexpressions in PATTERN;
1073 `not_bol' and `not_eol' are zero;
1074
1075 The `fastmap' and `newline_anchor' fields are neither
1076 examined nor set. */
/* NOTE(review): the compile stack allocated near the top of this function
   is released only by the `free' on the success path at the bottom.  Every
   `return' of an error code in between (including the ones inside
   EXTEND_BUFFER, reached through GET_BUFFER_SPACE and the BUF_PUSH macros)
   leaves without freeing it, which looks like a leak if TALLOC allocates
   from the heap -- TODO confirm against TALLOC's definition. */
1077
1078 static reg_errcode_t
1079 regex_compile (pattern, size, syntax, bufp)
/* [<][>][^][v][top][bottom][index][help] */
1080 const char *pattern;
1081 int size;
1082 reg_syntax_t syntax;
1083 struct re_pattern_buffer *bufp;
1084 {
1085 /* We fetch characters from PATTERN here. Even though PATTERN is
1086 `char *' (i.e., signed), we declare these variables as unsigned, so
1087 they can be reliably used as array indices. */
1088 register unsigned char c, c1;
1089
1090 /* A random temporary spot in PATTERN. */
1091 const char *p1;
1092
1093 /* Points to the end of the buffer, where we should append. */
1094 register unsigned char *b;
1095
1096 /* Keeps track of unclosed groups. */
1097 compile_stack_type compile_stack;
1098
1099 /* Points to the current (ending) position in the pattern. */
1100 const char *p = pattern;
1101 const char *pend = pattern + size;
1102
1103 /* How to translate the characters in the pattern. */
1104 char *translate = bufp->translate;
1105
1106 /* Address of the count-byte of the most recently inserted `exactn'
1107 command. This makes it possible to tell if a new exact-match
1108 character can be added to that command or if the character requires
1109 a new `exactn' command. */
1110 unsigned char *pending_exact = 0;
1111
1112 /* Address of start of the most recently finished expression.
1113 This tells, e.g., postfix * where to find the start of its
1114 operand. Reset at the beginning of groups and alternatives. */
1115 unsigned char *laststart = 0;
1116
1117 /* Address of beginning of regexp, or inside of last group. */
1118 unsigned char *begalt;
1119
1120 /* Place in the uncompiled pattern (i.e., the {) to
1121 which to go back if the interval is invalid. */
1122 const char *beg_interval;
1123
1124 /* Address of the place where a forward jump should go to the end of
1125 the containing expression. Each alternative of an `or' -- except the
1126 last -- ends with a forward jump of this sort. */
1127 unsigned char *fixup_alt_jump = 0;
1128
1129 /* Counts open-groups as they are encountered. Remembered for the
1130 matching close-group on the compile stack, so the same register
1131 number is put in the stop_memory as the start_memory. */
1132 regnum_t regnum = 0;
1133
1134 #ifdef DEBUG
1135 DEBUG_PRINT1 ("\nCompiling pattern: ");
1136 if (debug)
1137 {
1138 unsigned debug_count;
1139
1140 for (debug_count = 0; debug_count < size; debug_count++)
1141 printchar (pattern[debug_count]);
1142 putchar ('\n');
1143 }
1144 #endif /* DEBUG */
1145
1146 /* Initialize the compile stack. */
1147 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1148 if (compile_stack.stack == NULL)
1149 return REG_ESPACE;
1150
1151 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1152 compile_stack.avail = 0;
1153
1154 /* Initialize the pattern buffer. */
1155 bufp->syntax = syntax;
1156 bufp->fastmap_accurate = 0;
1157 bufp->not_bol = bufp->not_eol = 0;
1158
1159 /* Set `used' to zero, so that if we return an error, the pattern
1160 printer (for debugging) will think there's no pattern. We reset it
1161 at the end. */
1162 bufp->used = 0;
1163
1164 /* Always count groups, whether or not bufp->no_sub is set. */
1165 bufp->re_nsub = 0;
1166
1167 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1168 /* Initialize the syntax table. */
1169 init_syntax_once ();
1170 #endif
1171
1172 if (bufp->allocated == 0)
1173 {
1174 if (bufp->buffer)
1175 { /* If zero allocated, but buffer is non-null, try to realloc
1176 enough space. This loses if buffer's address is bogus, but
1177 that is the user's responsibility. */
1178 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1179 }
1180 else
1181 { /* Caller did not allocate a buffer. Do it for them. */
1182 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1183 }
1184 if (!bufp->buffer) return REG_ESPACE;
1185
1186 bufp->allocated = INIT_BUF_SIZE;
1187 }
1188
1189 begalt = b = bufp->buffer;
1190
1191 /* Loop through the uncompiled pattern until we're at the end. */
1192 while (p != pend)
1193 {
1194 PATFETCH (c);
1195
1196 switch (c)
1197 {
1198 case '^':
1199 {
1200 if ( /* If at start of pattern, it's an operator. */
1201 p == pattern + 1
1202 /* If context independent, it's an operator. */
1203 || syntax & RE_CONTEXT_INDEP_ANCHORS
1204 /* Otherwise, depends on what's come before. */
1205 || at_begline_loc_p (pattern, p, syntax))
1206 BUF_PUSH (begline);
1207 else
1208 goto normal_char;
1209 }
1210 break;
1211
1212
1213 case '$':
1214 {
1215 if ( /* If at end of pattern, it's an operator. */
1216 p == pend
1217 /* If context independent, it's an operator. */
1218 || syntax & RE_CONTEXT_INDEP_ANCHORS
1219 /* Otherwise, depends on what's next. */
1220 || at_endline_loc_p (p, pend, syntax))
1221 BUF_PUSH (endline);
1222 else
1223 goto normal_char;
1224 }
1225 break;
1226
1227
1228 case '+':
1229 case '?':
1230 if ((syntax & RE_BK_PLUS_QM)
1231 || (syntax & RE_LIMITED_OPS))
1232 goto normal_char;
/* `handle_plus' is also the target of the `\+' and `\?' cases further
   down when RE_BK_PLUS_QM is set. */
1233 handle_plus:
1234 case '*':
1235 /* If there is no previous pattern... */
1236 if (!laststart)
1237 {
1238 if (syntax & RE_CONTEXT_INVALID_OPS)
1239 return REG_BADRPT;
1240 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1241 goto normal_char;
1242 }
1243
1244 {
1245 /* Are we optimizing this jump? */
1246 boolean keep_string_p = false;
1247
1248 /* 1 means zero (many) matches is allowed. */
1249 char zero_times_ok = 0, many_times_ok = 0;
1250
1251 /* If there is a sequence of repetition chars, collapse it
1252 down to just one (the right one). We can't combine
1253 interval operators with these because of, e.g., `a{2}*',
1254 which should only match an even number of `a's. */
1255
1256 for (;;)
1257 {
1258 zero_times_ok |= c != '+';
1259 many_times_ok |= c != '?';
1260
1261 if (p == pend)
1262 break;
1263
1264 PATFETCH (c);
1265
1266 if (c == '*'
1267 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1268 ;
1269
1270 else if (syntax & RE_BK_PLUS_QM && c == '\\')
1271 {
1272 if (p == pend) return REG_EESCAPE;
1273
1274 PATFETCH (c1);
1275 if (!(c1 == '+' || c1 == '?'))
1276 {
1277 PATUNFETCH;
1278 PATUNFETCH;
1279 break;
1280 }
1281
1282 c = c1;
1283 }
1284 else
1285 {
1286 PATUNFETCH;
1287 break;
1288 }
1289
1290 /* If we get here, we found another repeat character. */
1291 }
1292
1293 /* Star, etc. applied to an empty pattern is equivalent
1294 to an empty pattern. */
1295 if (!laststart)
1296 break;
1297
1298 /* Now we know whether or not zero matches is allowed
1299 and also whether or not two or more matches is allowed. */
1300 if (many_times_ok)
1301 { /* More than one repetition is allowed, so put in at the
1302 end a backward relative jump from `b' to before the next
1303 jump we're going to put in below (which jumps from
1304 laststart to after this jump).
1305
1306 But if we are at the `*' in the exact sequence `.*\n',
1307 insert an unconditional jump backwards to the .,
1308 instead of the beginning of the loop. This way we only
1309 push a failure point once, instead of every time
1310 through the loop. */
1311 assert (p - 1 > pattern);
1312
1313 /* Allocate the space for the jump. */
1314 GET_BUFFER_SPACE (3);
1315
1316 /* We know we are not at the first character of the pattern,
1317 because laststart was nonzero. And we've already
1318 incremented `p', by the way, to be the character after
1319 the `*'. Do we have to do something analogous here
1320 for null bytes, because of RE_DOT_NOT_NULL? */
1321 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1322 && zero_times_ok
1323 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1324 && !(syntax & RE_DOT_NEWLINE))
1325 { /* We have .*\n. */
1326 STORE_JUMP (jump, b, laststart);
1327 keep_string_p = true;
1328 }
1329 else
1330 /* Anything else. */
1331 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1332
1333 /* We've added more stuff to the buffer. */
1334 b += 3;
1335 }
1336
1337 /* On failure, jump from laststart to b + 3, which will be the
1338 end of the buffer after this jump is inserted. */
1339 GET_BUFFER_SPACE (3);
1340 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1341 : on_failure_jump,
1342 laststart, b + 3);
1343 pending_exact = 0;
1344 b += 3;
1345
1346 if (!zero_times_ok)
1347 {
1348 /* At least one repetition is required, so insert a
1349 `dummy_failure_jump' before the initial
1350 `on_failure_jump' instruction of the loop. This
1351 effects a skip over that instruction the first time
1352 we hit that loop. */
1353 GET_BUFFER_SPACE (3);
1354 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1355 b += 3;
1356 }
1357 }
1358 break;
1359
1360
1361 case '.':
1362 laststart = b;
1363 BUF_PUSH (anychar);
1364 break;
1365
1366
1367 case '[':
1368 {
1369 boolean had_char_class = false;
1370
1371 if (p == pend) return REG_EBRACK;
1372
1373 /* Ensure that we have enough space to push a charset: the
1374 opcode, the length count, and the bitset; 34 bytes in all. */
1375 GET_BUFFER_SPACE (34);
1376
1377 laststart = b;
1378
1379 /* We test `*p == '^' twice, instead of using an if
1380 statement, so we only need one BUF_PUSH. */
1381 BUF_PUSH (*p == '^' ? charset_not : charset);
1382 if (*p == '^')
1383 p++;
1384
1385 /* Remember the first position in the bracket expression. */
1386 p1 = p;
1387
1388 /* Push the number of bytes in the bitmap. */
1389 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1390
1391 /* Clear the whole map. */
1392 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1393
1394 /* charset_not matches newline according to a syntax bit. */
1395 if ((re_opcode_t) b[-2] == charset_not
1396 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1397 SET_LIST_BIT ('\n');
1398
1399 /* Read in characters and ranges, setting map bits. */
1400 for (;;)
1401 {
1402 if (p == pend) return REG_EBRACK;
1403
1404 PATFETCH (c);
1405
1406 /* \ might escape characters inside [...] and [^...]. */
1407 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1408 {
1409 if (p == pend) return REG_EESCAPE;
1410
1411 PATFETCH (c1);
1412 SET_LIST_BIT (c1);
1413 continue;
1414 }
1415
1416 /* Could be the end of the bracket expression. If it's
1417 not (i.e., when the bracket expression is `[]' so
1418 far), the ']' character bit gets set way below. */
1419 if (c == ']' && p != p1 + 1)
1420 break;
1421
/* NOTE(review): the look-ahead tests on `*p', `p[0]' and `p[1]' in the
   rest of this loop body are not preceded by a `p != pend' check, so
   when C was the last pattern character they read at or past PEND --
   confirm callers guarantee readable bytes there. */
1422 /* Look ahead to see if it's a range when the last thing
1423 was a character class. */
1424 if (had_char_class && c == '-' && *p != ']')
1425 return REG_ERANGE;
1426
1427 /* Look ahead to see if it's a range when the last thing
1428 was a character: if this is a hyphen not at the
1429 beginning or the end of a list, then it's the range
1430 operator. */
1431 if (c == '-'
1432 && !(p - 2 >= pattern && p[-2] == '[')
1433 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1434 && *p != ']')
1435 {
1436 reg_errcode_t ret
1437 = compile_range (&p, pend, translate, syntax, b);
1438 if (ret != REG_NOERROR) return ret;
1439 }
1440
1441 else if (p[0] == '-' && p[1] != ']')
1442 { /* This handles ranges made up of characters only. */
1443 reg_errcode_t ret;
1444
1445 /* Move past the `-'. */
1446 PATFETCH (c1);
1447
1448 ret = compile_range (&p, pend, translate, syntax, b);
1449 if (ret != REG_NOERROR) return ret;
1450 }
1451
1452 /* See if we're at the beginning of a possible character
1453 class. */
1454
1455 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1456 { /* Leave room for the null. */
1457 char str[CHAR_CLASS_MAX_LENGTH + 1];
1458
1459 PATFETCH (c);
1460 c1 = 0;
1461
1462 /* If pattern is `[[:'. */
1463 if (p == pend) return REG_EBRACK;
1464
1465 for (;;)
1466 {
1467 PATFETCH (c);
1468 if (c == ':' || c == ']' || p == pend
1469 || c1 == CHAR_CLASS_MAX_LENGTH)
1470 break;
1471 str[c1++] = c;
1472 }
1473 str[c1] = '\0';
1474
1475 /* If isn't a word bracketed by `[:' and:`]':
1476 undo the ending character, the letters, and leave
1477 the leading `:' and `[' (but set bits for them). */
1478 if (c == ':' && *p == ']')
1479 {
1480 int ch;
1481 boolean is_alnum = STREQ (str, "alnum");
1482 boolean is_alpha = STREQ (str, "alpha");
1483 boolean is_blank = STREQ (str, "blank");
1484 boolean is_cntrl = STREQ (str, "cntrl");
1485 boolean is_digit = STREQ (str, "digit");
1486 boolean is_graph = STREQ (str, "graph");
1487 boolean is_lower = STREQ (str, "lower");
1488 boolean is_print = STREQ (str, "print");
1489 boolean is_punct = STREQ (str, "punct");
1490 boolean is_space = STREQ (str, "space");
1491 boolean is_upper = STREQ (str, "upper");
1492 boolean is_xdigit = STREQ (str, "xdigit");
1493
1494 if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1495
1496 /* Throw away the ] at the end of the character
1497 class. */
1498 PATFETCH (c);
1499
1500 if (p == pend) return REG_EBRACK;
1501
1502 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1503 {
1504 if ( (is_alnum && ISALNUM (ch))
1505 || (is_alpha && ISALPHA (ch))
1506 || (is_blank && ISBLANK (ch))
1507 || (is_cntrl && ISCNTRL (ch))
1508 || (is_digit && ISDIGIT (ch))
1509 || (is_graph && ISGRAPH (ch))
1510 || (is_lower && ISLOWER (ch))
1511 || (is_print && ISPRINT (ch))
1512 || (is_punct && ISPUNCT (ch))
1513 || (is_space && ISSPACE (ch))
1514 || (is_upper && ISUPPER (ch))
1515 || (is_xdigit && ISXDIGIT (ch)))
1516 SET_LIST_BIT (ch);
1517 }
1518 had_char_class = true;
1519 }
1520 else
1521 {
1522 c1++;
1523 while (c1--)
1524 PATUNFETCH;
1525 SET_LIST_BIT ('[');
1526 SET_LIST_BIT (':');
1527 had_char_class = false;
1528 }
1529 }
1530 else
1531 {
1532 had_char_class = false;
1533 SET_LIST_BIT (c);
1534 }
1535 }
1536
1537 /* Discard any (non)matching list bytes that are all 0 at the
1538 end of the map. Decrease the map-length byte too. */
1539 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1540 b[-1]--;
1541 b += b[-1];
1542 }
1543 break;
1544
1545
1546 case '(':
1547 if (syntax & RE_NO_BK_PARENS)
1548 goto handle_open;
1549 else
1550 goto normal_char;
1551
1552
1553 case ')':
1554 if (syntax & RE_NO_BK_PARENS)
1555 goto handle_close;
1556 else
1557 goto normal_char;
1558
1559
1560 case '\n':
1561 if (syntax & RE_NEWLINE_ALT)
1562 goto handle_alt;
1563 else
1564 goto normal_char;
1565
1566
1567 case '|':
1568 if (syntax & RE_NO_BK_VBAR)
1569 goto handle_alt;
1570 else
1571 goto normal_char;
1572
1573
1574 case '{':
1575 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1576 goto handle_interval;
1577 else
1578 goto normal_char;
1579
1580
1581 case '\\':
1582 if (p == pend) return REG_EESCAPE;
1583
1584 /* Do not translate the character after the \, so that we can
1585 distinguish, e.g., \B from \b, even if we normally would
1586 translate, e.g., B to b. */
1587 PATFETCH_RAW (c);
1588
1589 switch (c)
1590 {
1591 case '(':
1592 if (syntax & RE_NO_BK_PARENS)
1593 goto normal_backslash;
1594
1595 handle_open:
1596 bufp->re_nsub++;
1597 regnum++;
1598
1599 if (COMPILE_STACK_FULL)
1600 {
1601 RETALLOC (compile_stack.stack, compile_stack.size << 1,
1602 compile_stack_elt_t);
1603 if (compile_stack.stack == NULL) return REG_ESPACE;
1604
1605 compile_stack.size <<= 1;
1606 }
1607
1608 /* These are the values to restore when we hit end of this
1609 group. They are all relative offsets, so that if the
1610 whole pattern moves because of realloc, they will still
1611 be valid. */
1612 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1613 COMPILE_STACK_TOP.fixup_alt_jump
1614 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1615 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1616 COMPILE_STACK_TOP.regnum = regnum;
1617
1618 /* We will eventually replace the 0 with the number of
1619 groups inner to this one. But do not push a
1620 start_memory for groups beyond the last one we can
1621 represent in the compiled pattern. */
1622 if (regnum <= MAX_REGNUM)
1623 {
1624 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1625 BUF_PUSH_3 (start_memory, regnum, 0);
1626 }
1627
1628 compile_stack.avail++;
1629
1630 fixup_alt_jump = 0;
1631 laststart = 0;
1632 begalt = b;
1633 /* If we've reached MAX_REGNUM groups, then this open
1634 won't actually generate any code, so we'll have to
1635 clear pending_exact explicitly. */
1636 pending_exact = 0;
1637 break;
1638
1639
1640 case ')':
1641 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1642
1643 if (COMPILE_STACK_EMPTY) {
1644 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1645 goto normal_backslash;
1646 else
1647 return REG_ERPAREN;
1648 }
1649
1650 handle_close:
1651 if (fixup_alt_jump)
1652 { /* Push a dummy failure point at the end of the
1653 alternative for a possible future
1654 `pop_failure_jump' to pop. See comments at
1655 `push_dummy_failure' in `re_match_2'. */
1656 BUF_PUSH (push_dummy_failure);
1657
1658 /* We allocated space for this jump when we assigned
1659 to `fixup_alt_jump', in the `handle_alt' case below. */
1660 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1661 }
1662
1663 /* See similar code for backslashed left paren above. */
1664 if (COMPILE_STACK_EMPTY) {
1665 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1666 goto normal_char;
1667 else
1668 return REG_ERPAREN;
1669 }
1670
1671 /* Since we just checked for an empty stack above, this
1672 ``can't happen''. */
1673 assert (compile_stack.avail != 0);
1674 {
1675 /* We don't just want to restore into `regnum', because
1676 later groups should continue to be numbered higher,
1677 as in `(ab)c(de)' -- the second group is #2. */
1678 regnum_t this_group_regnum;
1679
1680 compile_stack.avail--;
1681 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1682 fixup_alt_jump
1683 = COMPILE_STACK_TOP.fixup_alt_jump
1684 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1685 : 0;
1686 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1687 this_group_regnum = COMPILE_STACK_TOP.regnum;
1688 /* If we've reached MAX_REGNUM groups, then this close
1689 won't actually generate any code, so we'll have to
1690 clear pending_exact explicitly. */
1691 pending_exact = 0;
1692
1693 /* We're at the end of the group, so now we know how many
1694 groups were inside this one. */
1695 if (this_group_regnum <= MAX_REGNUM)
1696 {
1697 unsigned char *inner_group_loc
1698 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1699
1700 *inner_group_loc = regnum - this_group_regnum;
1701 BUF_PUSH_3 (stop_memory, this_group_regnum,
1702 regnum - this_group_regnum);
1703 }
1704 }
1705 break;
1706
1707
1708 case '|': /* `\|'. */
1709 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1710 goto normal_backslash;
1711 handle_alt:
1712 if (syntax & RE_LIMITED_OPS)
1713 goto normal_char;
1714
1715 /* Insert before the previous alternative a jump which
1716 jumps to this alternative if the former fails. */
1717 GET_BUFFER_SPACE (3);
1718 INSERT_JUMP (on_failure_jump, begalt, b + 6);
1719 pending_exact = 0;
1720 b += 3;
1721
1722 /* The alternative before this one has a jump after it
1723 which gets executed if it gets matched. Adjust that
1724 jump so it will jump to this alternative's analogous
1725 jump (put in below, which in turn will jump to the next
1726 (if any) alternative's such jump, etc.). The last such
1727 jump jumps to the correct final destination. A picture:
1728 _____ _____
1729 | | | |
1730 | v | v
1731 a | b | c
1732
1733 If we are at `b', then fixup_alt_jump right now points to a
1734 three-byte space after `a'. We'll put in the jump, set
1735 fixup_alt_jump to right after `b', and leave behind three
1736 bytes which we'll fill in when we get to after `c'. */
1737
1738 if (fixup_alt_jump)
1739 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1740
1741 /* Mark and leave space for a jump after this alternative,
1742 to be filled in later either by next alternative or
1743 when know we're at the end of a series of alternatives. */
1744 fixup_alt_jump = b;
1745 GET_BUFFER_SPACE (3);
1746 b += 3;
1747
1748 laststart = 0;
1749 begalt = b;
1750 break;
1751
1752
1753 case '{':
1754 /* If \{ is a literal. */
1755 if (!(syntax & RE_INTERVALS)
1756 /* If we're at `\{' and it's not the open-interval
1757 operator. */
1758 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1759 || (p - 2 == pattern && p == pend))
1760 goto normal_backslash;
1761
1762 handle_interval:
1763 {
1764 /* If got here, then the syntax allows intervals. */
1765
1766 /* At least (most) this many matches must be made. */
1767 int lower_bound = -1, upper_bound = -1;
1768
1769 beg_interval = p - 1;
1770
1771 if (p == pend)
1772 {
1773 if (syntax & RE_NO_BK_BRACES)
1774 goto unfetch_interval;
1775 else
1776 return REG_EBRACE;
1777 }
1778
1779 GET_UNSIGNED_NUMBER (lower_bound);
1780
1781 if (c == ',')
1782 {
1783 GET_UNSIGNED_NUMBER (upper_bound);
1784 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1785 }
1786 else
1787 /* Interval such as `{1}' => match exactly once. */
1788 upper_bound = lower_bound;
1789
1790 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1791 || lower_bound > upper_bound)
1792 {
1793 if (syntax & RE_NO_BK_BRACES)
1794 goto unfetch_interval;
1795 else
1796 return REG_BADBR;
1797 }
1798
1799 if (!(syntax & RE_NO_BK_BRACES))
1800 {
1801 if (c != '\\') return REG_EBRACE;
1802
1803 PATFETCH (c);
1804 }
1805
1806 if (c != '}')
1807 {
1808 if (syntax & RE_NO_BK_BRACES)
1809 goto unfetch_interval;
1810 else
1811 return REG_BADBR;
1812 }
1813
1814 /* We just parsed a valid interval. */
1815
1816 /* If it's invalid to have no preceding re. */
1817 if (!laststart)
1818 {
1819 if (syntax & RE_CONTEXT_INVALID_OPS)
1820 return REG_BADRPT;
1821 else if (syntax & RE_CONTEXT_INDEP_OPS)
1822 laststart = b;
1823 else
1824 goto unfetch_interval;
1825 }
1826
1827 /* If the upper bound is zero, don't want to succeed at
1828 all; jump from `laststart' to `b + 3', which will be
1829 the end of the buffer after we insert the jump. */
1830 if (upper_bound == 0)
1831 {
1832 GET_BUFFER_SPACE (3);
1833 INSERT_JUMP (jump, laststart, b + 3);
1834 b += 3;
1835 }
1836
1837 /* Otherwise, we have a nontrivial interval. When
1838 we're all done, the pattern will look like:
1839 set_number_at <jump count> <upper bound>
1840 set_number_at <succeed_n count> <lower bound>
1841 succeed_n <after jump addr> <succeed_n count>
1842 <body of loop>
1843 jump_n <succeed_n addr> <jump count>
1844 (The upper bound and `jump_n' are omitted if
1845 `upper_bound' is 1, though.) */
1846 else
1847 { /* If the upper bound is > 1, we need to insert
1848 more at the end of the loop. */
1849 unsigned nbytes = 10 + (upper_bound > 1) * 10;
1850
1851 GET_BUFFER_SPACE (nbytes);
1852
1853 /* Initialize lower bound of the `succeed_n', even
1854 though it will be set during matching by its
1855 attendant `set_number_at' (inserted next),
1856 because `re_compile_fastmap' needs to know.
1857 Jump to the `jump_n' we might insert below. */
1858 INSERT_JUMP2 (succeed_n, laststart,
1859 b + 5 + (upper_bound > 1) * 5,
1860 lower_bound);
1861 b += 5;
1862
1863 /* Code to initialize the lower bound. Insert
1864 before the `succeed_n'. The `5' is the last two
1865 bytes of this `set_number_at', plus 3 bytes of
1866 the following `succeed_n'. */
1867 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1868 b += 5;
1869
1870 if (upper_bound > 1)
1871 { /* More than one repetition is allowed, so
1872 append a backward jump to the `succeed_n'
1873 that starts this interval.
1874
1875 When we've reached this during matching,
1876 we'll have matched the interval once, so
1877 jump back only `upper_bound - 1' times. */
1878 STORE_JUMP2 (jump_n, b, laststart + 5,
1879 upper_bound - 1);
1880 b += 5;
1881
1882 /* The location we want to set is the second
1883 parameter of the `jump_n'; that is `b-2' as
1884 an absolute address. `laststart' will be
1885 the `set_number_at' we're about to insert;
1886 `laststart+3' the number to set, the source
1887 for the relative address. But we are
1888 inserting into the middle of the pattern --
1889 so everything is getting moved up by 5.
1890 Conclusion: (b - 2) - (laststart + 3) + 5,
1891 i.e., b - laststart.
1892
1893 We insert this at the beginning of the loop
1894 so that if we fail during matching, we'll
1895 reinitialize the bounds. */
1896 insert_op2 (set_number_at, laststart, b - laststart,
1897 upper_bound - 1, b);
1898 b += 5;
1899 }
1900 }
1901 pending_exact = 0;
1902 beg_interval = NULL;
1903 }
1904 break;
1905
1906 unfetch_interval:
1907 /* If an invalid interval, match the characters as literals. */
1908 assert (beg_interval);
1909 p = beg_interval;
1910 beg_interval = NULL;
1911
1912 /* normal_char and normal_backslash need `c'. */
1913 PATFETCH (c);
1914
1915 if (!(syntax & RE_NO_BK_BRACES))
1916 {
1917 if (p > pattern && p[-1] == '\\')
1918 goto normal_backslash;
1919 }
1920 goto normal_char;
1921
1922 #ifdef emacs
1923 /* There is no way to specify the before_dot and after_dot
1924 operators. rms says this is ok. --karl */
1925 case '=':
1926 BUF_PUSH (at_dot);
1927 break;
1928
1929 case 's':
1930 laststart = b;
1931 PATFETCH (c);
1932 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1933 break;
1934
1935 case 'S':
1936 laststart = b;
1937 PATFETCH (c);
1938 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1939 break;
1940 #endif /* emacs */
1941
1942
1943 case 'w':
1944 laststart = b;
1945 BUF_PUSH (wordchar);
1946 break;
1947
1948
1949 case 'W':
1950 laststart = b;
1951 BUF_PUSH (notwordchar);
1952 break;
1953
1954
1955 case '<':
1956 BUF_PUSH (wordbeg);
1957 break;
1958
1959 case '>':
1960 BUF_PUSH (wordend);
1961 break;
1962
1963 case 'b':
1964 BUF_PUSH (wordbound);
1965 break;
1966
1967 case 'B':
1968 BUF_PUSH (notwordbound);
1969 break;
1970
1971 case '`':
1972 BUF_PUSH (begbuf);
1973 break;
1974
1975 case '\'':
1976 BUF_PUSH (endbuf);
1977 break;
1978
1979 case '1': case '2': case '3': case '4': case '5':
1980 case '6': case '7': case '8': case '9':
1981 if (syntax & RE_NO_BK_REFS)
1982 goto normal_char;
1983
1984 c1 = c - '0';
1985
1986 if (c1 > regnum)
1987 return REG_ESUBREG;
1988
1989 /* Can't back reference to a subexpression if inside of it. */
1990 if (group_in_compile_stack (compile_stack, c1))
1991 goto normal_char;
1992
1993 laststart = b;
1994 BUF_PUSH_2 (duplicate, c1);
1995 break;
1996
1997
1998 case '+':
1999 case '?':
2000 if (syntax & RE_BK_PLUS_QM)
2001 goto handle_plus;
2002 else
2003 goto normal_backslash;
2004
2005 default:
2006 normal_backslash:
2007 /* You might think it would be useful for \ to mean
2008 not to translate; but if we don't translate it
2009 it will never match anything. */
2010 c = TRANSLATE (c);
2011 goto normal_char;
2012 }
2013 break;
2014
2015
2016 default:
2017 /* Expects the character in `c'. */
2018 normal_char:
/* NOTE(review): the look-ahead tests in the condition below read `*p',
   `p[0]' and `p[1]' without first checking `p != pend', so when C was the
   last pattern character they examine bytes at or past PEND -- presumably
   this relies on the pattern being followed by readable memory; confirm. */
2019 /* If no exactn currently being built. */
2020 if (!pending_exact
2021
2022 /* If last exactn not at current position. */
2023 || pending_exact + *pending_exact + 1 != b
2024
2025 /* We have only one byte following the exactn for the count. */
2026 || *pending_exact == (1 << BYTEWIDTH) - 1
2027
2028 /* If followed by a repetition operator. */
2029 || *p == '*' || *p == '^'
2030 || ((syntax & RE_BK_PLUS_QM)
2031 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2032 : (*p == '+' || *p == '?'))
2033 || ((syntax & RE_INTERVALS)
2034 && ((syntax & RE_NO_BK_BRACES)
2035 ? *p == '{'
2036 : (p[0] == '\\' && p[1] == '{'))))
2037 {
2038 /* Start building a new exactn. */
2039
2040 laststart = b;
2041
2042 BUF_PUSH_2 (exactn, 0);
2043 pending_exact = b - 1;
2044 }
2045
2046 BUF_PUSH (c);
2047 (*pending_exact)++;
2048 break;
2049 } /* switch (c) */
2050 } /* while p != pend */
2051
2052
2053 /* Through the pattern now. */
2054
2055 if (fixup_alt_jump)
2056 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2057
2058 if (!COMPILE_STACK_EMPTY)
2059 return REG_EPAREN;
2060
2061 free (compile_stack.stack);
2062
2063 /* We have succeeded; set the length of the buffer. */
2064 bufp->used = b - bufp->buffer;
2065
2066 #ifdef DEBUG
2067 if (debug)
2068 {
2069 DEBUG_PRINT1 ("\nCompiled pattern: ");
2070 print_compiled_pattern (bufp);
2071 }
2072 #endif /* DEBUG */
2073
2074 return REG_NOERROR;
2075 } /* regex_compile */
2076
2077 /* Subroutines for `regex_compile'. */
2078
2079 /* Store OP at LOC followed by two-byte integer parameter ARG. */
2080
2081 static void
2082 store_op1 (op, loc, arg)
/* [<][>][^][v][top][bottom][index][help] */
2083 re_opcode_t op;
2084 unsigned char *loc;
2085 int arg;
2086 {
2087 *loc = (unsigned char) op;
2088 STORE_NUMBER (loc + 1, arg);
2089 }
2090
2091
2092 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
2093
2094 static void
2095 store_op2 (op, loc, arg1, arg2)
/* [<][>][^][v][top][bottom][index][help] */
2096 re_opcode_t op;
2097 unsigned char *loc;
2098 int arg1, arg2;
2099 {
2100 *loc = (unsigned char) op;
2101 STORE_NUMBER (loc + 1, arg1);
2102 STORE_NUMBER (loc + 3, arg2);
2103 }
2104
2105
2106 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2107 for OP followed by two-byte integer parameter ARG. */
2108
2109 static void
2110 insert_op1 (op, loc, arg, end)
/* [<][>][^][v][top][bottom][index][help] */
2111 re_opcode_t op;
2112 unsigned char *loc;
2113 int arg;
2114 unsigned char *end;
2115 {
2116 register unsigned char *pfrom = end;
2117 register unsigned char *pto = end + 3;
2118
2119 while (pfrom != loc)
2120 *--pto = *--pfrom;
2121
2122 store_op1 (op, loc, arg);
2123 }
2124
2125
2126 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
2127
2128 static void
2129 insert_op2 (op, loc, arg1, arg2, end)
/* [<][>][^][v][top][bottom][index][help] */
2130 re_opcode_t op;
2131 unsigned char *loc;
2132 int arg1, arg2;
2133 unsigned char *end;
2134 {
2135 register unsigned char *pfrom = end;
2136 register unsigned char *pto = end + 5;
2137
2138 while (pfrom != loc)
2139 *--pto = *--pfrom;
2140
2141 store_op2 (op, loc, arg1, arg2);
2142 }
2143
2144
2145 /* P points to just after a ^ in PATTERN. Return true if that ^ comes
2146 after an alternative or a begin-subexpression. We assume there is at
2147 least one character before the ^. */
2148
2149 static boolean
2150 at_begline_loc_p (pattern, p, syntax)
/* [<][>][^][v][top][bottom][index][help] */
2151 const char *pattern, *p;
2152 reg_syntax_t syntax;
2153 {
2154 const char *prev = p - 2;
2155 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2156
2157 return
2158 /* After a subexpression? */
2159 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2160 /* After an alternative? */
2161 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2162 }
2163
2164
2165 /* The dual of at_begline_loc_p. This one is for $. We assume there is
2166 at least one character after the $, i.e., `P < PEND'. */
2167
2168 static boolean
2169 at_endline_loc_p (p, pend, syntax)
/* [<][>][^][v][top][bottom][index][help] */
2170 const char *p, *pend;
2171 int syntax;
2172 {
2173 const char *next = p;
2174 boolean next_backslash = *next == '\\';
2175 const char *next_next = p + 1 < pend ? p + 1 : NULL;
2176
2177 return
2178 /* Before a subexpression? */
2179 (syntax & RE_NO_BK_PARENS ? *next == ')'
2180 : next_backslash && next_next && *next_next == ')')
2181 /* Before an alternative? */
2182 || (syntax & RE_NO_BK_VBAR ? *next == '|'
2183 : next_backslash && next_next && *next_next == '|');
2184 }
2185
2186
2187 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2188 false if it's not. */
2189
2190 static boolean
2191 group_in_compile_stack (compile_stack, regnum)
/* [<][>][^][v][top][bottom][index][help] */
2192 compile_stack_type compile_stack;
2193 regnum_t regnum;
2194 {
2195 int this_element;
2196
2197 for (this_element = compile_stack.avail - 1;
2198 this_element >= 0;
2199 this_element--)
2200 if (compile_stack.stack[this_element].regnum == regnum)
2201 return true;
2202
2203 return false;
2204 }
2205
2206
2207 /* Read the ending character of a range (in a bracket expression) from the
2208 uncompiled pattern *P_PTR (which ends at PEND). We assume the
2209 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
2210 Then we set the translation of all bits between the starting and
2211 ending characters (inclusive) in the compiled pattern B.
2212
2213 Return an error code.
2214
2215 We use these short variable names so we can use the same macros as
2216 `regex_compile' itself. */
2217
2218 static reg_errcode_t
2219 compile_range (p_ptr, pend, translate, syntax, b)
/* [<][>][^][v][top][bottom][index][help] */
2220 const char **p_ptr, *pend;
2221 char *translate;
2222 reg_syntax_t syntax;
2223 unsigned char *b;
2224 {
2225 unsigned this_char;
2226
2227 const char *p = *p_ptr;
2228 int range_start, range_end;
2229
2230 if (p == pend)
2231 return REG_ERANGE;
2232
2233 /* Even though the pattern is a signed `char *', we need to fetch
2234 with unsigned char *'s; if the high bit of the pattern character
2235 is set, the range endpoints will be negative if we fetch using a
2236 signed char *.
2237
2238 We also want to fetch the endpoints without translating them; the
2239 appropriate translation is done in the bit-setting loop below. */
2240 range_start = ((unsigned char *) p)[-2];
2241 range_end = ((unsigned char *) p)[0];
2242
2243 /* Have to increment the pointer into the pattern string, so the
2244 caller isn't still at the ending character. */
2245 (*p_ptr)++;
2246
2247 /* If the start is after the end, the range is empty. */
2248 if (range_start > range_end)
2249 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2250
2251 /* Here we see why `this_char' has to be larger than an `unsigned
2252 char' -- the range is inclusive, so if `range_end' == 0xff
2253 (assuming 8-bit characters), we would otherwise go into an infinite
2254 loop, since all characters <= 0xff. */
2255 for (this_char = range_start; this_char <= range_end; this_char++)
2256 {
2257 SET_LIST_BIT (TRANSLATE (this_char));
2258 }
2259
2260 return REG_NOERROR;
2261 }
2262
2263 /* Failure stack declarations and macros; both re_compile_fastmap and
2264 re_match_2 use a failure stack. These have to be macros because of
2265 REGEX_ALLOCATE. */
2266
2267
2268 /* Number of failure points for which to initially allocate space
2269 when matching. If this number is exceeded, we allocate more
2270 space, so it is not a hard limit. */
/* Initial capacity of a failure stack, counted in stack elements (see
   INIT_FAIL_STACK); may be overridden by defining it before this
   point.  */
2271 #ifndef INIT_FAILURE_ALLOC
2272 #define INIT_FAILURE_ALLOC 5
2273 #endif
2274
2275 /* Roughly the maximum number of failure points on the stack. Would be
2276 exactly that if always used MAX_FAILURE_SPACE each time we failed.
2277 This is a variable only so users of regex can assign to it; we never
2278 change it ourselves. */
2279 int re_max_failures = 2000;
2280
/* One slot of the failure stack.  Pattern positions are stored in it
   directly; PUSH_FAILURE_ITEM casts every other kind of item to this
   type.  */
2281 typedef const unsigned char *fail_stack_elt_t;
2282
/* A growable stack of fail_stack_elt_t.  STACK is the storage, SIZE its
   capacity in elements and AVAIL the number of elements in use.  */
2283 typedef struct
2284 {
2285 fail_stack_elt_t *stack;
2286 unsigned size;
2287 unsigned avail; /* Offset of next open position. */
2288 } fail_stack_type;
2289
/* State queries on the failure stack.  All but FAIL_STACK_PTR_EMPTY
   refer to a variable literally named `fail_stack' in the enclosing
   scope; FAIL_STACK_PTR_EMPTY expects a pointer named
   `fail_stack_ptr'.
   NOTE(review): FAIL_STACK_TOP indexes `avail', which the struct
   documents as the next open position, not the most recently pushed
   item -- confirm intent before using it.  */
2290 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
2291 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2292 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
2293 #define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
2294
2295
2296 /* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
2297
/* Allocate INIT_FAILURE_ALLOC elements for the variable `fail_stack'
   with REGEX_ALLOCATE and mark the stack empty.  Because this expands
   to a statement containing `return -2', it can only be used inside a
   function whose return type can carry -2.  */
2298 #define INIT_FAIL_STACK() \
2299 do { \
2300 fail_stack.stack = (fail_stack_elt_t *) \
2301 REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
2302 \
2303 if (fail_stack.stack == NULL) \
2304 return -2; \
2305 \
2306 fail_stack.size = INIT_FAILURE_ALLOC; \
2307 fail_stack.avail = 0; \
2308 } while (0)
2309
2310
2311 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2312
2313 Return 1 if succeeds, and 0 if either ran out of memory
2314 allocating space for it or it was already too large.
2315
2316 REGEX_REALLOCATE requires `destination' be declared. */
2317
/* Expression macro: evaluates to 1 after doubling the capacity of
   FAIL_STACK, or to 0 if the stack already holds more than
   re_max_failures * MAX_FAILURE_ITEMS elements or the reallocation
   returned NULL.  MAX_FAILURE_ITEMS reads `num_regs' from the
   enclosing scope.
   NOTE(review): the result of REGEX_REALLOCATE is stored straight into
   `.stack', so on failure the previous pointer is overwritten; whether
   that loses memory depends on how REGEX_REALLOCATE is defined (not
   visible here).  */
2318 #define DOUBLE_FAIL_STACK(fail_stack) \
2319 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
2320 ? 0 \
2321 : ((fail_stack).stack = (fail_stack_elt_t *) \
2322 REGEX_REALLOCATE ((fail_stack).stack, \
2323 (fail_stack).size * sizeof (fail_stack_elt_t), \
2324 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
2325 \
2326 (fail_stack).stack == NULL \
2327 ? 0 \
2328 : ((fail_stack).size <<= 1, \
2329 1)))
2330
2331
2332 /* Push PATTERN_OP on FAIL_STACK.
2333
2334 Return 1 if was able to do so and 0 if ran out of memory allocating
2335 space to do so. */
/* Expression macro: append PATTERN_OP to FAIL_STACK, doubling the stack
   first if it is full.  Evaluates to 1 on success and 0 if the stack
   could not be grown.  The fullness test goes through
   FAIL_STACK_FULL (), which names the variable `fail_stack' directly,
   so the argument passed as FAIL_STACK must be that variable.  */
2336 #define PUSH_PATTERN_OP(pattern_op, fail_stack) \
2337 ((FAIL_STACK_FULL () \
2338 && !DOUBLE_FAIL_STACK (fail_stack)) \
2339 ? 0 \
2340 : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
2341 1))
2342
2343 /* This pushes an item onto the failure stack. Must be a four-byte
2344 value. Assumes the variable `fail_stack'. Probably should only
2345 be called from within `PUSH_FAILURE_POINT'. */
/* Store ITEM, cast to fail_stack_elt_t (a pointer type), in the next
   free slot of `fail_stack' and advance `avail'.  No capacity check is
   made here; PUSH_FAILURE_POINT grows the stack before pushing.  */
2346 #define PUSH_FAILURE_ITEM(item) \
2347 fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2348
2349 /* The complement operation. Assumes `fail_stack' is nonempty. */
2350 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2351
2352 /* Used to omit pushing failure point id's when we're not debugging. */
/* With DEBUG defined these push the failure id and pop it into
   *ITEM_ADDR; otherwise both expand to nothing.  */
2353 #ifdef DEBUG
2354 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2355 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2356 #else
2357 #define DEBUG_PUSH(item)
2358 #define DEBUG_POP(item_addr)
2359 #endif
2360
2361
2362 /* Push the information about the state we will need
2363 if we ever fail back to it.
2364
2365 Requires variables fail_stack, regstart, regend, reg_info, and
2366 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
2367 declared.
2368
2369 Does `return FAILURE_CODE' if runs out of memory. */
2370
/* Statement macro that records one failure point on `fail_stack'.
   PATTERN_PLACE and STRING_PLACE are the pattern and string positions
   to resume from; FAILURE_CODE is the value the enclosing function
   returns if the stack cannot be grown.

   Push order (POP_FAILURE_POINT pops in the reverse order):
     - for each register from lowest_active_reg to highest_active_reg:
       regstart, regend, reg_info.word;
     - lowest_active_reg, highest_active_reg;
     - PATTERN_PLACE, STRING_PLACE;
     - the failure id, only when DEBUG is defined (via DEBUG_PUSH).

   Besides the variables listed in the comment above, the expansion
   reads `lowest_active_reg' and `highest_active_reg' from the enclosing
   scope, and the debug output also uses `bufp', `pend', `string1',
   `size1', `string2' and `size2'.  */
2371 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
2372 do { \
2373 char *destination; \
2374 /* Must be int, so when we don't save any registers, the arithmetic \
2375 of 0 + -1 isn't done as unsigned. */ \
2376 int this_reg; \
2377 \
2378 DEBUG_STATEMENT (failure_id++); \
2379 DEBUG_STATEMENT (nfailure_points_pushed++); \
2380 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
2381 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
2382 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
2383 \
2384 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
2385 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
2386 \
2387 /* Ensure we have enough space allocated for what we will push. */ \
2388 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
2389 { \
2390 if (!DOUBLE_FAIL_STACK (fail_stack)) \
2391 return failure_code; \
2392 \
2393 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
2394 (fail_stack).size); \
2395 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2396 } \
2397 \
2398 /* Push the info, starting with the registers. */ \
2399 DEBUG_PRINT1 ("\n"); \
2400 \
2401 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
2402 this_reg++) \
2403 { \
2404 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
2405 DEBUG_STATEMENT (num_regs_pushed++); \
2406 \
2407 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2408 PUSH_FAILURE_ITEM (regstart[this_reg]); \
2409 \
2410 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2411 PUSH_FAILURE_ITEM (regend[this_reg]); \
2412 \
2413 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
2414 DEBUG_PRINT2 (" match_null=%d", \
2415 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
2416 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
2417 DEBUG_PRINT2 (" matched_something=%d", \
2418 MATCHED_SOMETHING (reg_info[this_reg])); \
2419 DEBUG_PRINT2 (" ever_matched=%d", \
2420 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
2421 DEBUG_PRINT1 ("\n"); \
2422 PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
2423 } \
2424 \
2425 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
2426 PUSH_FAILURE_ITEM (lowest_active_reg); \
2427 \
2428 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
2429 PUSH_FAILURE_ITEM (highest_active_reg); \
2430 \
2431 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
2432 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
2433 PUSH_FAILURE_ITEM (pattern_place); \
2434 \
2435 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
2436 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
2437 size2); \
2438 DEBUG_PRINT1 ("'\n"); \
2439 PUSH_FAILURE_ITEM (string_place); \
2440 \
2441 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
2442 DEBUG_PUSH (failure_id); \
2443 } while (0)
2444
2445 /* This is the number of items that are pushed and popped on the stack
2446 for each register. */
/* The three per-register items are regstart, regend and reg_info.word
   (see PUSH_FAILURE_POINT).  */
2447 #define NUM_REG_ITEMS 3
2448
2449 /* Individual items aside from the registers. */
2450 #ifdef DEBUG
2451 #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
2452 #else
2453 #define NUM_NONREG_ITEMS 4
2454 #endif
2455
2456 /* We push at most this many items on the stack. */
/* Reads `num_regs' from the scope where the macro is expanded.
   NOTE(review): re_compile_fastmap declares `unsigned num_regs = 0', so
   `num_regs - 1' wraps there before the multiplication -- confirm the
   resulting limit is the intended one.  */
2457 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2458
2459 /* We actually push this many items. */
/* Reads `lowest_active_reg' and `highest_active_reg' from the scope
   where the macro is expanded.  */
2460 #define NUM_FAILURE_ITEMS \
2461 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
2462 + NUM_NONREG_ITEMS)
2463
2464 /* How many items can still be added to the stack without overflowing it. */
2465 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2466
2467
2468 /* Pops what PUSH_FAILURE_POINT pushes.
2469
2470 We restore into the parameters, all of which should be lvalues:
2471 STR -- the saved data position.
2472 PAT -- the saved pattern position.
2473 LOW_REG, HIGH_REG -- the lowest and highest active registers.
2474 REGSTART, REGEND -- arrays of string positions.
2475 REG_INFO -- array of information about each subexpression.
2476
2477 Also assumes the variables `fail_stack' and (if debugging), `bufp',
2478 `pend', `string1', `size1', `string2', and `size2'. */
2479
/* Block macro that undoes one PUSH_FAILURE_POINT, popping in the
   reverse of the push order: the failure id (only when DEBUG is
   defined), the string position, the pattern position, HIGH_REG,
   LOW_REG, and then for each register from HIGH_REG down to LOW_REG its
   reg_info word, regend and regstart.

   STR is left unchanged when the saved string position is NULL.

   The expansion is a bare `{ ... }' block, not `do { } while (0)', so a
   `;' written after the invocation is an extra empty statement.
   NOTE(review): `this_reg' is an int; if the LOW_REG lvalue is unsigned
   the loop test is done in unsigned arithmetic -- confirm that LOW_REG
   can never be 0 in that case.  */
2480 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2481 { \
2482 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
2483 int this_reg; \
2484 const unsigned char *string_temp; \
2485 \
2486 assert (!FAIL_STACK_EMPTY ()); \
2487 \
2488 /* Remove failure points and point to how many regs pushed. */ \
2489 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
2490 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
2491 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
2492 \
2493 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
2494 \
2495 DEBUG_POP (&failure_id); \
2496 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
2497 \
2498 /* If the saved string location is NULL, it came from an \
2499 on_failure_keep_string_jump opcode, and we want to throw away the \
2500 saved NULL, thus retaining our current position in the string. */ \
2501 string_temp = POP_FAILURE_ITEM (); \
2502 if (string_temp != NULL) \
2503 str = (const char *) string_temp; \
2504 \
2505 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
2506 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
2507 DEBUG_PRINT1 ("'\n"); \
2508 \
2509 pat = (unsigned char *) POP_FAILURE_ITEM (); \
2510 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
2511 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
2512 \
2513 /* Restore register info. */ \
2514 high_reg = (unsigned) POP_FAILURE_ITEM (); \
2515 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
2516 \
2517 low_reg = (unsigned) POP_FAILURE_ITEM (); \
2518 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
2519 \
2520 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
2521 { \
2522 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
2523 \
2524 reg_info[this_reg].word = POP_FAILURE_ITEM (); \
2525 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
2526 \
2527 regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2528 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2529 \
2530 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2531 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2532 } \
2533 \
2534 DEBUG_STATEMENT (nfailure_points_popped++); \
2535 } /* POP_FAILURE_POINT */
2536
2537 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2538 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
2539 characters can start a string that matches the pattern. This fastmap
2540 is used by re_search to skip quickly over impossible starting points.
2541
2542 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2543 area as BUFP->fastmap.
2544
2545 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2546 the pattern buffer.
2547
2548 Returns 0 if we succeed, -2 if an internal error. */
2549
/* Build BUFP->fastmap by walking every path through the compiled
   pattern in BUFP->buffer.

   The walk starts at the first opcode.  Opcodes that consume a
   character mark the characters they accept in the fastmap and end the
   current path; opcodes that can match the empty string are stepped
   over with `continue'.  Alternative continuations found at
   on_failure_jump-style opcodes are remembered on a local failure
   stack and explored once the current path ends.

   Returns 0 on success and -2 if the failure stack cannot be allocated
   (INIT_FAIL_STACK) or grown (PUSH_PATTERN_OP).

   NOTE(review): none of the `return' statements releases
   fail_stack.stack; whether that matters depends on how REGEX_ALLOCATE
   is defined, which is not visible here.  */
2550 int
2551 re_compile_fastmap (bufp)
2552 struct re_pattern_buffer *bufp;
2553 {
2554 int j, k;
2555 fail_stack_type fail_stack;
2556 #ifndef REGEX_MALLOC
2557 char *destination;
2558 #endif
2559 /* We don't push any register information onto the failure stack. */
2560 unsigned num_regs = 0;
2561
2562 register char *fastmap = bufp->fastmap;
2563 unsigned char *pattern = bufp->buffer;
2564 unsigned long size = bufp->used;
2565 const unsigned char *p = pattern;
2566 register unsigned char *pend = pattern + size;
2567
2568 /* Assume that each path through the pattern can be null until
2569 proven otherwise. We set this false at the bottom of switch
2570 statement, to which we get only if a particular path doesn't
2571 match the empty string. */
2572 boolean path_can_be_null = true;
2573
2574 /* We aren't doing a `succeed_n' to begin with. */
2575 boolean succeed_n_p = false;
2576
2577 assert (fastmap != NULL && p != NULL);
2578
2579 INIT_FAIL_STACK ();
2580 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
2581 bufp->fastmap_accurate = 1; /* It will be when we're done. */
2582 bufp->can_be_null = 0;
2583
2584 while (p != pend || !FAIL_STACK_EMPTY ())
2585 {
2586 if (p == pend)
2587 {
2588 bufp->can_be_null |= path_can_be_null;
2589
2590 /* Reset for next path. */
2591 path_can_be_null = true;
2592
/* The current path is finished; resume at the most recently saved
   alternative.  */
2593 p = fail_stack.stack[--fail_stack.avail];
2594 }
2595
2596 /* We should never be about to go beyond the end of the pattern. */
2597 assert (p < pend);
2598
2599 #ifdef SWITCH_ENUM_BUG
2600 switch ((int) ((re_opcode_t) *p++))
2601 #else
2602 switch ((re_opcode_t) *p++)
2603 #endif
2604 {
2605
2606 /* I guess the idea here is to simply not bother with a fastmap
2607 if a backreference is used, since it's too hard to figure out
2608 the fastmap for the corresponding group. Setting
2609 `can_be_null' stops `re_search_2' from using the fastmap, so
2610 that is all we do. */
2611 case duplicate:
2612 bufp->can_be_null = 1;
2613 return 0;
2614
2615
2616 /* Following are the cases which match a character. These end
2617 with `break'. */
2618
2619 case exactn:
/* p[0] is the count byte that regex_compile emits after `exactn';
   p[1] is the first literal character.  */
2620 fastmap[p[1]] = 1;
2621 break;
2622
2623
2624 case charset:
/* The byte after the opcode is used as the bitmap length in bytes;
   the bitmap itself follows it.  */
2625 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2626 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2627 fastmap[j] = 1;
2628 break;
2629
2630
2631 case charset_not:
2632 /* Chars beyond end of map must be allowed. */
2633 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2634 fastmap[j] = 1;
2635
2636 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2637 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2638 fastmap[j] = 1;
2639 break;
2640
2641
2642 case wordchar:
2643 for (j = 0; j < (1 << BYTEWIDTH); j++)
2644 if (SYNTAX (j) == Sword)
2645 fastmap[j] = 1;
2646 break;
2647
2648
2649 case notwordchar:
2650 for (j = 0; j < (1 << BYTEWIDTH); j++)
2651 if (SYNTAX (j) != Sword)
2652 fastmap[j] = 1;
2653 break;
2654
2655
2656 case anychar:
2657 /* `.' matches anything ... */
2658 for (j = 0; j < (1 << BYTEWIDTH); j++)
2659 fastmap[j] = 1;
2660
2661 /* ... except perhaps newline. */
2662 if (!(bufp->syntax & RE_DOT_NEWLINE))
2663 fastmap['\n'] = 0;
2664
2665 /* Return if we have already set `can_be_null'; if we have,
2666 then the fastmap is irrelevant. Something's wrong here. */
2667 else if (bufp->can_be_null)
2668 return 0;
2669
2670 /* Otherwise, have to check alternative paths. */
2671 break;
2672
2673
2674 #ifdef emacs
2675 case syntaxspec:
2676 k = *p++;
2677 for (j = 0; j < (1 << BYTEWIDTH); j++)
2678 if (SYNTAX (j) == (enum syntaxcode) k)
2679 fastmap[j] = 1;
2680 break;
2681
2682
2683 case notsyntaxspec:
2684 k = *p++;
2685 for (j = 0; j < (1 << BYTEWIDTH); j++)
2686 if (SYNTAX (j) != (enum syntaxcode) k)
2687 fastmap[j] = 1;
2688 break;
2689
2690
2691 /* All cases after this match the empty string. These end with
2692 `continue'. */
2693
2694
2695 case before_dot:
2696 case at_dot:
2697 case after_dot:
2698 continue;
2699 #endif /* emacs */
2700
2701
2702 case no_op:
2703 case begline:
2704 case endline:
2705 case begbuf:
2706 case endbuf:
2707 case wordbound:
2708 case notwordbound:
2709 case wordbeg:
2710 case wordend:
2711 case push_dummy_failure:
2712 continue;
2713
2714
2715 case jump_n:
2716 case pop_failure_jump:
2717 case maybe_pop_jump:
2718 case jump:
2719 case jump_past_alt:
2720 case dummy_failure_jump:
2721 EXTRACT_NUMBER_AND_INCR (j, p);
2722 p += j;
2723 if (j > 0)
2724 continue;
2725
2726 /* Jump backward implies we just went through the body of a
2727 loop and matched nothing. Opcode jumped to should be
2728 `on_failure_jump' or `succeed_n'. Just treat it like an
2729 ordinary jump. For a * loop, it has pushed its failure
2730 point already; if so, discard that as redundant. */
2731 if ((re_opcode_t) *p != on_failure_jump
2732 && (re_opcode_t) *p != succeed_n)
2733 continue;
2734
2735 p++;
2736 EXTRACT_NUMBER_AND_INCR (j, p);
2737 p += j;
2738
2739 /* If what's on the stack is where we are now, pop it. */
2740 if (!FAIL_STACK_EMPTY ()
2741 && fail_stack.stack[fail_stack.avail - 1] == p)
2742 fail_stack.avail--;
2743
2744 continue;
2745
2746
2747 case on_failure_jump:
2748 case on_failure_keep_string_jump:
2749 handle_on_failure_jump:
2750 EXTRACT_NUMBER_AND_INCR (j, p);
2751
2752 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2753 end of the pattern. We don't want to push such a point,
2754 since when we restore it above, entering the switch will
2755 increment `p' past the end of the pattern. We don't need
2756 to push such a point since we obviously won't find any more
2757 fastmap entries beyond `pend'. Such a pattern can match
2758 the null string, though. */
2759 if (p + j < pend)
2760 {
2761 if (!PUSH_PATTERN_OP (p + j, fail_stack))
2762 return -2;
2763 }
2764 else
2765 bufp->can_be_null = 1;
2766
2767 if (succeed_n_p)
2768 {
2769 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
2770 succeed_n_p = false;
2771 }
2772
2773 continue;
2774
2775
2776 case succeed_n:
2777 /* Get to the number of times to succeed. */
2778 p += 2;
2779
2780 /* Increment p past the n for when k != 0. */
2781 EXTRACT_NUMBER_AND_INCR (k, p);
2782 if (k == 0)
2783 {
/* Rewind to the jump offset that follows the opcode (presumably the
   2 skipped above plus the 2 consumed by EXTRACT_NUMBER_AND_INCR) and
   handle it as an on_failure_jump.  */
2784 p -= 4;
2785 succeed_n_p = true; /* Spaghetti code alert. */
2786 goto handle_on_failure_jump;
2787 }
2788 continue;
2789
2790
2791 case set_number_at:
2792 p += 4;
2793 continue;
2794
2795
2796 case start_memory:
2797 case stop_memory:
2798 p += 2;
2799 continue;
2800
2801
2802 default:
2803 abort (); /* We have listed all the cases. */
2804 } /* switch *p++ */
2805
2806 /* Getting here means we have found the possible starting
2807 characters for one path of the pattern -- and that the empty
2808 string does not match. We need not follow this path further.
2809 Instead, look at the next alternative (remembered on the
2810 stack), or quit if no more. The test at the top of the loop
2811 does these things. */
2812 path_can_be_null = false;
2813 p = pend;
2814 } /* while p */
2815
2816 /* Set `can_be_null' for the last path (also the first path, if the
2817 pattern is empty). */
2818 bufp->can_be_null |= path_can_be_null;
2819 return 0;
2820 } /* re_compile_fastmap */
2821
2822 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2823 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
2824 this memory for recording register information. STARTS and ENDS
2825 must be allocated using the malloc library routine, and must each
2826 be at least NUM_REGS * sizeof (regoff_t) bytes long.
2827
2828 If NUM_REGS == 0, then subsequent matches should allocate their own
2829 register data.
2830
2831 Unless this function is called, the first search or match using
2832 PATTERN_BUFFER will allocate its own register data, without
2833 freeing the old data. */
2834
2835 void
2836 re_set_registers (bufp, regs, num_regs, starts, ends)
/* [<][>][^][v][top][bottom][index][help] */
2837 struct re_pattern_buffer *bufp;
2838 struct re_registers *regs;
2839 unsigned num_regs;
2840 regoff_t *starts, *ends;
2841 {
2842 if (num_regs)
2843 {
2844 bufp->regs_allocated = REGS_REALLOCATE;
2845 regs->num_regs = num_regs;
2846 regs->start = starts;
2847 regs->end = ends;
2848 }
2849 else
2850 {
2851 bufp->regs_allocated = REGS_UNALLOCATED;
2852 regs->num_regs = 0;
2853 regs->start = regs->end = (regoff_t) 0;
2854 }
2855 }
2856
2857 /* Searching routines. */
2858
/* Single-string front end to re_search_2: search STRING (SIZE bytes)
   for the pattern compiled in BUFP, starting at STARTPOS and scanning
   over RANGE positions.  The caller cannot say where matching must
   stop; the whole of STRING is always eligible.  Returns whatever
   re_search_2 returns.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  /* STRING is passed as the second of the two strings, with an empty
     first string, and matching may run up to the end of STRING.  */
  const char *empty_first = NULL;
  int stop = size;

  return re_search_2 (bufp, empty_first, 0, string, size,
                      startpos, range, regs, stop);
}
2872
2873
2874 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2875 virtual concatenation of STRING1 and STRING2, starting first at index
2876 STARTPOS, then at STARTPOS + 1, and so on.
2877
2878 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2879
2880 RANGE is how far to scan while trying to match. RANGE = 0 means try
2881 only at STARTPOS; in general, the last start tried is STARTPOS +
2882 RANGE.
2883
2884 In REGS, return the indices of the virtual concatenation of STRING1
2885 and STRING2 that matched the entire BUFP->buffer and its contained
2886 subexpressions.
2887
2888 Do not consider matching one past the index STOP in the virtual
2889 concatenation of STRING1 and STRING2.
2890
2891 We return either the position in the strings at which the match was
2892 found, -1 if no match, or -2 if error (such as failure
2893 stack overflow). */
2894
2895 int
2896 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
/* [<][>][^][v][top][bottom][index][help] */
2897 struct re_pattern_buffer *bufp;
2898 const char *string1, *string2;
2899 int size1, size2;
2900 int startpos;
2901 int range;
2902 struct re_registers *regs;
2903 int stop;
2904 {
2905 int val;
2906 register char *fastmap = bufp->fastmap;
2907 register char *translate = bufp->translate;
2908 int total_size = size1 + size2;
2909 int endpos = startpos + range;
2910
2911 /* Check for out-of-range STARTPOS. */
2912 if (startpos < 0 || startpos > total_size)
2913 return -1;
2914
2915 /* Fix up RANGE if it might eventually take us outside
2916 the virtual concatenation of STRING1 and STRING2. */
2917 if (endpos < -1)
2918 range = -1 - startpos;
2919 else if (endpos > total_size)
2920 range = total_size - startpos;
2921
2922 /* If the search isn't to be a backwards one, don't waste time in a
2923 search for a pattern that must be anchored. */
2924 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2925 {
2926 if (startpos > 0)
2927 return -1;
2928 else
2929 range = 1;
2930 }
2931
2932 /* Update the fastmap now if not correct already. */
2933 if (fastmap && !bufp->fastmap_accurate)
2934 if (re_compile_fastmap (bufp) == -2)
2935 return -2;
2936
2937 /* Loop through the string, looking for a place to start matching. */
2938 for (;;)
2939 {
2940 /* If a fastmap is supplied, skip quickly over characters that
2941 cannot be the start of a match. If the pattern can match the
2942 null string, however, we don't need to skip characters; we want
2943 the first null string. */
2944 if (fastmap && startpos < total_size && !bufp->can_be_null)
2945 {
2946 if (range > 0) /* Searching forwards. */
2947 {
2948 register const char *d;
2949 register int lim = 0;
2950 int irange = range;
2951
2952 if (startpos < size1 && startpos + range >= size1)
2953 lim = range - (size1 - startpos);
2954
2955 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2956
2957 /* Written out as an if-else to avoid testing `translate'
2958 inside the loop. */
2959 if (translate)
2960 while (range > lim
2961 && !fastmap[(unsigned char)
2962 translate[(unsigned char) *d++]])
2963 range--;
2964 else
2965 while (range > lim && !fastmap[(unsigned char) *d++])
2966 range--;
2967
2968 startpos += irange - range;
2969 }
2970 else /* Searching backwards. */
2971 {
2972 register char c = (size1 == 0 || startpos >= size1
2973 ? string2[startpos - size1]
2974 : string1[startpos]);
2975
2976 if (!fastmap[(unsigned char) TRANSLATE (c)])
2977 goto advance;
2978 }
2979 }
2980
2981 /* If can't match the null string, and that's all we have left, fail. */
2982 if (range >= 0 && startpos == total_size && fastmap
2983 && !bufp->can_be_null)
2984 return -1;
2985
2986 val = re_match_2 (bufp, string1, size1, string2, size2,
2987 startpos, regs, stop);
2988 if (val >= 0)
2989 return startpos;
2990
2991 if (val == -2)
2992 return -2;
2993
2994 advance:
2995 if (!range)
2996 break;
2997 else if (range > 0)
2998 {
2999 range--;
3000 startpos++;
3001 }
3002 else
3003 {
3004 range++;
3005 startpos--;
3006 }
3007 }
3008 return -1;
3009 } /* re_search_2 */
3010
3011 /* Declarations and macros for re_match_2. */
3012
/* Forward declarations, written without parameter lists, of file-local
   helpers whose definitions are not in this part of the file.  */
3013 static int bcmp_translate ();
3014 static boolean alt_match_null_string_p (),
3015 common_op_match_null_string_p (),
3016 group_match_null_string_p ();
3017
3018 /* Structure for per-register (a.k.a. per-group) information.
3019 This must not be longer than one word, because we push this value
3020 onto the failure stack. Other register information, such as the
3021 starting and ending positions (which are addresses), and the list of
3022 inner groups (which is a bits list) are maintained in separate
3023 variables.
3024
3025 We are making a (strictly speaking) nonportable assumption here: that
3026 the compiler will pack our bit fields into something that fits into
3027 the type of `word', i.e., is something that fits into one item on the
3028 failure stack. */
3029 typedef union
3030 {
3031 fail_stack_elt_t word;
3032 struct
3033 {
3034 /* This field is one if this group can match the empty string,
3035 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
3036 #define MATCH_NULL_UNSET_VALUE 3
3037 unsigned match_null_string_p : 2;
3038 unsigned is_active : 1;
3039 unsigned matched_something : 1;
3040 unsigned ever_matched_something : 1;
3041 } bits;
3042 } register_info_type;
3043
/* Accessors for the bit fields of a `register_info_type' value INFO.
   Each one expands to the field itself, so it may be read or assigned.  */
#define REG_MATCH_NULL_STRING_P(info)  ((info).bits.match_null_string_p)
#define IS_ACTIVE(info)  ((info).bits.is_active)
#define MATCHED_SOMETHING(info)  ((info).bits.matched_something)
#define EVER_MATCHED_SOMETHING(info)  ((info).bits.ever_matched_something)
3048
3049
/* Invoke after a real character has been matched.  For every register
   from `lowest_active_reg' through `highest_active_reg' (the
   subexpressions we are currently inside) this sets both the
   `matched_something' and the `ever_matched_something' flag.  */
#define SET_REGS_MATCHED() \
  do \
    { \
      unsigned active; \
      for (active = lowest_active_reg; \
           active <= highest_active_reg; \
           active++) \
        { \
          EVER_MATCHED_SOMETHING (reg_info[active]) = 1; \
          MATCHED_SOMETHING (reg_info[active]) = 1; \
        } \
    } \
  while (0)
3065
3066
/* Turn PTR, a pointer into `string1' or `string2', into an offset.  A
   pointer into `string1' yields its distance from `string1'; a pointer
   into `string2' yields its distance from `string2' plus `size1'.  */
#define POINTER_TO_OFFSET(ptr) \
  (!FIRST_STRING_P (ptr) ? (ptr) - string2 + size1 : (ptr) - string1)
3071
/* A register that has not matched yet holds this sentinel pointer.  */
#define REG_UNSET_VALUE ((char *) -1)

/* Nonzero if the register pointer E still holds the sentinel.  */
#define REG_UNSET(e) (REG_UNSET_VALUE == (e))
3075
3076
/* Helpers for the two-part subject string handled by re_match_2.  */

/* Nonzero while the current segment end `dend' is the end of the part
   of `string1' we are allowed to match.  */
#define MATCHING_IN_FIRST_STRING (end_match_1 == dend)

/* Use before reading a character through `*d'.  While `d' sits at the
   end of the current segment: if that segment is `string2' there is no
   more input and we jump to `fail'; otherwise we have run off
   `string1' and continue at the start of `string2'.  */
#define PREFETCH() \
  while (d == dend) \
    { \
      /* Nothing is left once string2 is used up.  */ \
      if (end_match_2 == dend) \
        goto fail; \
      /* Ran off string1: carry on in string2.  */ \
      dend = end_match_2; \
      d = string2; \
    }
3093
3094
/* Is D at the very start, or at the very end, of the virtual
   concatenation of `string1' and `string2'?  When there is only one
   string it is `string2'.  AT_STRINGS_BEG is also true whenever
   `size2' is zero.  */
#define AT_STRINGS_BEG(d) \
  ((d) == (size1 != 0 ? string1 : string2) || size2 == 0)
#define AT_STRINGS_END(d) (end2 == (d))
3099
3100
/* Is the character D refers to a word constituent (syntax `Sword')?
   Two positions are redirected before the lookup: D equal to `end1'
   (just past string1) reads the first character of string2, and D
   equal to `string2 - 1' (just before string2) reads the last
   character of string1.  Any other D is dereferenced directly.  */
#define WORDCHAR_P(d) \
  (Sword == SYNTAX ((d) == end1 ? *string2 \
                    : (d) == string2 - 1 ? *(end1 - 1) \
                    : *(d)))
3109
/* Is D at a word boundary?  True at the very beginning or very end of
   the strings, and otherwise when the character before D and the
   character at D differ in being word-constituent.

   D is parenthesized everywhere it is used, including in `(d) - 1',
   so that an argument such as `cond ? a : b' is not silently
   re-associated as `cond ? a : (b - 1)'.  */
#define AT_WORD_BOUNDARY(d) \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
   || WORDCHAR_P ((d) - 1) != WORDCHAR_P (d))
3115
3116
/* Free everything we malloc.  */
#ifdef REGEX_MALLOC
/* Free VAR if it is non-null, then set it to NULL.  The body is wrapped
   in do/while (0) so that the expansion is a single statement: used
   under an unbraced `if' or before an `else', both the free and the
   reset stay under the condition.  */
#define FREE_VAR(var) \
  do \
    { \
      if (var) \
        free (var); \
      (var) = NULL; \
    } \
  while (0)

/* Release the failure stack and every register vector, leaving each
   pointer set to NULL.  */
#define FREE_VARIABLES() \
  do \
    { \
      FREE_VAR (fail_stack.stack); \
      FREE_VAR (regstart); \
      FREE_VAR (regend); \
      FREE_VAR (old_regstart); \
      FREE_VAR (old_regend); \
      FREE_VAR (best_regstart); \
      FREE_VAR (best_regend); \
      FREE_VAR (reg_info); \
      FREE_VAR (reg_dummy); \
      FREE_VAR (reg_info_dummy); \
    } \
  while (0)
#else /* not REGEX_MALLOC */
/* Some MIPS systems (at least) want this to free alloca'd storage.  */
#define FREE_VARIABLES() alloca (0)
#endif /* not REGEX_MALLOC */
3137
3138
/* Sentinels meaning "no register is active".  They have to satisfy
   several constraints at once:
     - neither may be a valid register number; register numbers occupy
       one byte in the pattern, which limits us to 255 registers, so
       anything above 255 is safe;
     - they must differ by exactly 1, because of NUM_FAILURE_ITEMS
       above;
     - the value for the lowest register must exceed the value for the
       highest one, so that no registers are actually saved when none
       are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (1 + NO_HIGHEST_ACTIVE_REG)
3148
3149 /* Matching routines. */
3150
#ifndef emacs   /* Emacs never uses this.  */
/* Single-string variant of re_match_2.

   Matches the compiled pattern BUFP against STRING, which is SIZE
   characters long, starting at offset POS; REGS is handed through
   untouched.  The work is done by re_match_2, called with a null,
   zero-length first string, STRING as the second string, and SIZE as
   the stop position.  Whatever re_match_2 returns is returned here.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int match_result;

  match_result = re_match_2 (bufp, NULL, 0, string, size,
                             pos, regs, size);
  return match_result;
}
#endif /* not emacs */
3164
3165
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.
3170
   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.
3174
3175 We return -1 if no match, -2 if an internal error (such as the
3176 failure stack overflowing). Otherwise, we return the length of the
3177 matched substring. */
3178
3179 int
3180 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* [<][>][^][v][top][bottom][index][help] */
3181 struct re_pattern_buffer *bufp;
3182 const char *string1, *string2;
3183 int size1, size2;
3184 int pos;
3185 struct re_registers *regs;
3186 int stop;
3187 {
3188 /* General temporaries. */
3189 int mcnt;
3190 unsigned char *p1;
3191
3192 /* Just past the end of the corresponding string. */
3193 const char *end1, *end2;
3194
3195 /* Pointers into string1 and string2, just past the last characters in
3196 each to consider matching. */
3197 const char *end_match_1, *end_match_2;
3198
3199 /* Where we are in the data, and the end of the current string. */
3200 const char *d, *dend;
3201
3202 /* Where we are in the pattern, and the end of the pattern. */
3203 unsigned char *p = bufp->buffer;
3204 register unsigned char *pend = p + bufp->used;
3205
3206 /* We use this to map every character in the string. */
3207 char *translate = bufp->translate;
3208
3209 /* Failure point stack. Each place that can handle a failure further
3210 down the line pushes a failure point on this stack. It consists of
3211 restart, regend, and reg_info for all registers corresponding to
3212 the subexpressions we're currently inside, plus the number of such
3213 registers, and, finally, two char *'s. The first char * is where
3214 to resume scanning the pattern; the second one is where to resume
3215 scanning the strings. If the latter is zero, the failure point is
3216 a ``dummy''; if a failure happens and the failure point is a dummy,
3217 it gets discarded and the next next one is tried. */
3218 fail_stack_type fail_stack;
3219 #ifdef DEBUG
3220 static unsigned failure_id = 0;
3221 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3222 #endif
3223
3224 /* We fill all the registers internally, independent of what we
3225 return, for use in backreferences. The number here includes
3226 an element for register zero. */
3227 unsigned num_regs = bufp->re_nsub + 1;
3228
3229 /* The currently active registers. */
3230 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3231 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3232
3233 /* Information on the contents of registers. These are pointers into
3234 the input strings; they record just what was matched (on this
3235 attempt) by a subexpression part of the pattern, that is, the
3236 regnum-th regstart pointer points to where in the pattern we began
3237 matching and the regnum-th regend points to right after where we
3238 stopped matching the regnum-th subexpression. (The zeroth register
3239 keeps track of what the whole pattern matches.) */
3240 const char **regstart, **regend;
3241
3242 /* If a group that's operated upon by a repetition operator fails to
3243 match anything, then the register for its start will need to be
3244 restored because it will have been set to wherever in the string we
3245 are when we last see its open-group operator. Similarly for a
3246 register's end. */
3247 const char **old_regstart, **old_regend;
3248
3249 /* The is_active field of reg_info helps us keep track of which (possibly
3250 nested) subexpressions we are currently in. The matched_something
3251 field of reg_info[reg_num] helps us tell whether or not we have
3252 matched any of the pattern so far this time through the reg_num-th
3253 subexpression. These two fields get reset each time through any
3254 loop their register is in. */
3255 register_info_type *reg_info;
3256
3257 /* The following record the register info as found in the above
3258 variables when we find a match better than any we've seen before.
3259 This happens as we backtrack through the failure points, which in
3260 turn happens only if we have not yet matched the entire string. */
3261 unsigned best_regs_set = false;
3262 const char **best_regstart, **best_regend;
3263
3264 /* Logically, this is `best_regend[0]'. But we don't want to have to
3265 allocate space for that if we're not allocating space for anything
3266 else (see below). Also, we never need info about register 0 for
3267 any of the other register vectors, and it seems rather a kludge to
3268 treat `best_regend' differently than the rest. So we keep track of
3269 the end of the best match so far in a separate variable. We
3270 initialize this to NULL so that when we backtrack the first time
3271 and need to test it, it's not garbage. */
3272 const char *match_end = NULL;
3273
3274 /* Used when we pop values we don't care about. */
3275 const char **reg_dummy;
3276 register_info_type *reg_info_dummy;
3277
3278 #ifdef DEBUG
3279 /* Counts the total number of registers pushed. */
3280 unsigned num_regs_pushed = 0;
3281 #endif
3282
3283 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3284
3285 INIT_FAIL_STACK ();
3286
3287 /* Do not bother to initialize all the register variables if there are
3288 no groups in the pattern, as it takes a fair amount of time. If
3289 there are groups, we include space for register 0 (the whole
3290 pattern), even though we never use it, since it simplifies the
3291 array indexing. We should fix this. */
3292 if (bufp->re_nsub)
3293 {
3294 regstart = REGEX_TALLOC (num_regs, const char *);
3295 regend = REGEX_TALLOC (num_regs, const char *);
3296 old_regstart = REGEX_TALLOC (num_regs, const char *);
3297 old_regend = REGEX_TALLOC (num_regs, const char *);
3298 best_regstart = REGEX_TALLOC (num_regs, const char *);
3299 best_regend = REGEX_TALLOC (num_regs, const char *);
3300 reg_info = REGEX_TALLOC (num_regs, register_info_type);
3301 reg_dummy = REGEX_TALLOC (num_regs, const char *);
3302 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3303
3304 if (!(regstart && regend && old_regstart && old_regend && reg_info
3305 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3306 {
3307 FREE_VARIABLES ();
3308 return -2;
3309 }
3310 }
3311 #ifdef REGEX_MALLOC
3312 else
3313 {
3314 /* We must initialize all our variables to NULL, so that
3315 `FREE_VARIABLES' doesn't try to free them. */
3316 regstart = regend = old_regstart = old_regend = best_regstart
3317 = best_regend = reg_dummy = NULL;
3318 reg_info = reg_info_dummy = (register_info_type *) NULL;
3319 }
3320 #endif /* REGEX_MALLOC */
3321
3322 /* The starting position is bogus. */
3323 if (pos < 0 || pos > size1 + size2)
3324 {
3325 FREE_VARIABLES ();
3326 return -1;
3327 }
3328
3329 /* Initialize subexpression text positions to -1 to mark ones that no
3330 start_memory/stop_memory has been seen for. Also initialize the
3331 register information struct. */
3332 for (mcnt = 1; mcnt < num_regs; mcnt++)
3333 {
3334 regstart[mcnt] = regend[mcnt]
3335 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3336
3337 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3338 IS_ACTIVE (reg_info[mcnt]) = 0;
3339 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3340 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3341 }
3342
3343 /* We move `string1' into `string2' if the latter's empty -- but not if
3344 `string1' is null. */
3345 if (size2 == 0 && string1 != NULL)
3346 {
3347 string2 = string1;
3348 size2 = size1;
3349 string1 = 0;
3350 size1 = 0;
3351 }
3352 end1 = string1 + size1;
3353 end2 = string2 + size2;
3354
3355 /* Compute where to stop matching, within the two strings. */
3356 if (stop <= size1)
3357 {
3358 end_match_1 = string1 + stop;
3359 end_match_2 = string2;
3360 }
3361 else
3362 {
3363 end_match_1 = end1;
3364 end_match_2 = string2 + stop - size1;
3365 }
3366
3367 /* `p' scans through the pattern as `d' scans through the data.
3368 `dend' is the end of the input string that `d' points within. `d'
3369 is advanced into the following input string whenever necessary, but
3370 this happens before fetching; therefore, at the beginning of the
3371 loop, `d' can be pointing at the end of a string, but it cannot
3372 equal `string2'. */
3373 if (size1 > 0 && pos <= size1)
3374 {
3375 d = string1 + pos;
3376 dend = end_match_1;
3377 }
3378 else
3379 {
3380 d = string2 + pos - size1;
3381 dend = end_match_2;
3382 }
3383
3384 DEBUG_PRINT1 ("The compiled pattern is: ");
3385 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3386 DEBUG_PRINT1 ("The string to match is: `");
3387 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3388 DEBUG_PRINT1 ("'\n");
3389
3390 /* This loops over pattern commands. It exits by returning from the
3391 function if the match is complete, or it drops through if the match
3392 fails at this starting point in the input data. */
3393 for (;;)
3394 {
3395 DEBUG_PRINT2 ("\n0x%x: ", p);
3396
3397 if (p == pend)
3398 { /* End of pattern means we might have succeeded. */
3399 DEBUG_PRINT1 ("end of pattern ... ");
3400
3401 /* If we haven't matched the entire string, and we want the
3402 longest match, try backtracking. */
3403 if (d != end_match_2)
3404 {
3405 DEBUG_PRINT1 ("backtracking.\n");
3406
3407 if (!FAIL_STACK_EMPTY ())
3408 { /* More failure points to try. */
3409 boolean same_str_p = (FIRST_STRING_P (match_end)
3410 == MATCHING_IN_FIRST_STRING);
3411
3412 /* If exceeds best match so far, save it. */
3413 if (!best_regs_set
3414 || (same_str_p && d > match_end)
3415 || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3416 {
3417 best_regs_set = true;
3418 match_end = d;
3419
3420 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3421
3422 for (mcnt = 1; mcnt < num_regs; mcnt++)
3423 {
3424 best_regstart[mcnt] = regstart[mcnt];
3425 best_regend[mcnt] = regend[mcnt];
3426 }
3427 }
3428 goto fail;
3429 }
3430
3431 /* If no failure points, don't restore garbage. */
3432 else if (best_regs_set)
3433 {
3434 restore_best_regs:
3435 /* Restore best match. It may happen that `dend ==
3436 end_match_1' while the restored d is in string2.
3437 For example, the pattern `x.*y.*z' against the
3438 strings `x-' and `y-z-', if the two strings are
3439 not consecutive in memory. */
3440 DEBUG_PRINT1 ("Restoring best registers.\n");
3441
3442 d = match_end;
3443 dend = ((d >= string1 && d <= end1)
3444 ? end_match_1 : end_match_2);
3445
3446 for (mcnt = 1; mcnt < num_regs; mcnt++)
3447 {
3448 regstart[mcnt] = best_regstart[mcnt];
3449 regend[mcnt] = best_regend[mcnt];
3450 }
3451 }
3452 } /* d != end_match_2 */
3453
3454 DEBUG_PRINT1 ("Accepting match.\n");
3455
3456 /* If caller wants register contents data back, do it. */
3457 if (regs && !bufp->no_sub)
3458 {
3459 /* Have the register data arrays been allocated? */
3460 if (bufp->regs_allocated == REGS_UNALLOCATED)
3461 { /* No. So allocate them with malloc. We need one
3462 extra element beyond `num_regs' for the `-1' marker
3463 GNU code uses. */
3464 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3465 regs->start = TALLOC (regs->num_regs, regoff_t);
3466 regs->end = TALLOC (regs->num_regs, regoff_t);
3467 if (regs->start == NULL || regs->end == NULL)
3468 return -2;
3469 bufp->regs_allocated = REGS_REALLOCATE;
3470 }
3471 else if (bufp->regs_allocated == REGS_REALLOCATE)
3472 { /* Yes. If we need more elements than were already
3473 allocated, reallocate them. If we need fewer, just
3474 leave it alone. */
3475 if (regs->num_regs < num_regs + 1)
3476 {
3477 regs->num_regs = num_regs + 1;
3478 RETALLOC (regs->start, regs->num_regs, regoff_t);
3479 RETALLOC (regs->end, regs->num_regs, regoff_t);
3480 if (regs->start == NULL || regs->end == NULL)
3481 return -2;
3482 }
3483 }
3484 else
3485 assert (bufp->regs_allocated == REGS_FIXED);
3486
3487 /* Convert the pointer data in `regstart' and `regend' to
3488 indices. Register zero has to be set differently,
3489 since we haven't kept track of any info for it. */
3490 if (regs->num_regs > 0)
3491 {
3492 regs->start[0] = pos;
3493 regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3494 : d - string2 + size1);
3495 }
3496
3497 /* Go through the first `min (num_regs, regs->num_regs)'
3498 registers, since that is all we initialized. */
3499 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3500 {
3501 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3502 regs->start[mcnt] = regs->end[mcnt] = -1;
3503 else
3504 {
3505 regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3506 regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3507 }
3508 }
3509
3510 /* If the regs structure we return has more elements than
3511 were in the pattern, set the extra elements to -1. If
3512 we (re)allocated the registers, this is the case,
3513 because we always allocate enough to have at least one
3514 -1 at the end. */
3515 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3516 regs->start[mcnt] = regs->end[mcnt] = -1;
3517 } /* regs && !bufp->no_sub */
3518
3519 FREE_VARIABLES ();
3520 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3521 nfailure_points_pushed, nfailure_points_popped,
3522 nfailure_points_pushed - nfailure_points_popped);
3523 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3524
3525 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3526 ? string1
3527 : string2 - size1);
3528
3529 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3530
3531 return mcnt;
3532 }
3533
3534 /* Otherwise match next pattern command. */
3535 #ifdef SWITCH_ENUM_BUG
3536 switch ((int) ((re_opcode_t) *p++))
3537 #else
3538 switch ((re_opcode_t) *p++)
3539 #endif
3540 {
3541 /* Ignore these. Used to ignore the n of succeed_n's which
3542 currently have n == 0. */
3543 case no_op:
3544 DEBUG_PRINT1 ("EXECUTING no_op.\n");
3545 break;
3546
3547
3548 /* Match the next n pattern characters exactly. The following
3549 byte in the pattern defines n, and the n bytes after that
3550 are the characters to match. */
3551 case exactn:
3552 mcnt = *p++;
3553 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3554
3555 /* This is written out as an if-else so we don't waste time
3556 testing `translate' inside the loop. */
3557 if (translate)
3558 {
3559 do
3560 {
3561 PREFETCH ();
3562 if (translate[(unsigned char) *d++] != (char) *p++)
3563 goto fail;
3564 }
3565 while (--mcnt);
3566 }
3567 else
3568 {
3569 do
3570 {
3571 PREFETCH ();
3572 if (*d++ != (char) *p++) goto fail;
3573 }
3574 while (--mcnt);
3575 }
3576 SET_REGS_MATCHED ();
3577 break;
3578
3579
3580 /* Match any character except possibly a newline or a null. */
3581 case anychar:
3582 DEBUG_PRINT1 ("EXECUTING anychar.\n");
3583
3584 PREFETCH ();
3585
3586 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3587 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3588 goto fail;
3589
3590 SET_REGS_MATCHED ();
3591 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
3592 d++;
3593 break;
3594
3595
3596 case charset:
3597 case charset_not:
3598 {
3599 register unsigned char c;
3600 boolean not = (re_opcode_t) *(p - 1) == charset_not;
3601
3602 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3603
3604 PREFETCH ();
3605 c = TRANSLATE (*d); /* The character to match. */
3606
3607 /* Cast to `unsigned' instead of `unsigned char' in case the
3608 bit list is a full 32 bytes long. */
3609 if (c < (unsigned) (*p * BYTEWIDTH)
3610 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3611 not = !not;
3612
3613 p += 1 + *p;
3614
3615 if (!not) goto fail;
3616
3617 SET_REGS_MATCHED ();
3618 d++;
3619 break;
3620 }
3621
3622
3623 /* The beginning of a group is represented by start_memory.
3624 The arguments are the register number in the next byte, and the
3625 number of groups inner to this one in the next. The text
3626 matched within the group is recorded (in the internal
3627 registers data structure) under the register number. */
3628 case start_memory:
3629 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3630
3631 /* Find out if this group can match the empty string. */
3632 p1 = p; /* To send to group_match_null_string_p. */
3633
3634 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3635 REG_MATCH_NULL_STRING_P (reg_info[*p])
3636 = group_match_null_string_p (&p1, pend, reg_info);
3637
3638 /* Save the position in the string where we were the last time
3639 we were at this open-group operator in case the group is
3640 operated upon by a repetition operator, e.g., with `(a*)*b'
3641 against `ab'; then we want to ignore where we are now in
3642 the string in case this attempt to match fails. */
3643 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3644 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3645 : regstart[*p];
3646 DEBUG_PRINT2 (" old_regstart: %d\n",
3647 POINTER_TO_OFFSET (old_regstart[*p]));
3648
3649 regstart[*p] = d;
3650 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3651
3652 IS_ACTIVE (reg_info[*p]) = 1;
3653 MATCHED_SOMETHING (reg_info[*p]) = 0;
3654
3655 /* This is the new highest active register. */
3656 highest_active_reg = *p;
3657
3658 /* If nothing was active before, this is the new lowest active
3659 register. */
3660 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3661 lowest_active_reg = *p;
3662
3663 /* Move past the register number and inner group count. */
3664 p += 2;
3665 break;
3666
3667
3668 /* The stop_memory opcode represents the end of a group. Its
3669 arguments are the same as start_memory's: the register
3670 number, and the number of inner groups. */
3671 case stop_memory:
3672 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3673
3674 /* We need to save the string position the last time we were at
3675 this close-group operator in case the group is operated
3676 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3677 against `aba'; then we want to ignore where we are now in
3678 the string in case this attempt to match fails. */
3679 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3680 ? REG_UNSET (regend[*p]) ? d : regend[*p]
3681 : regend[*p];
3682 DEBUG_PRINT2 (" old_regend: %d\n",
3683 POINTER_TO_OFFSET (old_regend[*p]));
3684
3685 regend[*p] = d;
3686 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3687
3688 /* This register isn't active anymore. */
3689 IS_ACTIVE (reg_info[*p]) = 0;
3690
3691 /* If this was the only register active, nothing is active
3692 anymore. */
3693 if (lowest_active_reg == highest_active_reg)
3694 {
3695 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3696 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3697 }
3698 else
3699 { /* We must scan for the new highest active register, since
3700 it isn't necessarily one less than now: consider
3701 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
3702 new highest active register is 1. */
3703 unsigned char r = *p - 1;
3704 while (r > 0 && !IS_ACTIVE (reg_info[r]))
3705 r--;
3706
3707 /* If we end up at register zero, that means that we saved
3708 the registers as the result of an `on_failure_jump', not
3709 a `start_memory', and we jumped to past the innermost
3710 `stop_memory'. For example, in ((.)*) we save
3711 registers 1 and 2 as a result of the *, but when we pop
3712 back to the second ), we are at the stop_memory 1.
3713 Thus, nothing is active. */
3714 if (r == 0)
3715 {
3716 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3717 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3718 }
3719 else
3720 highest_active_reg = r;
3721 }
3722
3723 /* If just failed to match something this time around with a
3724 group that's operated on by a repetition operator, try to
3725 force exit from the ``loop'', and restore the register
3726 information for this group that we had before trying this
3727 last match. */
3728 if ((!MATCHED_SOMETHING (reg_info[*p])
3729 || (re_opcode_t) p[-3] == start_memory)
3730 && (p + 2) < pend)
3731 {
3732 boolean is_a_jump_n = false;
3733
3734 p1 = p + 2;
3735 mcnt = 0;
3736 switch ((re_opcode_t) *p1++)
3737 {
3738 case jump_n:
3739 is_a_jump_n = true;
3740 case pop_failure_jump:
3741 case maybe_pop_jump:
3742 case jump:
3743 case dummy_failure_jump:
3744 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3745 if (is_a_jump_n)
3746 p1 += 2;
3747 break;
3748
3749 default:
3750 /* do nothing */ ;
3751 }
3752 p1 += mcnt;
3753
3754 /* If the next operation is a jump backwards in the pattern
3755 to an on_failure_jump right before the start_memory
3756 corresponding to this stop_memory, exit from the loop
3757 by forcing a failure after pushing on the stack the
3758 on_failure_jump's jump in the pattern, and d. */
3759 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3760 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3761 {
3762 /* If this group ever matched anything, then restore
3763 what its registers were before trying this last
3764 failed match, e.g., with `(a*)*b' against `ab' for
3765 regstart[1], and, e.g., with `((a*)*(b*)*)*'
3766 against `aba' for regend[3].
3767
3768 Also restore the registers for inner groups for,
3769 e.g., `((a*)(b*))*' against `aba' (register 3 would
3770 otherwise get trashed). */
3771
3772 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3773 {
3774 unsigned r;
3775
3776 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3777
3778 /* Restore this and inner groups' (if any) registers. */
3779 for (r = *p; r < *p + *(p + 1); r++)
3780 {
3781 regstart[r] = old_regstart[r];
3782
3783 /* xx why this test? */
3784 if ((int) old_regend[r] >= (int) regstart[r])
3785 regend[r] = old_regend[r];
3786 }
3787 }
3788 p1++;
3789 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3790 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3791
3792 goto fail;
3793 }
3794 }
3795
3796 /* Move past the register number and the inner group count. */
3797 p += 2;
3798 break;
3799
3800
3801 /* \<digit> has been turned into a `duplicate' command which is
3802 followed by the numeric value of <digit> as the register number. */
3803 case duplicate:
3804 {
3805 register const char *d2, *dend2;
3806 int regno = *p++; /* Get which register to match against. */
3807 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3808
3809 /* Can't back reference a group which we've never matched. */
3810 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3811 goto fail;
3812
3813 /* Where in input to try to start matching. */
3814 d2 = regstart[regno];
3815
3816 /* Where to stop matching; if both the place to start and
3817 the place to stop matching are in the same string, then
3818 set to the place to stop, otherwise, for now have to use
3819 the end of the first string. */
3820
3821 dend2 = ((FIRST_STRING_P (regstart[regno])
3822 == FIRST_STRING_P (regend[regno]))
3823 ? regend[regno] : end_match_1);
3824 for (;;)
3825 {
3826 /* If necessary, advance to next segment in register
3827 contents. */
3828 while (d2 == dend2)
3829 {
3830 if (dend2 == end_match_2) break;
3831 if (dend2 == regend[regno]) break;
3832
3833 /* End of string1 => advance to string2. */
3834 d2 = string2;
3835 dend2 = regend[regno];
3836 }
3837 /* At end of register contents => success */
3838 if (d2 == dend2) break;
3839
3840 /* If necessary, advance to next segment in data. */
3841 PREFETCH ();
3842
3843 /* How many characters left in this segment to match. */
3844 mcnt = dend - d;
3845
3846 /* Want how many consecutive characters we can match in
3847 one shot, so, if necessary, adjust the count. */
3848 if (mcnt > dend2 - d2)
3849 mcnt = dend2 - d2;
3850
3851 /* Compare that many; failure if mismatch, else move
3852 past them. */
3853 if (translate
3854 ? bcmp_translate (d, d2, mcnt, translate)
3855 : bcmp (d, d2, mcnt))
3856 goto fail;
3857 d += mcnt, d2 += mcnt;
3858 }
3859 }
3860 break;
3861
3862
3863 /* begline matches the empty string at the beginning of the string
3864 (unless `not_bol' is set in `bufp'), and, if
3865 `newline_anchor' is set, after newlines. */
3866 case begline:
3867 DEBUG_PRINT1 ("EXECUTING begline.\n");
3868
3869 if (AT_STRINGS_BEG (d))
3870 {
3871 if (!bufp->not_bol) break;
3872 }
3873 else if (d[-1] == '\n' && bufp->newline_anchor)
3874 {
3875 break;
3876 }
3877 /* In all other cases, we fail. */
3878 goto fail;
3879
3880
3881 /* endline is the dual of begline. */
3882 case endline:
3883 DEBUG_PRINT1 ("EXECUTING endline.\n");
3884
3885 if (AT_STRINGS_END (d))
3886 {
3887 if (!bufp->not_eol) break;
3888 }
3889
3890 /* We have to ``prefetch'' the next character. */
3891 else if ((d == end1 ? *string2 : *d) == '\n'
3892 && bufp->newline_anchor)
3893 {
3894 break;
3895 }
3896 goto fail;
3897
3898
3899 /* Match at the very beginning of the data. */
3900 case begbuf:
3901 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3902 if (AT_STRINGS_BEG (d))
3903 break;
3904 goto fail;
3905
3906
3907 /* Match at the very end of the data. */
3908 case endbuf:
3909 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3910 if (AT_STRINGS_END (d))
3911 break;
3912 goto fail;
3913
3914
3915 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
3916 pushes NULL as the value for the string on the stack. Then
3917 `pop_failure_point' will keep the current value for the
3918 string, instead of restoring it. To see why, consider
3919 matching `foo\nbar' against `.*\n'. The .* matches the foo;
3920 then the . fails against the \n. But the next thing we want
3921 to do is match the \n against the \n; if we restored the
3922 string value, we would be back at the foo.
3923
3924 Because this is used only in specific cases, we don't need to
3925 check all the things that `on_failure_jump' does, to make
3926 sure the right things get saved on the stack. Hence we don't
3927 share its code. The only reason to push anything on the
3928 stack at all is that otherwise we would have to change
3929 `anychar's code to do something besides goto fail in this
3930 case; that seems worse than this. */
3931 case on_failure_keep_string_jump:
3932 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3933
3934 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3935 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3936
3937 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3938 break;
3939
3940
3941 /* Uses of on_failure_jump:
3942
3943 Each alternative starts with an on_failure_jump that points
3944 to the beginning of the next alternative. Each alternative
3945 except the last ends with a jump that in effect jumps past
3946 the rest of the alternatives. (They really jump to the
3947 ending jump of the following alternative, because tensioning
3948 these jumps is a hassle.)
3949
3950 Repeats start with an on_failure_jump that points past both
3951 the repetition text and either the following jump or
3952 pop_failure_jump back to this on_failure_jump. */
3953 case on_failure_jump:
3954 on_failure:
3955 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3956
3957 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3958 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3959
3960 /* If this on_failure_jump comes right before a group (i.e.,
3961 the original * applied to a group), save the information
3962 for that group and all inner ones, so that if we fail back
3963 to this point, the group's information will be correct.
3964 For example, in \(a*\)*\1, we need the preceding group,
3965 and in \(\(a*\)b*\)\2, we need the inner group. */
3966
3967 /* We can't use `p' to check ahead because we push
3968 a failure point to `p + mcnt' after we do this. */
3969 p1 = p;
3970
3971 /* We need to skip no_op's before we look for the
3972 start_memory in case this on_failure_jump is happening as
3973 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3974 against aba. */
3975 while (p1 < pend && (re_opcode_t) *p1 == no_op)
3976 p1++;
3977
3978 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3979 {
3980 /* We have a new highest active register now. This will
3981 get reset at the start_memory we are about to get to,
3982 but we will have saved all the registers relevant to
3983 this repetition op, as described above. */
3984 highest_active_reg = *(p1 + 1) + *(p1 + 2);
3985 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3986 lowest_active_reg = *(p1 + 1);
3987 }
3988
3989 DEBUG_PRINT1 (":\n");
3990 PUSH_FAILURE_POINT (p + mcnt, d, -2);
3991 break;
3992
3993
3994 /* A smart repeat ends with `maybe_pop_jump'.
3995 We change it to either `pop_failure_jump' or `jump'. */
3996 case maybe_pop_jump:
3997 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3998 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3999 {
4000 register unsigned char *p2 = p;
4001
4002 /* Compare the beginning of the repeat with what in the
4003 pattern follows its end. If we can establish that there
4004 is nothing that they would both match, i.e., that we
4005 would have to backtrack because of (as in, e.g., `a*a')
4006 then we can change to pop_failure_jump, because we'll
4007 never have to backtrack.
4008
4009 This is not true in the case of alternatives: in
4010 `(a|ab)*' we do need to backtrack to the `ab' alternative
4011 (e.g., if the string was `ab'). But instead of trying to
4012 detect that here, the alternative has put on a dummy
4013 failure point which is what we will end up popping. */
4014
4015 /* Skip over open/close-group commands. */
4016 while (p2 + 2 < pend
4017 && ((re_opcode_t) *p2 == stop_memory
4018 || (re_opcode_t) *p2 == start_memory))
4019 p2 += 3; /* Skip over args, too. */
4020
4021 /* If we're at the end of the pattern, we can change. */
4022 if (p2 == pend)
4023 {
4024 /* Consider what happens when matching ":\(.*\)"
4025 against ":/". I don't really understand this code
4026 yet. */
4027 p[-3] = (unsigned char) pop_failure_jump;
4028 DEBUG_PRINT1
4029 (" End of pattern: change to `pop_failure_jump'.\n");
4030 }
4031
4032 else if ((re_opcode_t) *p2 == exactn
4033 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4034 {
4035 register unsigned char c
4036 = *p2 == (unsigned char) endline ? '\n' : p2[2];
4037 p1 = p + mcnt;
4038
4039 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4040 to the `maybe_finalize_jump' of this case. Examine what
4041 follows. */
4042 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4043 {
4044 p[-3] = (unsigned char) pop_failure_jump;
4045 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4046 c, p1[5]);
4047 }
4048
4049 else if ((re_opcode_t) p1[3] == charset
4050 || (re_opcode_t) p1[3] == charset_not)
4051 {
4052 int not = (re_opcode_t) p1[3] == charset_not;
4053
4054 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4055 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4056 not = !not;
4057
4058 /* `not' is equal to 1 if c would match, which means
4059 that we can't change to pop_failure_jump. */
4060 if (!not)
4061 {
4062 p[-3] = (unsigned char) pop_failure_jump;
4063 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4064 }
4065 }
4066 }
4067 }
4068 p -= 2; /* Point at relative address again. */
4069 if ((re_opcode_t) p[-1] != pop_failure_jump)
4070 {
4071 p[-1] = (unsigned char) jump;
4072 DEBUG_PRINT1 (" Match => jump.\n");
4073 goto unconditional_jump;
4074 }
4075 /* Note fall through. */
4076
4077
4078 /* The end of a simple repeat has a pop_failure_jump back to
4079 its matching on_failure_jump, where the latter will push a
4080 failure point. The pop_failure_jump takes off failure
4081 points put on by this pop_failure_jump's matching
4082 on_failure_jump; we got through the pattern to here from the
4083 matching on_failure_jump, so didn't fail. */
4084 case pop_failure_jump:
4085 {
4086 /* We need to pass separate storage for the lowest and
4087 highest registers, even though we don't care about the
4088 actual values. Otherwise, we will restore only one
4089 register from the stack, since lowest will == highest in
4090 `pop_failure_point'. */
4091 unsigned dummy_low_reg, dummy_high_reg;
4092 unsigned char *pdummy;
4093 const char *sdummy;
4094
4095 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4096 POP_FAILURE_POINT (sdummy, pdummy,
4097 dummy_low_reg, dummy_high_reg,
4098 reg_dummy, reg_dummy, reg_info_dummy);
4099 }
4100 /* Note fall through. */
4101
4102
4103 /* Unconditionally jump (without popping any failure points). */
4104 case jump:
4105 unconditional_jump:
4106 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
4107 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4108 p += mcnt; /* Do the jump. */
4109 DEBUG_PRINT2 ("(to 0x%x).\n", p);
4110 break;
4111
4112
4113 /* We need this opcode so we can detect where alternatives end
4114 in `group_match_null_string_p' et al. */
4115 case jump_past_alt:
4116 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4117 goto unconditional_jump;
4118
4119
4120 /* Normally, the on_failure_jump pushes a failure point, which
4121 then gets popped at pop_failure_jump. We will end up at
4122 pop_failure_jump, also, and with a pattern of, say, `a+', we
4123 are skipping over the on_failure_jump, so we have to push
4124 something meaningless for pop_failure_jump to pop. */
4125 case dummy_failure_jump:
4126 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4127 /* It doesn't matter what we push for the string here. What
4128 the code at `fail' tests is the value for the pattern. */
4129 PUSH_FAILURE_POINT (0, 0, -2);
4130 goto unconditional_jump;
4131
4132
4133 /* At the end of an alternative, we need to push a dummy failure
4134 point in case we are followed by a `pop_failure_jump', because
4135 we don't want the failure point for the alternative to be
4136 popped. For example, matching `(a|ab)*' against `aab'
4137 requires that we match the `ab' alternative. */
4138 case push_dummy_failure:
4139 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4140 /* See comments just above at `dummy_failure_jump' about the
4141 two zeroes. */
4142 PUSH_FAILURE_POINT (0, 0, -2);
4143 break;
4144
4145 /* Have to succeed matching what follows at least n times.
4146 After that, handle like `on_failure_jump'. */
4147 case succeed_n:
4148 EXTRACT_NUMBER (mcnt, p + 2);
4149 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4150
4151 assert (mcnt >= 0);
4152 /* Originally, this is how many times we HAVE to succeed. */
4153 if (mcnt > 0)
4154 {
4155 mcnt--;
4156 p += 2;
4157 STORE_NUMBER_AND_INCR (p, mcnt);
4158 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
4159 }
4160 else if (mcnt == 0)
4161 {
4162 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
4163 p[2] = (unsigned char) no_op;
4164 p[3] = (unsigned char) no_op;
4165 goto on_failure;
4166 }
4167 break;
4168
4169 case jump_n:
4170 EXTRACT_NUMBER (mcnt, p + 2);
4171 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4172
4173 /* Originally, this is how many times we CAN jump. */
4174 if (mcnt)
4175 {
4176 mcnt--;
4177 STORE_NUMBER (p + 2, mcnt);
4178 goto unconditional_jump;
4179 }
4180 /* If don't have to jump any more, skip over the rest of command. */
4181 else
4182 p += 4;
4183 break;
4184
4185 case set_number_at:
4186 {
4187 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4188
4189 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4190 p1 = p + mcnt;
4191 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4192 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
4193 STORE_NUMBER (p1, mcnt);
4194 break;
4195 }
4196
4197 case wordbound:
4198 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4199 if (AT_WORD_BOUNDARY (d))
4200 break;
4201 goto fail;
4202
4203 case notwordbound:
4204 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4205 if (AT_WORD_BOUNDARY (d))
4206 goto fail;
4207 break;
4208
4209 case wordbeg:
4210 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4211 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4212 break;
4213 goto fail;
4214
4215 case wordend:
4216 DEBUG_PRINT1 ("EXECUTING wordend.\n");
4217 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4218 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4219 break;
4220 goto fail;
4221
4222 #ifdef emacs
4223 #ifdef emacs19
4224 case before_dot:
4225 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4226 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4227 goto fail;
4228 break;
4229
4230 case at_dot:
4231 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4232 if (PTR_CHAR_POS ((unsigned char *) d) != point)
4233 goto fail;
4234 break;
4235
4236 case after_dot:
4237 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4238 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4239 goto fail;
4240 break;
4241 #else /* not emacs19 */
4242 case at_dot:
4243 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4244 if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4245 goto fail;
4246 break;
4247 #endif /* not emacs19 */
4248
4249 case syntaxspec:
4250 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4251 mcnt = *p++;
4252 goto matchsyntax;
4253
4254 case wordchar:
4255 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4256 mcnt = (int) Sword;
4257 matchsyntax:
4258 PREFETCH ();
4259 if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4260 goto fail;
4261 SET_REGS_MATCHED ();
4262 break;
4263
4264 case notsyntaxspec:
4265 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4266 mcnt = *p++;
4267 goto matchnotsyntax;
4268
4269 case notwordchar:
4270 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4271 mcnt = (int) Sword;
4272 matchnotsyntax:
4273 PREFETCH ();
4274 if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4275 goto fail;
4276 SET_REGS_MATCHED ();
4277 break;
4278
4279 #else /* not emacs */
4280 case wordchar:
4281 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4282 PREFETCH ();
4283 if (!WORDCHAR_P (d))
4284 goto fail;
4285 SET_REGS_MATCHED ();
4286 d++;
4287 break;
4288
4289 case notwordchar:
4290 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4291 PREFETCH ();
4292 if (WORDCHAR_P (d))
4293 goto fail;
4294 SET_REGS_MATCHED ();
4295 d++;
4296 break;
4297 #endif /* not emacs */
4298
4299 default:
4300 abort ();
4301 }
4302 continue; /* Successfully executed one pattern command; keep going. */
4303
4304
4305 /* We goto here if a matching operation fails. */
4306 fail:
4307 if (!FAIL_STACK_EMPTY ())
4308 { /* A restart point is known. Restore to that state. */
4309 DEBUG_PRINT1 ("\nFAIL:\n");
4310 POP_FAILURE_POINT (d, p,
4311 lowest_active_reg, highest_active_reg,
4312 regstart, regend, reg_info);
4313
4314 /* If this failure point is a dummy, try the next one. */
4315 if (!p)
4316 goto fail;
4317
4318 /* If we failed to the end of the pattern, don't examine *p. */
4319 assert (p <= pend);
4320 if (p < pend)
4321 {
4322 boolean is_a_jump_n = false;
4323
4324 /* If failed to a backwards jump that's part of a repetition
4325 loop, need to pop this failure point and use the next one. */
4326 switch ((re_opcode_t) *p)
4327 {
4328 case jump_n:
4329 is_a_jump_n = true;
4330 case maybe_pop_jump:
4331 case pop_failure_jump:
4332 case jump:
4333 p1 = p + 1;
4334 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4335 p1 += mcnt;
4336
4337 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4338 || (!is_a_jump_n
4339 && (re_opcode_t) *p1 == on_failure_jump))
4340 goto fail;
4341 break;
4342 default:
4343 /* do nothing */ ;
4344 }
4345 }
4346
4347 if (d >= string1 && d <= end1)
4348 dend = end_match_1;
4349 }
4350 else
4351 break; /* Matching at this starting point really fails. */
4352 } /* for (;;) */
4353
4354 if (best_regs_set)
4355 goto restore_best_regs;
4356
4357 FREE_VARIABLES ();
4358
4359 return -1; /* Failure to match. */
4360 } /* re_match_2 */
4361
4362 /* Subroutine definitions for re_match_2. */
4363
4364
4365 /* We are passed P pointing to a register number after a start_memory.
4366
4367 Return true if the pattern up to the corresponding stop_memory can
4368 match the empty string, and false otherwise.
4369
4370 If we find the matching stop_memory, sets P to point to one past its number.
4371 Otherwise, sets P to an undefined byte less than or equal to END.
4372
4373 We don't handle duplicates properly (yet). */
4374
4375 static boolean
4376 group_match_null_string_p (p, end, reg_info)
/* [<][>][^][v][top][bottom][index][help] */
4377 unsigned char **p, *end;
4378 register_info_type *reg_info;
4379 {
4380 int mcnt;
4381 /* Point to after the args to the start_memory. */
4382 unsigned char *p1 = *p + 2;
4383
4384 while (p1 < end)
4385 {
4386 /* Skip over opcodes that can match nothing, and return true or
4387 false, as appropriate, when we get to one that can't, or to the
4388 matching stop_memory. */
4389
4390 switch ((re_opcode_t) *p1)
4391 {
4392 /* Could be either a loop or a series of alternatives. */
4393 case on_failure_jump:
4394 p1++;
4395 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4396
4397 /* If the next operation is not a jump backwards in the
4398 pattern. */
4399
4400 if (mcnt >= 0)
4401 {
4402 /* Go through the on_failure_jumps of the alternatives,
4403 seeing if any of the alternatives cannot match nothing.
4404 The last alternative starts with only a jump,
4405 whereas the rest start with on_failure_jump and end
4406 with a jump, e.g., here is the pattern for `a|b|c':
4407
4408 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4409 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4410 /exactn/1/c
4411
4412 So, we have to first go through the first (n-1)
4413 alternatives and then deal with the last one separately. */
4414
4415
4416 /* Deal with the first (n-1) alternatives, which start
4417 with an on_failure_jump (see above) that jumps to right
4418 past a jump_past_alt. */
4419
4420 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4421 {
4422 /* `mcnt' holds how many bytes long the alternative
4423 is, including the ending `jump_past_alt' and
4424 its number. */
4425
4426 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4427 reg_info))
4428 return false;
4429
4430 /* Move to right after this alternative, including the
4431 jump_past_alt. */
4432 p1 += mcnt;
4433
4434 /* Break if it's the beginning of an n-th alternative
4435 that doesn't begin with an on_failure_jump. */
4436 if ((re_opcode_t) *p1 != on_failure_jump)
4437 break;
4438
4439 /* Still have to check that it's not an n-th
4440 alternative that starts with an on_failure_jump. */
4441 p1++;
4442 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4443 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4444 {
4445 /* Get to the beginning of the n-th alternative. */
4446 p1 -= 3;
4447 break;
4448 }
4449 }
4450
4451 /* Deal with the last alternative: go back and get number
4452 of the `jump_past_alt' just before it. `mcnt' contains
4453 the length of the alternative. */
4454 EXTRACT_NUMBER (mcnt, p1 - 2);
4455
4456 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4457 return false;
4458
4459 p1 += mcnt; /* Get past the n-th alternative. */
4460 } /* if mcnt > 0 */
4461 break;
4462
4463
4464 case stop_memory:
4465 assert (p1[1] == **p);
4466 *p = p1 + 2;
4467 return true;
4468
4469
4470 default:
4471 if (!common_op_match_null_string_p (&p1, end, reg_info))
4472 return false;
4473 }
4474 } /* while p1 < end */
4475
4476 return false;
4477 } /* group_match_null_string_p */
4478
4479
4480 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4481 It expects P to be the first byte of a single alternative and END one
4482 byte past the last. The alternative can contain groups. */
4483
4484 static boolean
4485 alt_match_null_string_p (p, end, reg_info)
/* [<][>][^][v][top][bottom][index][help] */
4486 unsigned char *p, *end;
4487 register_info_type *reg_info;
4488 {
4489 int mcnt;
4490 unsigned char *p1 = p;
4491
4492 while (p1 < end)
4493 {
4494 /* Skip over opcodes that can match nothing, and break when we get
4495 to one that can't. */
4496
4497 switch ((re_opcode_t) *p1)
4498 {
4499 /* It's a loop. */
4500 case on_failure_jump:
4501 p1++;
4502 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4503 p1 += mcnt;
4504 break;
4505
4506 default:
4507 if (!common_op_match_null_string_p (&p1, end, reg_info))
4508 return false;
4509 }
4510 } /* while p1 < end */
4511
4512 return true;
4513 } /* alt_match_null_string_p */
4514
4515
4516 /* Deals with the ops common to group_match_null_string_p and
4517 alt_match_null_string_p.
4518
4519 Sets P to one after the op and its arguments, if any. */
4520
4521 static boolean
4522 common_op_match_null_string_p (p, end, reg_info)
/* [<][>][^][v][top][bottom][index][help] */
4523 unsigned char **p, *end;
4524 register_info_type *reg_info;
4525 {
4526 int mcnt;
4527 boolean ret;
4528 int reg_no;
4529 unsigned char *p1 = *p;
4530
4531 switch ((re_opcode_t) *p1++)
4532 {
4533 case no_op:
4534 case begline:
4535 case endline:
4536 case begbuf:
4537 case endbuf:
4538 case wordbeg:
4539 case wordend:
4540 case wordbound:
4541 case notwordbound:
4542 #ifdef emacs
4543 case before_dot:
4544 case at_dot:
4545 case after_dot:
4546 #endif
4547 break;
4548
4549 case start_memory:
4550 reg_no = *p1;
4551 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4552 ret = group_match_null_string_p (&p1, end, reg_info);
4553
4554 /* Have to set this here in case we're checking a group which
4555 contains a group and a back reference to it. */
4556
4557 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4558 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4559
4560 if (!ret)
4561 return false;
4562 break;
4563
4564 /* If this is an optimized succeed_n for zero times, make the jump. */
4565 case jump:
4566 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4567 if (mcnt >= 0)
4568 p1 += mcnt;
4569 else
4570 return false;
4571 break;
4572
4573 case succeed_n:
4574 /* Get to the number of times to succeed. */
4575 p1 += 2;
4576 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4577
4578 if (mcnt == 0)
4579 {
4580 p1 -= 4;
4581 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4582 p1 += mcnt;
4583 }
4584 else
4585 return false;
4586 break;
4587
4588 case duplicate:
4589 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4590 return false;
4591 break;
4592
4593 case set_number_at:
4594 p1 += 4;
4595
4596 default:
4597 /* All other opcodes mean we cannot match the empty string. */
4598 return false;
4599 }
4600
4601 *p = p1;
4602 return true;
4603 } /* common_op_match_null_string_p */
4604
4605
/* Compare the first LEN bytes of S1 and S2 after mapping each byte
   through the TRANSLATE table.  Returns 0 when all LEN translated
   bytes agree (including when LEN is 0), and 1 at the first pair
   that differs.  */

static int
bcmp_translate (s1, s2, len, translate)
    unsigned char *s1, *s2;
    register int len;
    char *translate;
{
  register unsigned char *lhs = s1;
  register unsigned char *rhs = s2;

  for (; len != 0; len--)
    {
      unsigned char a = *lhs++;
      unsigned char b = *rhs++;

      if (translate[a] != translate[b])
        return 1;
    }

  return 0;
}
4623
4624 /* Entry points for GNU code. */
4625
4626 /* re_compile_pattern is the GNU regular expression compiler: it
4627 compiles PATTERN (of length SIZE) and puts the result in BUFP.
4628 Returns 0 if the pattern was valid, otherwise an error string.
4629
4630 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4631 are set in BUFP on entry.
4632
4633 We call regex_compile to do the actual compilation. */
4634
4635 const char *
4636 re_compile_pattern (pattern, length, bufp)
/* [<][>][^][v][top][bottom][index][help] */
4637 const char *pattern;
4638 int length;
4639 struct re_pattern_buffer *bufp;
4640 {
4641 reg_errcode_t ret;
4642
4643 /* GNU code is written to assume at least RE_NREGS registers will be set
4644 (and at least one extra will be -1). */
4645 bufp->regs_allocated = REGS_UNALLOCATED;
4646
4647 /* And GNU code determines whether or not to get register information
4648 by passing null for the REGS argument to re_match, etc., not by
4649 setting no_sub. */
4650 bufp->no_sub = 0;
4651
4652 /* Match anchors at newline. */
4653 bufp->newline_anchor = 1;
4654
4655 ret = regex_compile (pattern, length, re_syntax_options, bufp);
4656
4657 return re_error_msg[(int) ret];
4658 }
4659
4660 /* Entry points compatible with 4.2 BSD regex library. We don't define
4661 them if this is an Emacs or POSIX compilation. */
4662
4663 #if !defined (emacs) && !defined (_POSIX_SOURCE)
4664
4665 /* BSD has one and only one pattern buffer. */
4666 static struct re_pattern_buffer re_comp_buf;
4667
4668 char *
4669 re_comp (s)
/* [<][>][^][v][top][bottom][index][help] */
4670 const char *s;
4671 {
4672 reg_errcode_t ret;
4673
4674 if (!s)
4675 {
4676 if (!re_comp_buf.buffer)
4677 return "No previous regular expression";
4678 return 0;
4679 }
4680
4681 if (!re_comp_buf.buffer)
4682 {
4683 re_comp_buf.buffer = (unsigned char *) malloc (200);
4684 if (re_comp_buf.buffer == NULL)
4685 return "Memory exhausted";
4686 re_comp_buf.allocated = 200;
4687
4688 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4689 if (re_comp_buf.fastmap == NULL)
4690 return "Memory exhausted";
4691 }
4692
4693 /* Since `re_exec' always passes NULL for the `regs' argument, we
4694 don't need to initialize the pattern buffer fields which affect it. */
4695
4696 /* Match anchors at newlines. */
4697 re_comp_buf.newline_anchor = 1;
4698
4699 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4700
4701 /* Yes, we're discarding `const' here. */
4702 return (char *) re_error_msg[(int) ret];
4703 }
4704
4705
4706 int
4707 re_exec (s)
/* [<][>][^][v][top][bottom][index][help] */
4708 const char *s;
4709 {
4710 const int len = strlen (s);
4711 return
4712 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4713 }
4714 #endif /* not emacs and not _POSIX_SOURCE */
4715
4716 /* POSIX.2 functions. Don't define these for Emacs. */
4717
4718 #ifndef emacs
4719
4720 /* regcomp takes a regular expression as a string and compiles it.
4721
4722 PREG is a regex_t *. We do not expect any fields to be initialized,
4723 since POSIX says we shouldn't. Thus, we set
4724
4725 `buffer' to the compiled pattern;
4726 `used' to the length of the compiled pattern;
4727 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4728 REG_EXTENDED bit in CFLAGS is set; otherwise, to
4729 RE_SYNTAX_POSIX_BASIC;
4730 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4731 `fastmap' and `fastmap_accurate' to zero;
4732 `re_nsub' to the number of subexpressions in PATTERN.
4733
4734 PATTERN is the address of the pattern string.
4735
4736 CFLAGS is a series of bits which affect compilation.
4737
4738 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4739 use POSIX basic syntax.
4740
4741 If REG_NEWLINE is set, then . and [^...] don't match newline.
4742 Also, regexec will try a match beginning after every newline.
4743
4744 If REG_ICASE is set, then we considers upper- and lowercase
4745 versions of letters to be equivalent when matching.
4746
4747 If REG_NOSUB is set, then when PREG is passed to regexec, that
4748 routine will report only success or failure, and nothing about the
4749 registers.
4750
4751 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
4752 the return codes and their meanings.) */
4753
4754 int
4755 regcomp (preg, pattern, cflags)
/* [<][>][^][v][top][bottom][index][help] */
4756 regex_t *preg;
4757 const char *pattern;
4758 int cflags;
4759 {
4760 reg_errcode_t ret;
4761 unsigned syntax
4762 = (cflags & REG_EXTENDED) ?
4763 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4764
4765 /* regex_compile will allocate the space for the compiled pattern. */
4766 preg->buffer = 0;
4767 preg->allocated = 0;
4768
4769 /* Don't bother to use a fastmap when searching. This simplifies the
4770 REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4771 characters after newlines into the fastmap. This way, we just try
4772 every character. */
4773 preg->fastmap = 0;
4774
4775 if (cflags & REG_ICASE)
4776 {
4777 unsigned i;
4778
4779 preg->translate = (char *) malloc (CHAR_SET_SIZE);
4780 if (preg->translate == NULL)
4781 return (int) REG_ESPACE;
4782
4783 /* Map uppercase characters to corresponding lowercase ones. */
4784 for (i = 0; i < CHAR_SET_SIZE; i++)
4785 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4786 }
4787 else
4788 preg->translate = NULL;
4789
4790 /* If REG_NEWLINE is set, newlines are treated differently. */
4791 if (cflags & REG_NEWLINE)
4792 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
4793 syntax &= ~RE_DOT_NEWLINE;
4794 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4795 /* It also changes the matching behavior. */
4796 preg->newline_anchor = 1;
4797 }
4798 else
4799 preg->newline_anchor = 0;
4800
4801 preg->no_sub = !!(cflags & REG_NOSUB);
4802
4803 /* POSIX says a null character in the pattern terminates it, so we
4804 can use strlen here in compiling the pattern. */
4805 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4806
4807 /* POSIX doesn't distinguish between an unmatched open-group and an
4808 unmatched close-group: both are REG_EPAREN. */
4809 if (ret == REG_ERPAREN) ret = REG_EPAREN;
4810
4811 return (int) ret;
4812 }
4813
4814
4815 /* regexec searches for a given pattern, specified by PREG, in the
4816 string STRING.
4817
4818 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4819 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
4820 least NMATCH elements, and we set them to the offsets of the
4821 corresponding matched substrings.
4822
4823 EFLAGS specifies `execution flags' which affect matching: if
4824 REG_NOTBOL is set, then ^ does not match at the beginning of the
4825 string; if REG_NOTEOL is set, then $ does not match at the end.
4826
4827 We return 0 if we find a match and REG_NOMATCH if not. */
4828
4829 int
4830 regexec (preg, string, nmatch, pmatch, eflags)
/* [<][>][^][v][top][bottom][index][help] */
4831 const regex_t *preg;
4832 const char *string;
4833 size_t nmatch;
4834 regmatch_t pmatch[];
4835 int eflags;
4836 {
4837 int ret;
4838 struct re_registers regs;
4839 regex_t private_preg;
4840 int len = strlen (string);
4841 boolean want_reg_info = !preg->no_sub && nmatch > 0;
4842
4843 private_preg = *preg;
4844
4845 private_preg.not_bol = !!(eflags & REG_NOTBOL);
4846 private_preg.not_eol = !!(eflags & REG_NOTEOL);
4847
4848 /* The user has told us exactly how many registers to return
4849 information about, via `nmatch'. We have to pass that on to the
4850 matching routines. */
4851 private_preg.regs_allocated = REGS_FIXED;
4852
4853 if (want_reg_info)
4854 {
4855 regs.num_regs = nmatch;
4856 regs.start = TALLOC (nmatch, regoff_t);
4857 regs.end = TALLOC (nmatch, regoff_t);
4858 if (regs.start == NULL || regs.end == NULL)
4859 return (int) REG_NOMATCH;
4860 }
4861
4862 /* Perform the searching operation. */
4863 ret = re_search (&private_preg, string, len,
4864 /* start: */ 0, /* range: */ len,
4865 want_reg_info ? ®s : (struct re_registers *) 0);
4866
4867 /* Copy the register information to the POSIX structure. */
4868 if (want_reg_info)
4869 {
4870 if (ret >= 0)
4871 {
4872 unsigned r;
4873
4874 for (r = 0; r < nmatch; r++)
4875 {
4876 pmatch[r].rm_so = regs.start[r];
4877 pmatch[r].rm_eo = regs.end[r];
4878 }
4879 }
4880
4881 /* If we needed the temporary register info, free the space now. */
4882 free (regs.start);
4883 free (regs.end);
4884 }
4885
4886 /* We want zero return to mean success, unlike `re_search'. */
4887 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4888 }
4889
4890
4891 /* Returns a message corresponding to an error code, ERRCODE, returned
4892 from either regcomp or regexec. We don't use PREG here. */
4893
4894 size_t
4895 regerror (errcode, preg, errbuf, errbuf_size)
/* [<][>][^][v][top][bottom][index][help] */
4896 int errcode;
4897 const regex_t *preg;
4898 char *errbuf;
4899 size_t errbuf_size;
4900 {
4901 const char *msg;
4902 size_t msg_size;
4903
4904 if (errcode < 0
4905 || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4906 /* Only error codes returned by the rest of the code should be passed
4907 to this routine. If we are given anything else, or if other regex
4908 code generates an invalid error code, then the program has a bug.
4909 Dump core so we can fix it. */
4910 abort ();
4911
4912 msg = re_error_msg[errcode];
4913
4914 /* POSIX doesn't require that we do anything in this case, but why
4915 not be nice. */
4916 if (! msg)
4917 msg = "Success";
4918
4919 msg_size = strlen (msg) + 1; /* Includes the null. */
4920
4921 if (errbuf_size != 0)
4922 {
4923 if (msg_size > errbuf_size)
4924 {
4925 strncpy (errbuf, msg, errbuf_size - 1);
4926 errbuf[errbuf_size - 1] = 0;
4927 }
4928 else
4929 strcpy (errbuf, msg);
4930 }
4931
4932 return msg_size;
4933 }
4934
4935
4936 /* Free dynamically allocated space used by PREG. */
4937
4938 void
4939 regfree (preg)
/* [<][>][^][v][top][bottom][index][help] */
4940 regex_t *preg;
4941 {
4942 if (preg->buffer != NULL)
4943 free (preg->buffer);
4944 preg->buffer = NULL;
4945
4946 preg->allocated = 0;
4947 preg->used = 0;
4948
4949 if (preg->fastmap != NULL)
4950 free (preg->fastmap);
4951 preg->fastmap = NULL;
4952 preg->fastmap_accurate = 0;
4953
4954 if (preg->translate != NULL)
4955 free (preg->translate);
4956 preg->translate = NULL;
4957 }
4958
4959 #endif /* not emacs */
4960
4961 /*
4962 Local variables:
4963 make-backup-files: t
4964 version-control: t
4965 trim-versions-without-asking: nil
4966 End:
4967 */