modules/up/src/gnug++/regex.c

/* [<][>]
[^][v][top][bottom][index][help] */

FUNCTIONS

This source file includes the following functions.
  1. bcmp
  2. bcopy
  3. bzero
  4. init_syntax_once
  5. SYNTAX
  6. isascii
  7. ISBLANK
  8. ISBLANK
  9. ISGRAPH
  10. ISGRAPH
  11. ISPRINT
  12. ISDIGIT
  13. ISALNUM
  14. ISALPHA
  15. ISCNTRL
  16. ISLOWER
  17. ISPUNCT
  18. ISSPACE
  19. ISUPPER
  20. ISXDIGIT
  21. SIGN_EXTEND_CHAR
  22. SIGN_EXTEND_CHAR
  23. REGEX_REALLOCATE
  24. REGEX_REALLOCATE
  25. FIRST_STRING_P
  26. TALLOC
  27. RETALLOC
  28. REGEX_TALLOC
  29. STREQ
  30. MAX
  31. MIN
  32. STORE_NUMBER
  33. STORE_NUMBER_AND_INCR
  34. EXTRACT_NUMBER
  35. extract_number
  36. EXTRACT_NUMBER
  37. EXTRACT_NUMBER_AND_INCR
  38. extract_number_and_incr
  39. EXTRACT_NUMBER_AND_INCR
  40. DEBUG_STATEMENT
  41. DEBUG_PRINT1
  42. DEBUG_PRINT2
  43. DEBUG_PRINT3
  44. DEBUG_PRINT4
  45. DEBUG_PRINT_COMPILED_PATTERN
  46. DEBUG_PRINT_DOUBLE_STRING
  47. printchar
  48. print_fastmap
  49. print_partial_compiled_pattern
  50. print_compiled_pattern
  51. print_double_string
  52. assert
  53. DEBUG_STATEMENT
  54. DEBUG_PRINT1
  55. DEBUG_PRINT2
  56. DEBUG_PRINT3
  57. DEBUG_PRINT4
  58. DEBUG_PRINT_COMPILED_PATTERN
  59. DEBUG_PRINT_DOUBLE_STRING
  60. re_set_syntax
  61. PATFETCH
  62. PATFETCH_RAW
  63. TRANSLATE
  64. GET_BUFFER_SPACE
  65. BUF_PUSH
  66. BUF_PUSH_2
  67. BUF_PUSH_3
  68. STORE_JUMP
  69. STORE_JUMP2
  70. INSERT_JUMP
  71. INSERT_JUMP2
  72. EXTEND_BUFFER
  73. SET_LIST_BIT
  74. GET_UNSIGNED_NUMBER
  75. IS_CHAR_CLASS
  76. regex_compile
  77. store_op1
  78. store_op2
  79. insert_op1
  80. insert_op2
  81. at_begline_loc_p
  82. at_endline_loc_p
  83. group_in_compile_stack
  84. compile_range
  85. FAIL_STACK_EMPTY
  86. FAIL_STACK_PTR_EMPTY
  87. FAIL_STACK_FULL
  88. FAIL_STACK_TOP
  89. INIT_FAIL_STACK
  90. DOUBLE_FAIL_STACK
  91. PUSH_PATTERN_OP
  92. PUSH_FAILURE_ITEM
  93. POP_FAILURE_ITEM
  94. DEBUG_POP
  95. DEBUG_PUSH
  96. DEBUG_POP
  97. PUSH_FAILURE_POINT
  98. POP_FAILURE_POINT
  99. re_compile_fastmap
  100. re_set_registers
  101. re_search
  102. re_search_2
  103. REG_MATCH_NULL_STRING_P
  104. IS_ACTIVE
  105. MATCHED_SOMETHING
  106. EVER_MATCHED_SOMETHING
  107. SET_REGS_MATCHED
  108. POINTER_TO_OFFSET
  109. REG_UNSET
  110. PREFETCH
  111. AT_STRINGS_BEG
  112. AT_STRINGS_END
  113. WORDCHAR_P
  114. AT_WORD_BOUNDARY
  115. FREE_VAR
  116. FREE_VARIABLES
  117. FREE_VARIABLES
  118. re_match
  119. re_match_2
  120. group_match_null_string_p
  121. alt_match_null_string_p
  122. common_op_match_null_string_p
  123. bcmp_translate
  124. re_compile_pattern
  125. re_comp
  126. re_exec
  127. regcomp
  128. regexec
  129. regerror
  130. regfree

   1 /* Extended regular expression matching and search library,
   2    version 0.12.
   3    (Implements POSIX draft P10003.2/D11.2, except for
   4    internationalization features.)
   5 
   6    Copyright (C) 1993 Free Software Foundation, Inc.
   7 
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 2, or (at your option)
  11    any later version.
  12 
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17 
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  21 
  22 /* AIX requires this to be the first thing in the file. */
  23 #if defined (_AIX) && !defined (REGEX_MALLOC)
  24   #pragma alloca
  25 #endif
  26 
  27 #define _GNU_SOURCE
  28 
  29 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
  30 #include <sys/types.h>
  31 
  32 #ifdef HAVE_CONFIG_H
  33 #include "config.h"
  34 #endif
  35 
  36 /* The `emacs' switch turns on certain matching commands
  37    that make sense only in Emacs. */
  38 #ifdef emacs
  39 
  40 #include "lisp.h"
  41 #include "buffer.h"
  42 #include "syntax.h"
  43 
  44 /* Emacs uses `NULL' as a predicate.  */
  45 #undef NULL
  46 
  47 #else  /* not emacs */
  48 
  49 /* We used to test for `BSTRING' here, but only GCC and Emacs define
  50    `BSTRING', as far as I know, and neither of them use this code.  */
  51 #if HAVE_STRING_H || STDC_HEADERS
  52 #include <string.h>
  53 #ifndef bcmp
  54 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
     /* [<][>][^][v][top][bottom][index][help] */
  55 #endif
  56 #ifndef bcopy
  57 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
     /* [<][>][^][v][top][bottom][index][help] */
  58 #endif
  59 #ifndef bzero
  60 #define bzero(s, n)     memset ((s), 0, (n))
     /* [<][>][^][v][top][bottom][index][help] */
  61 #endif
  62 #else
  63 #include <strings.h>
  64 #endif
  65 
  66 #ifdef STDC_HEADERS
  67 #include <stdlib.h>
  68 #else
  69 char *malloc ();
  70 char *realloc ();
  71 #endif
  72 
  73 
  74 /* Define the syntax stuff for \<, \>, etc.  */
  75 
  76 /* This must be nonzero for the wordchar and notwordchar pattern
  77    commands in re_match_2.  */
  78 #ifndef Sword 
  79 #define Sword 1
  80 #endif
  81 
  82 #ifdef SYNTAX_TABLE
  83 
  84 extern char *re_syntax_table;
  85 
  86 #else /* not SYNTAX_TABLE */
  87 
  88 /* How many characters in the character set.  */
  89 #define CHAR_SET_SIZE 256
  90 
  91 static char re_syntax_table[CHAR_SET_SIZE];
  92 
  93 static void
  94 init_syntax_once ()
     /* [<][>][^][v][top][bottom][index][help] */
  95 {
  96    register int c;
  97    static int done = 0;
  98 
  99    if (done)
 100      return;
 101 
 102    bzero (re_syntax_table, sizeof re_syntax_table);
 103 
 104    for (c = 'a'; c <= 'z'; c++)
 105      re_syntax_table[c] = Sword;
 106 
 107    for (c = 'A'; c <= 'Z'; c++)
 108      re_syntax_table[c] = Sword;
 109 
 110    for (c = '0'; c <= '9'; c++)
 111      re_syntax_table[c] = Sword;
 112 
 113    re_syntax_table['_'] = Sword;
 114 
 115    done = 1;
 116 }
 117 
 118 #endif /* not SYNTAX_TABLE */
 119 
 120 #define SYNTAX(c) re_syntax_table[c]
     /* [<][>][^][v][top][bottom][index][help] */
 121 
 122 #endif /* not emacs */
 123 
 124 /* Get the interface, including the syntax bits.  */
 125 #include "regex.h"
 126 
 127 /* isalpha etc. are used for the character classes.  */
 128 #include <ctype.h>
 129 
 130 #ifndef isascii
 131 #define isascii(c) 1
     /* [<][>][^][v][top][bottom][index][help] */
 132 #endif
 133 
 134 #ifdef isblank
 135 #define ISBLANK(c) (isascii (c) && isblank (c))
     /* [<][>][^][v][top][bottom][index][help] */
 136 #else
 137 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
     /* [<][>][^][v][top][bottom][index][help] */
 138 #endif
 139 #ifdef isgraph
 140 #define ISGRAPH(c) (isascii (c) && isgraph (c))
     /* [<][>][^][v][top][bottom][index][help] */
 141 #else
 142 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
     /* [<][>][^][v][top][bottom][index][help] */
 143 #endif
 144 
 145 #define ISPRINT(c) (isascii (c) && isprint (c))
     /* [<][>][^][v][top][bottom][index][help] */
 146 #define ISDIGIT(c) (isascii (c) && isdigit (c))
     /* [<][>][^][v][top][bottom][index][help] */
 147 #define ISALNUM(c) (isascii (c) && isalnum (c))
     /* [<][>][^][v][top][bottom][index][help] */
 148 #define ISALPHA(c) (isascii (c) && isalpha (c))
     /* [<][>][^][v][top][bottom][index][help] */
 149 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
     /* [<][>][^][v][top][bottom][index][help] */
 150 #define ISLOWER(c) (isascii (c) && islower (c))
     /* [<][>][^][v][top][bottom][index][help] */
 151 #define ISPUNCT(c) (isascii (c) && ispunct (c))
     /* [<][>][^][v][top][bottom][index][help] */
 152 #define ISSPACE(c) (isascii (c) && isspace (c))
     /* [<][>][^][v][top][bottom][index][help] */
 153 #define ISUPPER(c) (isascii (c) && isupper (c))
     /* [<][>][^][v][top][bottom][index][help] */
 154 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
     /* [<][>][^][v][top][bottom][index][help] */
 155 
 156 #ifndef NULL
 157 #define NULL 0
 158 #endif
 159 
 160 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
 161    since ours (we hope) works properly with all combinations of
 162    machines, compilers, `char' and `unsigned char' argument types.
 163    (Per Bothner suggested the basic approach.)  */
 164 #undef SIGN_EXTEND_CHAR
 165 #if __STDC__
 166 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
     /* [<][>][^][v][top][bottom][index][help] */
 167 #else  /* not __STDC__ */
 168 /* As in Harbison and Steele.  */
 169 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
     /* [<][>][^][v][top][bottom][index][help] */
 170 #endif
 171 
 172 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
 173    use `alloca' instead of `malloc'.  This is because using malloc in
 174    re_search* or re_match* could cause memory leaks when C-g is used in
 175    Emacs; also, malloc is slower and causes storage fragmentation.  On
 176    the other hand, malloc is more portable, and easier to debug.  
 177    
 178    Because we sometimes use alloca, some routines have to be macros,
 179    not functions -- `alloca'-allocated space disappears at the end of the
 180    function it is called in.  */
 181 
 182 #ifdef REGEX_MALLOC
 183 
 184 #define REGEX_ALLOCATE malloc
 185 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
     /* [<][>][^][v][top][bottom][index][help] */
 186 
 187 #else /* not REGEX_MALLOC  */
 188 
 189 /* Emacs already defines alloca, sometimes.  */
 190 #ifndef alloca
 191 
 192 /* Make alloca work the best possible way.  */
 193 #ifdef __GNUC__
 194 #define alloca __builtin_alloca
 195 #else /* not __GNUC__ */
 196 #if HAVE_ALLOCA_H
 197 #include <alloca.h>
 198 #else /* not __GNUC__ or HAVE_ALLOCA_H */
 199 #ifndef _AIX /* Already did AIX, up at the top.  */
 200 char *alloca ();
 201 #endif /* not _AIX */
 202 #endif /* not HAVE_ALLOCA_H */ 
 203 #endif /* not __GNUC__ */
 204 
 205 #endif /* not alloca */
 206 
 207 #define REGEX_ALLOCATE alloca
 208 
 209 /* Assumes a `char *destination' variable.  */
 210 #define REGEX_REALLOCATE(source, osize, nsize)                          \
     /* [<][>][^][v][top][bottom][index][help] */
 211   (destination = (char *) alloca (nsize),                               \
 212    bcopy (source, destination, osize),                                  \
 213    destination)
 214 
 215 #endif /* not REGEX_MALLOC */
 216 
 217 
 218 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
 219    `string1' or just past its end.  This works if PTR is NULL, which is
 220    a good thing.  */
 221 #define FIRST_STRING_P(ptr)                                     \
     /* [<][>][^][v][top][bottom][index][help] */
 222   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 223 
 224 /* (Re)Allocate N items of type T using malloc, or fail.  */
 225 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
     /* [<][>][^][v][top][bottom][index][help] */
 226 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
     /* [<][>][^][v][top][bottom][index][help] */
 227 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
     /* [<][>][^][v][top][bottom][index][help] */
 228 
 229 #define BYTEWIDTH 8 /* In bits.  */
 230 
 231 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
     /* [<][>][^][v][top][bottom][index][help] */
 232 
 233 #define MAX(a, b) ((a) > (b) ? (a) : (b))
     /* [<][>][^][v][top][bottom][index][help] */
 234 #define MIN(a, b) ((a) < (b) ? (a) : (b))
     /* [<][>][^][v][top][bottom][index][help] */
 235 
 236 typedef char boolean;
 237 #define false 0
 238 #define true 1
 239 
 /* Command codes that appear in compiled regular expressions.  Some
   opcodes are followed by argument bytes, and an opcode may interpret
   its arguments in any way whatsoever.  Zero bytes may appear in the
   compiled regular expression.

   Emacs needs the value of `exactn' in search.c (search_buffer), so
   regex.h defines `RE_EXACTN_VALUE' to be 1; the value of `exactn'
   used here must therefore also be 1.  */

typedef enum
{
  no_op = 0,

  /* Operands: one byte N, then N literal bytes.  */
  exactn = 1,

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to the specified set.  The first
     operand byte is the number of bitmap bytes; the bitmap follows.
     Bits within each byte are ordered low-bit-first, and a character
     is in the set if its bit is 1.  A character too large to have a
     bit in the map is automatically not in the set.  */
  charset,

  /* Same operands as charset, but matches any character that is NOT
     one of those specified.  */
  charset_not,

  /* Start remembering the matched text, for storing in a register.
     Operands: one byte with the register number (0 to one less than
     the pattern buffer's re_nsub field), then one byte with the number
     of groups inner to this one.  (The latter is part of start_memory
     only because the on_failure_jump of re_match_2 needs it.)  */
  start_memory,

  /* Stop remembering the matched text and store it in a memory
     register.  Operands: register number and inner-group count, just
     like `start_memory'.  (The inner-group count is repeated here
     because there is no easy way of finding the corresponding
     start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered.  Operand: one byte
     containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer (if emacs) or at beginning of
     the string to be matched (if not).  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Operand: two-byte relative address to which to jump.  */
  jump,

  /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* Operand: two-byte relative address of the place to resume at in
     case of failure.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Throw away the latest failure point, then jump to the following
     two-byte relative address.  */
  pop_failure_jump,

  /* Used to jump back to the beginning of a repeat.  It is changed to
     pop_failure_jump if we know we won't have to backtrack to match
     -- that is, if what follows this jump clearly won't match what the
     repeat does, so there is no use backtracking out of repetitions
     already matched -- and otherwise changed to jump.  Operand:
     two-byte address.  */
  maybe_pop_jump,

  /* Jump to the following two-byte address and push a dummy failure
     point, which is thrown away if an attempt is made to use it for a
     failure.  A `+' construct makes this before the first repeat.
     Also used as an intermediary kind of jump when compiling an
     alternative.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Operands: two-byte relative address and two-byte number N.  After
     matching N times, jump to the address upon failure.  */
  succeed_n,

  /* Operands: two-byte relative address and two-byte number N.  Jump
     to the address N times, then fail.  */
  jump_n,

  /* Set the following two-byte relative address to the subsequent
     two-byte number.  The address *includes* the two bytes of
     number.  */
  set_number_at,

  /* Matches any word-constituent character.  */
  wordchar,

  /* Matches any char that is not a word-constituent.  */
  notwordchar,

  /* Succeeds if at word beginning.  */
  wordbeg,

  /* Succeeds if at word end.  */
  wordend,

  /* Succeeds if at a word boundary.  */
  wordbound,

  /* Succeeds if not at a word boundary.  */
  notwordbound

#ifdef emacs
  /* Succeeds if before point.  */
  ,before_dot,

  /* Succeeds if at point.  */
  at_dot,

  /* Succeeds if after point.  */
  after_dot,

  /* Matches any character whose syntax is specified.  Operand: one
     byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec
#endif /* emacs */
} re_opcode_t;
 379 
 380 /* Common operations on the compiled pattern.  */
 381 
 /* Store NUMBER in two contiguous bytes starting at DESTINATION: the
   first byte receives NUMBER masked to its low eight bits, the second
   receives NUMBER shifted right by eight.  NUMBER and DESTINATION are
   each evaluated twice.  */

#define STORE_NUMBER(destination, number)                               \
  do {                                                                  \
    *(destination) = (number) & 0xff;                                   \
    *((destination) + 1) = (number) >> 8;                               \
  } while (0)
 389 
 390 /* Same as STORE_NUMBER, except increment DESTINATION to
 391    the byte after where the number is stored.  Therefore, DESTINATION
 392    must be an lvalue.  */
 393 
 394 #define STORE_NUMBER_AND_INCR(destination, number)                      \
     /* [<][>][^][v][top][bottom][index][help] */
 395   do {                                                                  \
 396     STORE_NUMBER (destination, number);                                 \
 397     (destination) += 2;                                                 \
 398   } while (0)
 399 
 /* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  The first byte supplies the low eight bits; the second
   byte is sign-extended and supplies the rest, so the result may be
   negative.

   The high byte is scaled by multiplication rather than by `<< 8':
   left-shifting a negative value is undefined behavior in ISO C, and
   the sign-extended byte is negative for every negative number.  */

#define EXTRACT_NUMBER(destination, source)                             \
  do {                                                                  \
    (destination) = *(source) & 0377;                                   \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);     \
  } while (0)
 408 
 409 #ifdef DEBUG
 /* Function form of EXTRACT_NUMBER: store in *DEST the number held in
   the two bytes starting at SOURCE.  SOURCE[0] supplies the low eight
   bits; SOURCE[1] is sign-extended and supplies the rest.  */
static void
extract_number (dest, source)
    int *dest;
    unsigned char *source;
{
  int high = SIGN_EXTEND_CHAR (*(source + 1));
  int low = *source & 0377;

  /* Multiply instead of `high << 8': HIGH is negative whenever the
     stored number is, and left-shifting a negative value is undefined
     behavior in ISO C.  */
  *dest = low + high * (1 << 8);
}
 419 
 420 #ifndef EXTRACT_MACROS /* To debug the macros.  */
 421 #undef EXTRACT_NUMBER
 422 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
     /* [<][>][^][v][top][bottom][index][help] */
 423 #endif /* not EXTRACT_MACROS */
 424 
 425 #endif /* DEBUG */
 426 
 427 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 428    SOURCE must be an lvalue.  */
 429 
 430 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
     /* [<][>][^][v][top][bottom][index][help] */
 431   do {                                                                  \
 432     EXTRACT_NUMBER (destination, source);                               \
 433     (source) += 2;                                                      \
 434   } while (0)
 435 
 436 #ifdef DEBUG
 /* Function form of EXTRACT_NUMBER_AND_INCR: read the two-byte number at
   *SOURCE into *DESTINATION via extract_number, then advance *SOURCE
   past those two bytes.  */
static void
extract_number_and_incr (destination, source)
    int *destination;
    unsigned char **source;
{
  unsigned char *cursor = *source;

  extract_number (destination, cursor);
  *source = cursor + 2;
}
 445 
 446 #ifndef EXTRACT_MACROS
 447 #undef EXTRACT_NUMBER_AND_INCR
 448 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
     /* [<][>][^][v][top][bottom][index][help] */
 449   extract_number_and_incr (&dest, &src)
 450 #endif /* not EXTRACT_MACROS */
 451 
 452 #endif /* DEBUG */
 453 
 454 /* If DEBUG is defined, Regex prints many voluminous messages about what
 455    it is doing (if the variable `debug' is nonzero).  If linked with the
 456    main program in `iregex.c', you can enter patterns and strings
 457    interactively.  And if linked with the main program in `main.c' and
 458    the other test files, you can run the already-written tests.  */
 459 
 460 #ifdef DEBUG
 461 
 462 /* We use standard I/O for debugging.  */
 463 #include <stdio.h>
 464 
 465 /* It is useful to test things that ``must'' be true when debugging.  */
 466 #include <assert.h>
 467 
 /* Nonzero makes the DEBUG_PRINT* macros below produce output.  */
static int debug = 0;

/* Debugging hooks.  DEBUG_STATEMENT passes its argument through
   unchanged.  Every printing macro is wrapped in `do ... while (0)' so
   that it expands to exactly one statement: with a bare `if (debug) ...'
   expansion, an `else' written after the macro call would attach to the
   macro's hidden `if' instead of the caller's.  */
#define DEBUG_STATEMENT(e) e
#define DEBUG_PRINT1(x)                                                 \
  do { if (debug) printf (x); } while (0)
#define DEBUG_PRINT2(x1, x2)                                            \
  do { if (debug) printf (x1, x2); } while (0)
#define DEBUG_PRINT3(x1, x2, x3)                                        \
  do { if (debug) printf (x1, x2, x3); } while (0)
#define DEBUG_PRINT4(x1, x2, x3, x4)                                    \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)

/* P is accepted but unused; only the range S..E is printed.  */
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
 479 
 480 // Commented out by wlee@isi.edu
 481 //extern void printchar ();
 482 
 483 // Instead, put the following in
 /* Write C to stdout.  A character below 040 or at/above 0177 is written
   as a backslash followed by three octal digits; any other character
   is written as itself.  */
static void
printchar (c)
     char c;
{
  int plain = !(c < 040 || c >= 0177);

  if (plain)
    {
      putchar (c);
      return;
    }

  putchar ('\\');
  putchar (((c >> 6) & 3) + '0');
  putchar (((c >> 3) & 7) + '0');
  putchar ((c & 7) + '0');
}
 498 
 499 
 500 /* Print the fastmap in human-readable form.  */
 501 
 502 void
 503 print_fastmap (fastmap)
     /* [<][>][^][v][top][bottom][index][help] */
 504     char *fastmap;
 505 {
 506   unsigned was_a_range = 0;
 507   unsigned i = 0;  
 508   
 509   while (i < (1 << BYTEWIDTH))
 510     {
 511       if (fastmap[i++])
 512         {
 513           was_a_range = 0;
 514           printchar (i - 1);
 515           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
 516             {
 517               was_a_range = 1;
 518               i++;
 519             }
 520           if (was_a_range)
 521             {
 522               printf ("-");
 523               printchar (i - 1);
 524             }
 525         }
 526     }
 527   putchar ('\n'); 
 528 }
 529 
 530 
 531 /* Print a compiled pattern string in human-readable form, starting at
 532    the START pointer into it and ending just before the pointer END.  */
 533 
 534 void
 535 print_partial_compiled_pattern (start, end)
     /* [<][>][^][v][top][bottom][index][help] */
 536     unsigned char *start;
 537     unsigned char *end;
 538 {
 539   int mcnt, mcnt2;
 540   unsigned char *p = start;
 541   unsigned char *pend = end;
 542 
 543   if (start == NULL)
 544     {
 545       printf ("(null)\n");
 546       return;
 547     }
 548     
 549   /* Loop over pattern commands.  */
 550   while (p < pend)
 551     {
 552       switch ((re_opcode_t) *p++)
 553         {
 554         case no_op:
 555           printf ("/no_op");
 556           break;
 557 
 558         case exactn:
 559           mcnt = *p++;
 560           printf ("/exactn/%d", mcnt);
 561           do
 562             {
 563               putchar ('/');
 564               printchar (*p++);
 565             }
 566           while (--mcnt);
 567           break;
 568 
 569         case start_memory:
 570           mcnt = *p++;
 571           printf ("/start_memory/%d/%d", mcnt, *p++);
 572           break;
 573 
 574         case stop_memory:
 575           mcnt = *p++;
 576           printf ("/stop_memory/%d/%d", mcnt, *p++);
 577           break;
 578 
 579         case duplicate:
 580           printf ("/duplicate/%d", *p++);
 581           break;
 582 
 583         case anychar:
 584           printf ("/anychar");
 585           break;
 586 
 587         case charset:
 588         case charset_not:
 589           {
 590             register int c;
 591 
 592             printf ("/charset%s",
 593                     (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
 594             
 595             assert (p + *p < pend);
 596 
 597             for (c = 0; c < *p; c++)
 598               {
 599                 unsigned bit;
 600                 unsigned char map_byte = p[1 + c];
 601                 
 602                 putchar ('/');
 603 
 604                 for (bit = 0; bit < BYTEWIDTH; bit++)
 605                   if (map_byte & (1 << bit))
 606                     printchar (c * BYTEWIDTH + bit);
 607               }
 608             p += 1 + *p;
 609             break;
 610           }
 611 
 612         case begline:
 613           printf ("/begline");
 614           break;
 615 
 616         case endline:
 617           printf ("/endline");
 618           break;
 619 
 620         case on_failure_jump:
 621           extract_number_and_incr (&mcnt, &p);
 622           printf ("/on_failure_jump/0/%d", mcnt);
 623           break;
 624 
 625         case on_failure_keep_string_jump:
 626           extract_number_and_incr (&mcnt, &p);
 627           printf ("/on_failure_keep_string_jump/0/%d", mcnt);
 628           break;
 629 
 630         case dummy_failure_jump:
 631           extract_number_and_incr (&mcnt, &p);
 632           printf ("/dummy_failure_jump/0/%d", mcnt);
 633           break;
 634 
 635         case push_dummy_failure:
 636           printf ("/push_dummy_failure");
 637           break;
 638           
 639         case maybe_pop_jump:
 640           extract_number_and_incr (&mcnt, &p);
 641           printf ("/maybe_pop_jump/0/%d", mcnt);
 642           break;
 643 
 644         case pop_failure_jump:
 645           extract_number_and_incr (&mcnt, &p);
 646           printf ("/pop_failure_jump/0/%d", mcnt);
 647           break;          
 648           
 649         case jump_past_alt:
 650           extract_number_and_incr (&mcnt, &p);
 651           printf ("/jump_past_alt/0/%d", mcnt);
 652           break;          
 653           
 654         case jump:
 655           extract_number_and_incr (&mcnt, &p);
 656           printf ("/jump/0/%d", mcnt);
 657           break;
 658 
 659         case succeed_n: 
 660           extract_number_and_incr (&mcnt, &p);
 661           extract_number_and_incr (&mcnt2, &p);
 662           printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
 663           break;
 664         
 665         case jump_n: 
 666           extract_number_and_incr (&mcnt, &p);
 667           extract_number_and_incr (&mcnt2, &p);
 668           printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
 669           break;
 670         
 671         case set_number_at: 
 672           extract_number_and_incr (&mcnt, &p);
 673           extract_number_and_incr (&mcnt2, &p);
 674           printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
 675           break;
 676         
 677         case wordbound:
 678           printf ("/wordbound");
 679           break;
 680 
 681         case notwordbound:
 682           printf ("/notwordbound");
 683           break;
 684 
 685         case wordbeg:
 686           printf ("/wordbeg");
 687           break;
 688           
 689         case wordend:
 690           printf ("/wordend");
 691           
 692 #ifdef emacs
 693         case before_dot:
 694           printf ("/before_dot");
 695           break;
 696 
 697         case at_dot:
 698           printf ("/at_dot");
 699           break;
 700 
 701         case after_dot:
 702           printf ("/after_dot");
 703           break;
 704 
 705         case syntaxspec:
 706           printf ("/syntaxspec");
 707           mcnt = *p++;
 708           printf ("/%d", mcnt);
 709           break;
 710           
 711         case notsyntaxspec:
 712           printf ("/notsyntaxspec");
 713           mcnt = *p++;
 714           printf ("/%d", mcnt);
 715           break;
 716 #endif /* emacs */
 717 
 718         case wordchar:
 719           printf ("/wordchar");
 720           break;
 721           
 722         case notwordchar:
 723           printf ("/notwordchar");
 724           break;
 725 
 726         case begbuf:
 727           printf ("/begbuf");
 728           break;
 729 
 730         case endbuf:
 731           printf ("/endbuf");
 732           break;
 733 
 734         default:
 735           printf ("?%d", *(p-1));
 736         }
 737     }
 738   printf ("/\n");
 739 }
 740 
 741 
 742 void
 743 print_compiled_pattern (bufp)
     /* [<][>][^][v][top][bottom][index][help] */
 744     struct re_pattern_buffer *bufp;
 745 {
 746   unsigned char *buffer = bufp->buffer;
 747 
 748   print_partial_compiled_pattern (buffer, buffer + bufp->used);
 749   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
 750 
 751   if (bufp->fastmap_accurate && bufp->fastmap)
 752     {
 753       printf ("fastmap: ");
 754       print_fastmap (bufp->fastmap);
 755     }
 756 
 757   printf ("re_nsub: %d\t", bufp->re_nsub);
 758   printf ("regs_alloc: %d\t", bufp->regs_allocated);
 759   printf ("can_be_null: %d\t", bufp->can_be_null);
 760   printf ("newline_anchor: %d\n", bufp->newline_anchor);
 761   printf ("no_sub: %d\t", bufp->no_sub);
 762   printf ("not_bol: %d\t", bufp->not_bol);
 763   printf ("not_eol: %d\t", bufp->not_eol);
 764   printf ("syntax: %d\n", bufp->syntax);
 765   /* Perhaps we should print the translate table?  */
 766 }
 767 
 768 
 /* Print, via printchar, the text from WHERE to the end of the pair of
   strings STRING1 (SIZE1 bytes) and STRING2 (SIZE2 bytes).  If WHERE
   lies in STRING1 (per FIRST_STRING_P), the rest of STRING1 is printed
   and then all of STRING2; otherwise printing starts at WHERE's offset
   from STRING2.  A null WHERE prints "(null)".  */
void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    int size1;
    const char *string2;
    int size2;
{
  unsigned idx;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      idx = where - string1;
      while (idx < size1)
        {
          printchar (string1[idx]);
          idx++;
        }

      /* Continue from the very start of the second string.  */
      where = string2;
    }

  idx = where - string2;
  while (idx < size2)
    {
      printchar (string2[idx]);
      idx++;
    }
}
 795 
 796 #else /* not DEBUG */
 797 
 /* Non-DEBUG build: `assert' and every DEBUG_* hook below is defined with
    an empty replacement list, so each use of them expands to nothing and
    their arguments are never evaluated.  */
 798 #undef assert
 799 #define assert(e)
 800 
 /* One stub per hook; the parameter lists only fix the argument count.  */
 801 #define DEBUG_STATEMENT(e)
 802 #define DEBUG_PRINT1(x)
 803 #define DEBUG_PRINT2(x1, x2)
 804 #define DEBUG_PRINT3(x1, x2, x3)
 805 #define DEBUG_PRINT4(x1, x2, x3, x4)
 806 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
 807 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
 808 
 809 #endif /* not DEBUG */
 810 
 811 /* The regexp syntax currently in effect.  `re_set_syntax' stores its
 812    argument here and returns the previous value; the variable may also
 813    be assigned directly.  It starts out as RE_SYNTAX_EMACS.  Each pattern
     buffer stores its own syntax, so this can be changed between regex
     compilations.  */
 814 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
 815 
 816 
 817 /* Specify the precise syntax of regexps for compilation.  This provides
 818    for compatibility for various utilities which historically have
 819    different, incompatible syntaxes.
 820 
 821    The argument SYNTAX is a bit mask comprised of the various bits
 822    defined in regex.h.  We return the old syntax.  */
 823 
 824 reg_syntax_t
 825 re_set_syntax (syntax)
     /* [<][>][^][v][top][bottom][index][help] */
 826     reg_syntax_t syntax;
 827 {
 828   reg_syntax_t ret = re_syntax_options;
 829   
 830   re_syntax_options = syntax;
 831   return ret;
 832 }
 833 
 834 /* This table gives an error message for each of the error codes listed
 835    in regex.h.  Obviously the order here has to be same as there.  */
 836 
 837 static const char *re_error_msg[] =
 838   { NULL,                                       /* REG_NOERROR */
 839     "No match",                                 /* REG_NOMATCH */
 840     "Invalid regular expression",               /* REG_BADPAT */
 841     "Invalid collation character",              /* REG_ECOLLATE */
 842     "Invalid character class name",             /* REG_ECTYPE */
 843     "Trailing backslash",                       /* REG_EESCAPE */
 844     "Invalid back reference",                   /* REG_ESUBREG */
 845     "Unmatched [ or [^",                        /* REG_EBRACK */
 846     "Unmatched ( or \\(",                       /* REG_EPAREN */
 847     "Unmatched \\{",                            /* REG_EBRACE */
 848     "Invalid content of \\{\\}",                /* REG_BADBR */
 849     "Invalid range end",                        /* REG_ERANGE */
 850     "Memory exhausted",                         /* REG_ESPACE */
 851     "Invalid preceding regular expression",     /* REG_BADRPT */
 852     "Premature end of regular expression",      /* REG_EEND */
 853     "Regular expression too big",               /* REG_ESIZE */
 854     "Unmatched ) or \\)",                       /* REG_ERPAREN */
 855   };
 856 
 857 /* Subroutine declarations and macros for regex_compile.  */
 858 
 /* These are old-style declarations without parameter lists, so the
    compiler does not check the arguments at call sites.  store_op1/2 and
    insert_op1/2 are reached through the STORE_JUMP*, INSERT_JUMP* macros
    defined further down.  */
 859 static void store_op1 (), store_op2 ();
 860 static void insert_op1 (), insert_op2 ();
 861 static boolean at_begline_loc_p (), at_endline_loc_p ();
 862 static boolean group_in_compile_stack ();
 863 static reg_errcode_t compile_range ();
 864 
 865 /* Fetch the next character in the uncompiled pattern---translating it 
 866    if necessary.  Also cast from a signed character in the constant
 867    string passed to us by the user to an unsigned char that we can use
 868    as an array index (in, e.g., `translate').  */
 869 #define PATFETCH(c)                                                     \
     /* [<][>][^][v][top][bottom][index][help] */
 870   do {if (p == pend) return REG_EEND;                                   \
 871     c = (unsigned char) *p++;                                           \
 872     if (translate) c = translate[c];                                    \
 873   } while (0)
 874 
 875 /* Fetch the next character in the uncompiled pattern, with no
 876    translation.  */
 877 #define PATFETCH_RAW(c)                                                 \
     /* [<][>][^][v][top][bottom][index][help] */
 878   do {if (p == pend) return REG_EEND;                                   \
 879     c = (unsigned char) *p++;                                           \
 880   } while (0)
 881 
 882 /* Go backwards one character in the pattern, undoing one PATFETCH or
     PATFETCH_RAW.  Expands to a bare decrement of the enclosing
     function's `p'; no check is made against the start of the pattern.  */
 883 #define PATUNFETCH p--
 884 
 885 
 886 /* If `translate' is non-null, return translate[D], else just D.  We
 887    cast the subscript to translate because some data is declared as
 888    `char *', to avoid warnings when a string constant is passed.  But
 889    when we use a character as a subscript we must make it unsigned.
     Only one arm of the conditional is evaluated, so D is evaluated
     once.  `translate' is taken from the scope where the macro is used.  */
 890 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
 891 
 892 
 893 /* Macros for outputting the compiled pattern into `buffer'.  */
 894 
 895 /* If the buffer isn't allocated when it comes in, use this.  */
 896 #define INIT_BUF_SIZE  32
 897 
 898 /* Make sure we have at least N more bytes of space in buffer.
     Expands to a bare `while' statement that keeps calling EXTEND_BUFFER
     until the bytes already written (b - bufp->buffer) plus N fit in
     bufp->allocated.  EXTEND_BUFFER may move the buffer (it then updates
     `b') and may make the enclosing function return an error code.  */
 899 #define GET_BUFFER_SPACE(n)                                             \
 900     while (b - bufp->buffer + (n) > bufp->allocated)                    \
 901       EXTEND_BUFFER ()
 902 
 903 /* Make sure we have one more byte of buffer space and then add C to it.  */
 904 #define BUF_PUSH(c)                                                     \
     /* [<][>][^][v][top][bottom][index][help] */
 905   do {                                                                  \
 906     GET_BUFFER_SPACE (1);                                               \
 907     *b++ = (unsigned char) (c);                                         \
 908   } while (0)
 909 
 910 
 911 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
 912 #define BUF_PUSH_2(c1, c2)                                              \
     /* [<][>][^][v][top][bottom][index][help] */
 913   do {                                                                  \
 914     GET_BUFFER_SPACE (2);                                               \
 915     *b++ = (unsigned char) (c1);                                        \
 916     *b++ = (unsigned char) (c2);                                        \
 917   } while (0)
 918 
 919 
 920 /* As with BUF_PUSH_2, except for three bytes.  */
 921 #define BUF_PUSH_3(c1, c2, c3)                                          \
     /* [<][>][^][v][top][bottom][index][help] */
 922   do {                                                                  \
 923     GET_BUFFER_SPACE (3);                                               \
 924     *b++ = (unsigned char) (c1);                                        \
 925     *b++ = (unsigned char) (c2);                                        \
 926     *b++ = (unsigned char) (c3);                                        \
 927   } while (0)
 928 
 929 
 930 /* Store a jump with opcode OP at LOC to location TO.  We store a
 931    relative address offset by the three bytes the jump itself occupies.
     LOC and TO are pointers into the compiled pattern; the value handed
     to store_op1 is the distance (TO - LOC - 3).  */
 932 #define STORE_JUMP(op, loc, to) \
 933   store_op1 (op, loc, (to) - (loc) - 3)
 934 
 935 /* Likewise, for a two-argument jump.  ARG is passed through to
     store_op2 unchanged as the second operand.  */
 936 #define STORE_JUMP2(op, loc, to, arg) \
 937   store_op2 (op, loc, (to) - (loc) - 3, arg)
 938 
 939 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.
     `b' is taken from the enclosing scope and is not advanced here; the
     callers visible in regex_compile add 3 to it themselves.  */
 940 #define INSERT_JUMP(op, loc, to) \
 941   insert_op1 (op, loc, (to) - (loc) - 3, b)
 942 
 943 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
 944 #define INSERT_JUMP2(op, loc, to, arg) \
 945   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
 946 
 947 
 948 /* This is not an arbitrary limit: the arguments which represent offsets
 949    into the pattern are two bytes long.  So if 2^16 bytes turns out to
 950    be too small, many things would have to change.  */
 951 #define MAX_BUF_SIZE (1L << 16)
 952 
 953 
 954 /* Extend the buffer by twice its current size via realloc and
 955    reset the pointers that pointed into the old block to point to the
 956    correct places in the new one.  If extending the buffer results in it
 957    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
 958 #define EXTEND_BUFFER()                                                 \
     /* [<][>][^][v][top][bottom][index][help] */
 959   do {                                                                  \
 960     unsigned char *old_buffer = bufp->buffer;                           \
 961     if (bufp->allocated == MAX_BUF_SIZE)                                \
 962       return REG_ESIZE;                                                 \
 963     bufp->allocated <<= 1;                                              \
 964     if (bufp->allocated > MAX_BUF_SIZE)                                 \
 965       bufp->allocated = MAX_BUF_SIZE;                                   \
 966     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
 967     if (bufp->buffer == NULL)                                           \
 968       return REG_ESPACE;                                                \
 969     /* If the buffer moved, move all the pointers into it.  */          \
 970     if (old_buffer != bufp->buffer)                                     \
 971       {                                                                 \
 972         b = (b - old_buffer) + bufp->buffer;                            \
 973         begalt = (begalt - old_buffer) + bufp->buffer;                  \
 974         if (fixup_alt_jump)                                             \
 975           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
 976         if (laststart)                                                  \
 977           laststart = (laststart - old_buffer) + bufp->buffer;          \
 978         if (pending_exact)                                              \
 979           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
 980       }                                                                 \
 981   } while (0)
 982 
 983 
 984 /* Since we have one byte reserved for the register number argument to
 985    {start,stop}_memory, the maximum number of groups we can report
 986    things about is what fits in that byte.  */
 987 #define MAX_REGNUM 255
 988 
 989 /* But patterns can have more than `MAX_REGNUM' registers.  We just
 990    ignore the excess.  The counter type is therefore a plain unsigned
     int rather than a single byte.  */
 991 typedef unsigned regnum_t;
 992 
 993 
 994 /* Macros for the compile stack.  */
 995 
 996 /* Since offsets can go either forwards or backwards, this type needs to
 997    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
 998 typedef int pattern_offset_t;
 999 
 /* One compile-stack entry.  regex_compile describes the stack as keeping
    track of unclosed groups.
    NOTE(review): the four offsets presumably save regex_compile's
    `begalt', `fixup_alt_jump', the group's own start and `laststart' as
    offsets into the pattern buffer, and `regnum' the group's register
    number -- confirm against the group-handling code.  */
1000 typedef struct
1001 {
1002   pattern_offset_t begalt_offset;
1003   pattern_offset_t fixup_alt_jump;
1004   pattern_offset_t inner_group_offset;
1005   pattern_offset_t laststart_offset;  
1006   regnum_t regnum;
1007 } compile_stack_elt_t;
1008 
1009 
/* The compile stack itself: an array of entries plus its bookkeeping.
   `size' is the number of entries allocated in `stack' (regex_compile
   starts it at INIT_COMPILE_STACK_SIZE); `avail' indexes the next free
   entry, so the stack is empty when it is 0 and full when it equals
   `size'.  */
1010 typedef struct
1011 {
1012   compile_stack_elt_t *stack;
1013   unsigned size;
1014   unsigned avail;                       /* Offset of next open position.  */
1015 } compile_stack_type;
1016 
1017 
/* Number of entries regex_compile allocates for a fresh compile stack.  */
1018 #define INIT_COMPILE_STACK_SIZE 32
1019 
/* Both tests refer to a variable named `compile_stack' in the scope
   where they are used.  */
1020 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1021 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1022 
1023 /* The next available element.  This is the slot at index `avail', i.e.
     one past the most recently pushed entry, not the entry itself.  */
1024 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1025 
1026 
/* Set the bit for character C in the charset bitmap that starts at `b'.

   C is converted to unsigned char; the result selects byte
   C / BYTEWIDTH of the map and bit C % BYTEWIDTH within that byte.  Both
   uses of C are parenthesized, so an argument that contains a
   low-precedence operator (a conditional expression, for instance) is
   converted as a whole instead of having the cast bind to its first
   operand only.  C is evaluated twice and so must be free of side
   effects.  */
#define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
1031 
1032 
/* Get the next unsigned number in the uncompiled pattern.

   Starting at `p', fetch characters with PATFETCH into `c' for as long
   as they are digits, accumulating their decimal value in NUM.  If NUM
   is negative when the first digit is seen it is reset to 0 first; if no
   digit is found NUM is left unchanged.  On exit `c' holds the last
   character fetched (the first non-digit, or the final digit when the
   pattern ended) and `p' points just past it.  Nothing is done when `p'
   is already at `pend'.

   Accumulation stops once NUM has grown past RE_DUP_MAX, although the
   remaining digits are still consumed.  This keeps an arbitrarily long
   digit string from overflowing NUM, while any value larger than
   RE_DUP_MAX still reaches the caller as a value larger than RE_DUP_MAX.
   NOTE(review): relies on RE_DUP_MAX from regex.h being visible here.

   Expands to a brace block, not a `do ... while (0)', and uses `p',
   `pend' and `c' from the enclosing function.  */
#define GET_UNSIGNED_NUMBER(num)                                        \
  { if (p != pend)                                                      \
      {                                                                 \
        PATFETCH (c);                                                   \
        while (ISDIGIT (c))                                             \
          {                                                             \
            if (num < 0)                                                \
              num = 0;                                                  \
            if (num <= RE_DUP_MAX)                                      \
              num = num * 10 + c - '0';                                 \
            if (p == pend)                                              \
              break;                                                    \
            PATFETCH (c);                                               \
          }                                                             \
      }                                                                 \
  }
1049 
/* Length of the longest class name accepted below (`xdigit'); buffers
   that hold a class name need one more byte for the terminating null.  */
#define CHAR_CLASS_MAX_LENGTH  6

/* Nonzero if STRING is exactly one of the twelve character-class names
   recognised inside `[:' ... `:]'.  STRING is evaluated once per
   comparison made, so it must be free of side effects.  */
#define IS_CHAR_CLASS(string)                                           \
   (   STREQ (string, "alnum")                                          \
    || STREQ (string, "alpha")                                          \
    || STREQ (string, "blank")                                          \
    || STREQ (string, "cntrl")                                          \
    || STREQ (string, "digit")                                          \
    || STREQ (string, "graph")                                          \
    || STREQ (string, "lower")                                          \
    || STREQ (string, "print")                                          \
    || STREQ (string, "punct")                                          \
    || STREQ (string, "space")                                          \
    || STREQ (string, "upper")                                          \
    || STREQ (string, "xdigit"))
1059 
1060 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1061    Returns one of error codes defined in `regex.h', or zero for success.
1062 
1063    Assumes the `allocated' (and perhaps `buffer') and `translate'
1064    fields are set in BUFP on entry.
1065 
1066    If it succeeds, results are put in BUFP (if it returns an error, the
1067    contents of BUFP are undefined):
1068      `buffer' is the compiled pattern;
1069      `syntax' is set to SYNTAX;
1070      `used' is set to the length of the compiled pattern;
1071      `fastmap_accurate' is zero;
1072      `re_nsub' is the number of subexpressions in PATTERN;
1073      `not_bol' and `not_eol' are zero;
1074    
1075    The `fastmap' and `newline_anchor' fields are neither
1076    examined nor set.  */
1077 
1078 static reg_errcode_t
1079 regex_compile (pattern, size, syntax, bufp)
     /* [<][>][^][v][top][bottom][index][help] */
1080      const char *pattern;
1081      int size;
1082      reg_syntax_t syntax;
1083      struct re_pattern_buffer *bufp;
1084 {
1085   /* We fetch characters from PATTERN here.  Even though PATTERN is
1086      `char *' (i.e., signed), we declare these variables as unsigned, so
1087      they can be reliably used as array indices.  */
1088   register unsigned char c, c1;
1089   
1090   /* A random temporary spot in PATTERN.  */
1091   const char *p1;
1092 
1093   /* Points to the end of the buffer, where we should append.  */
1094   register unsigned char *b;
1095   
1096   /* Keeps track of unclosed groups.  */
1097   compile_stack_type compile_stack;
1098 
1099   /* Points to the current (ending) position in the pattern.  */
1100   const char *p = pattern;
1101   const char *pend = pattern + size;
1102   
1103   /* How to translate the characters in the pattern.  */
1104   char *translate = bufp->translate;
1105 
1106   /* Address of the count-byte of the most recently inserted `exactn'
1107      command.  This makes it possible to tell if a new exact-match
1108      character can be added to that command or if the character requires
1109      a new `exactn' command.  */
1110   unsigned char *pending_exact = 0;
1111 
1112   /* Address of start of the most recently finished expression.
1113      This tells, e.g., postfix * where to find the start of its
1114      operand.  Reset at the beginning of groups and alternatives.  */
1115   unsigned char *laststart = 0;
1116 
1117   /* Address of beginning of regexp, or inside of last group.  */
1118   unsigned char *begalt;
1119 
1120   /* Place in the uncompiled pattern (i.e., the {) to
1121      which to go back if the interval is invalid.  */
1122   const char *beg_interval;
1123                 
1124   /* Address of the place where a forward jump should go to the end of
1125      the containing expression.  Each alternative of an `or' -- except the
1126      last -- ends with a forward jump of this sort.  */
1127   unsigned char *fixup_alt_jump = 0;
1128 
1129   /* Counts open-groups as they are encountered.  Remembered for the
1130      matching close-group on the compile stack, so the same register
1131      number is put in the stop_memory as the start_memory.  */
1132   regnum_t regnum = 0;
1133 
1134 #ifdef DEBUG
1135   DEBUG_PRINT1 ("\nCompiling pattern: ");
1136   if (debug)
1137     {
1138       unsigned debug_count;
1139       
1140       for (debug_count = 0; debug_count < size; debug_count++)
1141         printchar (pattern[debug_count]);
1142       putchar ('\n');
1143     }
1144 #endif /* DEBUG */
1145 
1146   /* Initialize the compile stack.  */
1147   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1148   if (compile_stack.stack == NULL)
1149     return REG_ESPACE;
1150 
1151   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1152   compile_stack.avail = 0;
1153 
1154   /* Initialize the pattern buffer.  */
1155   bufp->syntax = syntax;
1156   bufp->fastmap_accurate = 0;
1157   bufp->not_bol = bufp->not_eol = 0;
1158 
1159   /* Set `used' to zero, so that if we return an error, the pattern
1160      printer (for debugging) will think there's no pattern.  We reset it
1161      at the end.  */
1162   bufp->used = 0;
1163   
1164   /* Always count groups, whether or not bufp->no_sub is set.  */
1165   bufp->re_nsub = 0;                            
1166 
1167 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1168   /* Initialize the syntax table.  */
1169    init_syntax_once ();
1170 #endif
1171 
1172   if (bufp->allocated == 0)
1173     {
1174       if (bufp->buffer)
1175         { /* If zero allocated, but buffer is non-null, try to realloc
1176              enough space.  This loses if buffer's address is bogus, but
1177              that is the user's responsibility.  */
1178           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1179         }
1180       else
1181         { /* Caller did not allocate a buffer.  Do it for them.  */
1182           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1183         }
1184       if (!bufp->buffer) return REG_ESPACE;
1185 
1186       bufp->allocated = INIT_BUF_SIZE;
1187     }
1188 
1189   begalt = b = bufp->buffer;
1190 
1191   /* Loop through the uncompiled pattern until we're at the end.  */
1192   while (p != pend)
1193     {
1194       PATFETCH (c);
1195 
1196       switch (c)
1197         {
1198         case '^':
1199           {
1200             if (   /* If at start of pattern, it's an operator.  */
1201                    p == pattern + 1
1202                    /* If context independent, it's an operator.  */
1203                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1204                    /* Otherwise, depends on what's come before.  */
1205                 || at_begline_loc_p (pattern, p, syntax))
1206               BUF_PUSH (begline);
1207             else
1208               goto normal_char;
1209           }
1210           break;
1211 
1212 
1213         case '$':
1214           {
1215             if (   /* If at end of pattern, it's an operator.  */
1216                    p == pend 
1217                    /* If context independent, it's an operator.  */
1218                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1219                    /* Otherwise, depends on what's next.  */
1220                 || at_endline_loc_p (p, pend, syntax))
1221                BUF_PUSH (endline);
1222              else
1223                goto normal_char;
1224            }
1225            break;
1226 
1227 
1228         case '+':
1229         case '?':
1230           if ((syntax & RE_BK_PLUS_QM)
1231               || (syntax & RE_LIMITED_OPS))
1232             goto normal_char;
1233         handle_plus:
1234         case '*':
1235           /* If there is no previous pattern... */
1236           if (!laststart)
1237             {
1238               if (syntax & RE_CONTEXT_INVALID_OPS)
1239                 return REG_BADRPT;
1240               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1241                 goto normal_char;
1242             }
1243 
1244           {
1245             /* Are we optimizing this jump?  */
1246             boolean keep_string_p = false;
1247             
1248             /* 1 means zero (many) matches is allowed.  */
1249             char zero_times_ok = 0, many_times_ok = 0;
1250 
1251             /* If there is a sequence of repetition chars, collapse it
1252                down to just one (the right one).  We can't combine
1253                interval operators with these because of, e.g., `a{2}*',
1254                which should only match an even number of `a's.  */
1255 
1256             for (;;)
1257               {
1258                 zero_times_ok |= c != '+';
1259                 many_times_ok |= c != '?';
1260 
1261                 if (p == pend)
1262                   break;
1263 
1264                 PATFETCH (c);
1265 
1266                 if (c == '*'
1267                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1268                   ;
1269 
1270                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1271                   {
1272                     if (p == pend) return REG_EESCAPE;
1273 
1274                     PATFETCH (c1);
1275                     if (!(c1 == '+' || c1 == '?'))
1276                       {
1277                         PATUNFETCH;
1278                         PATUNFETCH;
1279                         break;
1280                       }
1281 
1282                     c = c1;
1283                   }
1284                 else
1285                   {
1286                     PATUNFETCH;
1287                     break;
1288                   }
1289 
1290                 /* If we get here, we found another repeat character.  */
1291                }
1292 
1293             /* Star, etc. applied to an empty pattern is equivalent
1294                to an empty pattern.  */
1295             if (!laststart)  
1296               break;
1297 
1298             /* Now we know whether or not zero matches is allowed
1299                and also whether or not two or more matches is allowed.  */
1300             if (many_times_ok)
1301               { /* More than one repetition is allowed, so put in at the
1302                    end a backward relative jump from `b' to before the next
1303                    jump we're going to put in below (which jumps from
1304                    laststart to after this jump).  
1305 
1306                    But if we are at the `*' in the exact sequence `.*\n',
1307                    insert an unconditional jump backwards to the .,
1308                    instead of the beginning of the loop.  This way we only
1309                    push a failure point once, instead of every time
1310                    through the loop.  */
1311                 assert (p - 1 > pattern);
1312 
1313                 /* Allocate the space for the jump.  */
1314                 GET_BUFFER_SPACE (3);
1315 
1316                 /* We know we are not at the first character of the pattern,
1317                    because laststart was nonzero.  And we've already
1318                    incremented `p', by the way, to be the character after
1319                    the `*'.  Do we have to do something analogous here
1320                    for null bytes, because of RE_DOT_NOT_NULL?  */
1321                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1322                     && zero_times_ok
1323                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1324                     && !(syntax & RE_DOT_NEWLINE))
1325                   { /* We have .*\n.  */
1326                     STORE_JUMP (jump, b, laststart);
1327                     keep_string_p = true;
1328                   }
1329                 else
1330                   /* Anything else.  */
1331                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1332 
1333                 /* We've added more stuff to the buffer.  */
1334                 b += 3;
1335               }
1336 
1337             /* On failure, jump from laststart to b + 3, which will be the
1338                end of the buffer after this jump is inserted.  */
1339             GET_BUFFER_SPACE (3);
1340             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1341                                        : on_failure_jump,
1342                          laststart, b + 3);
1343             pending_exact = 0;
1344             b += 3;
1345 
1346             if (!zero_times_ok)
1347               {
1348                 /* At least one repetition is required, so insert a
1349                    `dummy_failure_jump' before the initial
1350                    `on_failure_jump' instruction of the loop. This
1351                    effects a skip over that instruction the first time
1352                    we hit that loop.  */
1353                 GET_BUFFER_SPACE (3);
1354                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1355                 b += 3;
1356               }
1357             }
1358           break;
1359 
1360 
1361         case '.':
1362           laststart = b;
1363           BUF_PUSH (anychar);
1364           break;
1365 
1366 
1367         case '[':
1368           {
1369             boolean had_char_class = false;
1370 
1371             if (p == pend) return REG_EBRACK;
1372 
1373             /* Ensure that we have enough space to push a charset: the
1374                opcode, the length count, and the bitset; 34 bytes in all.  */
1375             GET_BUFFER_SPACE (34);
1376 
1377             laststart = b;
1378 
1379             /* We test `*p == '^' twice, instead of using an if
1380                statement, so we only need one BUF_PUSH.  */
1381             BUF_PUSH (*p == '^' ? charset_not : charset); 
1382             if (*p == '^')
1383               p++;
1384 
1385             /* Remember the first position in the bracket expression.  */
1386             p1 = p;
1387 
1388             /* Push the number of bytes in the bitmap.  */
1389             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1390 
1391             /* Clear the whole map.  */
1392             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1393 
1394             /* charset_not matches newline according to a syntax bit.  */
1395             if ((re_opcode_t) b[-2] == charset_not
1396                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1397               SET_LIST_BIT ('\n');
1398 
1399             /* Read in characters and ranges, setting map bits.  */
1400             for (;;)
1401               {
1402                 if (p == pend) return REG_EBRACK;
1403 
1404                 PATFETCH (c);
1405 
1406                 /* \ might escape characters inside [...] and [^...].  */
1407                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1408                   {
1409                     if (p == pend) return REG_EESCAPE;
1410 
1411                     PATFETCH (c1);
1412                     SET_LIST_BIT (c1);
1413                     continue;
1414                   }
1415 
1416                 /* Could be the end of the bracket expression.  If it's
1417                    not (i.e., when the bracket expression is `[]' so
1418                    far), the ']' character bit gets set way below.  */
1419                 if (c == ']' && p != p1 + 1)
1420                   break;
1421 
1422                 /* Look ahead to see if it's a range when the last thing
1423                    was a character class.  */
1424                 if (had_char_class && c == '-' && *p != ']')
1425                   return REG_ERANGE;
1426 
1427                 /* Look ahead to see if it's a range when the last thing
1428                    was a character: if this is a hyphen not at the
1429                    beginning or the end of a list, then it's the range
1430                    operator.  */
1431                 if (c == '-' 
1432                     && !(p - 2 >= pattern && p[-2] == '[') 
1433                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1434                     && *p != ']')
1435                   {
1436                     reg_errcode_t ret
1437                       = compile_range (&p, pend, translate, syntax, b);
1438                     if (ret != REG_NOERROR) return ret;
1439                   }
1440 
1441                 else if (p[0] == '-' && p[1] != ']')
1442                   { /* This handles ranges made up of characters only.  */
1443                     reg_errcode_t ret;
1444 
1445                     /* Move past the `-'.  */
1446                     PATFETCH (c1);
1447                     
1448                     ret = compile_range (&p, pend, translate, syntax, b);
1449                     if (ret != REG_NOERROR) return ret;
1450                   }
1451 
1452                 /* See if we're at the beginning of a possible character
1453                    class.  */
1454 
1455                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1456                   { /* Leave room for the null.  */
1457                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1458 
1459                     PATFETCH (c);
1460                     c1 = 0;
1461 
1462                     /* If pattern is `[[:'.  */
1463                     if (p == pend) return REG_EBRACK;
1464 
1465                     for (;;)
1466                       {
1467                         PATFETCH (c);
1468                         if (c == ':' || c == ']' || p == pend
1469                             || c1 == CHAR_CLASS_MAX_LENGTH)
1470                           break;
1471                         str[c1++] = c;
1472                       }
1473                     str[c1] = '\0';
1474 
1475                     /* If isn't a word bracketed by `[:' and:`]':
1476                        undo the ending character, the letters, and leave 
1477                        the leading `:' and `[' (but set bits for them).  */
1478                     if (c == ':' && *p == ']')
1479                       {
1480                         int ch;
1481                         boolean is_alnum = STREQ (str, "alnum");
1482                         boolean is_alpha = STREQ (str, "alpha");
1483                         boolean is_blank = STREQ (str, "blank");
1484                         boolean is_cntrl = STREQ (str, "cntrl");
1485                         boolean is_digit = STREQ (str, "digit");
1486                         boolean is_graph = STREQ (str, "graph");
1487                         boolean is_lower = STREQ (str, "lower");
1488                         boolean is_print = STREQ (str, "print");
1489                         boolean is_punct = STREQ (str, "punct");
1490                         boolean is_space = STREQ (str, "space");
1491                         boolean is_upper = STREQ (str, "upper");
1492                         boolean is_xdigit = STREQ (str, "xdigit");
1493                         
1494                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1495 
1496                         /* Throw away the ] at the end of the character
1497                            class.  */
1498                         PATFETCH (c);                                   
1499 
1500                         if (p == pend) return REG_EBRACK;
1501 
1502                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1503                           {
1504                             if (   (is_alnum  && ISALNUM (ch))
1505                                 || (is_alpha  && ISALPHA (ch))
1506                                 || (is_blank  && ISBLANK (ch))
1507                                 || (is_cntrl  && ISCNTRL (ch))
1508                                 || (is_digit  && ISDIGIT (ch))
1509                                 || (is_graph  && ISGRAPH (ch))
1510                                 || (is_lower  && ISLOWER (ch))
1511                                 || (is_print  && ISPRINT (ch))
1512                                 || (is_punct  && ISPUNCT (ch))
1513                                 || (is_space  && ISSPACE (ch))
1514                                 || (is_upper  && ISUPPER (ch))
1515                                 || (is_xdigit && ISXDIGIT (ch)))
1516                             SET_LIST_BIT (ch);
1517                           }
1518                         had_char_class = true;
1519                       }
1520                     else
1521                       {
1522                         c1++;
1523                         while (c1--)    
1524                           PATUNFETCH;
1525                         SET_LIST_BIT ('[');
1526                         SET_LIST_BIT (':');
1527                         had_char_class = false;
1528                       }
1529                   }
1530                 else
1531                   {
1532                     had_char_class = false;
1533                     SET_LIST_BIT (c);
1534                   }
1535               }
1536 
1537             /* Discard any (non)matching list bytes that are all 0 at the
1538                end of the map.  Decrease the map-length byte too.  */
1539             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 
1540               b[-1]--; 
1541             b += b[-1];
1542           }
1543           break;
1544 
1545 
1546         case '(':
1547           if (syntax & RE_NO_BK_PARENS)
1548             goto handle_open;
1549           else
1550             goto normal_char;
1551 
1552 
1553         case ')':
1554           if (syntax & RE_NO_BK_PARENS)
1555             goto handle_close;
1556           else
1557             goto normal_char;
1558 
1559 
1560         case '\n':
1561           if (syntax & RE_NEWLINE_ALT)
1562             goto handle_alt;
1563           else
1564             goto normal_char;
1565 
1566 
1567         case '|':
1568           if (syntax & RE_NO_BK_VBAR)
1569             goto handle_alt;
1570           else
1571             goto normal_char;
1572 
1573 
1574         case '{':
1575            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1576              goto handle_interval;
1577            else
1578              goto normal_char;
1579 
1580 
1581         case '\\':
1582           if (p == pend) return REG_EESCAPE;
1583 
1584           /* Do not translate the character after the \, so that we can
1585              distinguish, e.g., \B from \b, even if we normally would
1586              translate, e.g., B to b.  */
1587           PATFETCH_RAW (c);
1588 
1589           switch (c)
1590             {
1591             case '(':
1592               if (syntax & RE_NO_BK_PARENS)
1593                 goto normal_backslash;
1594 
1595             handle_open:
1596               bufp->re_nsub++;
1597               regnum++;
1598 
1599               if (COMPILE_STACK_FULL)
1600                 { 
1601                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1602                             compile_stack_elt_t);
1603                   if (compile_stack.stack == NULL) return REG_ESPACE;
1604 
1605                   compile_stack.size <<= 1;
1606                 }
1607 
1608               /* These are the values to restore when we hit end of this
1609                  group.  They are all relative offsets, so that if the
1610                  whole pattern moves because of realloc, they will still
1611                  be valid.  */
1612               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1613               COMPILE_STACK_TOP.fixup_alt_jump 
1614                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1615               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1616               COMPILE_STACK_TOP.regnum = regnum;
1617 
1618               /* We will eventually replace the 0 with the number of
1619                  groups inner to this one.  But do not push a
1620                  start_memory for groups beyond the last one we can
1621                  represent in the compiled pattern.  */
1622               if (regnum <= MAX_REGNUM)
1623                 {
1624                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1625                   BUF_PUSH_3 (start_memory, regnum, 0);
1626                 }
1627                 
1628               compile_stack.avail++;
1629 
1630               fixup_alt_jump = 0;
1631               laststart = 0;
1632               begalt = b;
1633               /* If we've reached MAX_REGNUM groups, then this open
1634                  won't actually generate any code, so we'll have to
1635                  clear pending_exact explicitly.  */
1636               pending_exact = 0;
1637               break;
1638 
1639 
1640             case ')':
1641               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1642 
1643               if (COMPILE_STACK_EMPTY)
1644                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1645                   goto normal_backslash;
1646                 else
1647                   return REG_ERPAREN;
1648 
1649             handle_close:
1650               if (fixup_alt_jump)
1651                 { /* Push a dummy failure point at the end of the
1652                      alternative for a possible future
1653                      `pop_failure_jump' to pop.  See comments at
1654                      `push_dummy_failure' in `re_match_2'.  */
1655                   BUF_PUSH (push_dummy_failure);
1656                   
1657                   /* We allocated space for this jump when we assigned
1658                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1659                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1660                 }
1661 
1662               /* See similar code for backslashed left paren above.  */
1663               if (COMPILE_STACK_EMPTY)
1664                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1665                   goto normal_char;
1666                 else
1667                   return REG_ERPAREN;
1668 
1669               /* Since we just checked for an empty stack above, this
1670                  ``can't happen''.  */
1671               assert (compile_stack.avail != 0);
1672               {
1673                 /* We don't just want to restore into `regnum', because
1674                    later groups should continue to be numbered higher,
1675                    as in `(ab)c(de)' -- the second group is #2.  */
1676                 regnum_t this_group_regnum;
1677 
1678                 compile_stack.avail--;          
1679                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1680                 fixup_alt_jump
1681                   = COMPILE_STACK_TOP.fixup_alt_jump
1682                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 
1683                     : 0;
1684                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1685                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1686                 /* If we've reached MAX_REGNUM groups, then this open
1687                    won't actually generate any code, so we'll have to
1688                    clear pending_exact explicitly.  */
1689                 pending_exact = 0;
1690 
1691                 /* We're at the end of the group, so now we know how many
1692                    groups were inside this one.  */
1693                 if (this_group_regnum <= MAX_REGNUM)
1694                   {
1695                     unsigned char *inner_group_loc
1696                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1697                     
1698                     *inner_group_loc = regnum - this_group_regnum;
1699                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1700                                 regnum - this_group_regnum);
1701                   }
1702               }
1703               break;
1704 
1705 
1706             case '|':                                   /* `\|'.  */
1707               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1708                 goto normal_backslash;
1709             handle_alt:
1710               if (syntax & RE_LIMITED_OPS)
1711                 goto normal_char;
1712 
1713               /* Insert before the previous alternative a jump which
1714                  jumps to this alternative if the former fails.  */
1715               GET_BUFFER_SPACE (3);
1716               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1717               pending_exact = 0;
1718               b += 3;
1719 
1720               /* The alternative before this one has a jump after it
1721                  which gets executed if it gets matched.  Adjust that
1722                  jump so it will jump to this alternative's analogous
1723                  jump (put in below, which in turn will jump to the next
1724                  (if any) alternative's such jump, etc.).  The last such
1725                  jump jumps to the correct final destination.  A picture:
1726                           _____ _____ 
1727                           |   | |   |   
1728                           |   v |   v 
1729                          a | b   | c   
1730 
1731                  If we are at `b', then fixup_alt_jump right now points to a
1732                  three-byte space after `a'.  We'll put in the jump, set
1733                  fixup_alt_jump to right after `b', and leave behind three
1734                  bytes which we'll fill in when we get to after `c'.  */
1735 
1736               if (fixup_alt_jump)
1737                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1738 
1739               /* Mark and leave space for a jump after this alternative,
1740                  to be filled in later either by next alternative or
1741                  when know we're at the end of a series of alternatives.  */
1742               fixup_alt_jump = b;
1743               GET_BUFFER_SPACE (3);
1744               b += 3;
1745 
1746               laststart = 0;
1747               begalt = b;
1748               break;
1749 
1750 
1751             case '{': 
1752               /* If \{ is a literal.  */
1753               if (!(syntax & RE_INTERVALS)
1754                      /* If we're at `\{' and it's not the open-interval 
1755                         operator.  */
1756                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1757                   || (p - 2 == pattern  &&  p == pend))
1758                 goto normal_backslash;
1759 
1760             handle_interval:
1761               {
1762                 /* If got here, then the syntax allows intervals.  */
1763 
1764                 /* At least (most) this many matches must be made.  */
1765                 int lower_bound = -1, upper_bound = -1;
1766 
1767                 beg_interval = p - 1;
1768 
1769                 if (p == pend)
1770                   {
1771                     if (syntax & RE_NO_BK_BRACES)
1772                       goto unfetch_interval;
1773                     else
1774                       return REG_EBRACE;
1775                   }
1776 
1777                 GET_UNSIGNED_NUMBER (lower_bound);
1778 
1779                 if (c == ',')
1780                   {
1781                     GET_UNSIGNED_NUMBER (upper_bound);
1782                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1783                   }
1784                 else
1785                   /* Interval such as `{1}' => match exactly once. */
1786                   upper_bound = lower_bound;
1787 
1788                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1789                     || lower_bound > upper_bound)
1790                   {
1791                     if (syntax & RE_NO_BK_BRACES)
1792                       goto unfetch_interval;
1793                     else 
1794                       return REG_BADBR;
1795                   }
1796 
1797                 if (!(syntax & RE_NO_BK_BRACES)) 
1798                   {
1799                     if (c != '\\') return REG_EBRACE;
1800 
1801                     PATFETCH (c);
1802                   }
1803 
1804                 if (c != '}')
1805                   {
1806                     if (syntax & RE_NO_BK_BRACES)
1807                       goto unfetch_interval;
1808                     else 
1809                       return REG_BADBR;
1810                   }
1811 
1812                 /* We just parsed a valid interval.  */
1813 
1814                 /* If it's invalid to have no preceding re.  */
1815                 if (!laststart)
1816                   {
1817                     if (syntax & RE_CONTEXT_INVALID_OPS)
1818                       return REG_BADRPT;
1819                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1820                       laststart = b;
1821                     else
1822                       goto unfetch_interval;
1823                   }
1824 
1825                 /* If the upper bound is zero, don't want to succeed at
1826                    all; jump from `laststart' to `b + 3', which will be
1827                    the end of the buffer after we insert the jump.  */
1828                  if (upper_bound == 0)
1829                    {
1830                      GET_BUFFER_SPACE (3);
1831                      INSERT_JUMP (jump, laststart, b + 3);
1832                      b += 3;
1833                    }
1834 
1835                  /* Otherwise, we have a nontrivial interval.  When
1836                     we're all done, the pattern will look like:
1837                       set_number_at <jump count> <upper bound>
1838                       set_number_at <succeed_n count> <lower bound>
1839                       succeed_n <after jump addr> <succeed_n count>
1840                       <body of loop>
1841                       jump_n <succeed_n addr> <jump count>
1842                     (The upper bound and `jump_n' are omitted if
1843                     `upper_bound' is 1, though.)  */
1844                  else 
1845                    { /* If the upper bound is > 1, we need to insert
1846                         more at the end of the loop.  */
1847                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1848 
1849                      GET_BUFFER_SPACE (nbytes);
1850 
1851                      /* Initialize lower bound of the `succeed_n', even
1852                         though it will be set during matching by its
1853                         attendant `set_number_at' (inserted next),
1854                         because `re_compile_fastmap' needs to know.
1855                         Jump to the `jump_n' we might insert below.  */
1856                      INSERT_JUMP2 (succeed_n, laststart,
1857                                    b + 5 + (upper_bound > 1) * 5,
1858                                    lower_bound);
1859                      b += 5;
1860 
1861                      /* Code to initialize the lower bound.  Insert 
1862                         before the `succeed_n'.  The `5' is the last two
1863                         bytes of this `set_number_at', plus 3 bytes of
1864                         the following `succeed_n'.  */
1865                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1866                      b += 5;
1867 
1868                      if (upper_bound > 1)
1869                        { /* More than one repetition is allowed, so
1870                             append a backward jump to the `succeed_n'
1871                             that starts this interval.
1872                             
1873                             When we've reached this during matching,
1874                             we'll have matched the interval once, so
1875                             jump back only `upper_bound - 1' times.  */
1876                          STORE_JUMP2 (jump_n, b, laststart + 5,
1877                                       upper_bound - 1);
1878                          b += 5;
1879 
1880                          /* The location we want to set is the second
1881                             parameter of the `jump_n'; that is `b-2' as
1882                             an absolute address.  `laststart' will be
1883                             the `set_number_at' we're about to insert;
1884                             `laststart+3' the number to set, the source
1885                             for the relative address.  But we are
1886                             inserting into the middle of the pattern --
1887                             so everything is getting moved up by 5.
1888                             Conclusion: (b - 2) - (laststart + 3) + 5,
1889                             i.e., b - laststart.
1890                             
1891                             We insert this at the beginning of the loop
1892                             so that if we fail during matching, we'll
1893                             reinitialize the bounds.  */
1894                          insert_op2 (set_number_at, laststart, b - laststart,
1895                                      upper_bound - 1, b);
1896                          b += 5;
1897                        }
1898                    }
1899                 pending_exact = 0;
1900                 beg_interval = NULL;
1901               }
1902               break;
1903 
1904             unfetch_interval:
1905               /* If an invalid interval, match the characters as literals.  */
1906                assert (beg_interval);
1907                p = beg_interval;
1908                beg_interval = NULL;
1909 
1910                /* normal_char and normal_backslash need `c'.  */
1911                PATFETCH (c);    
1912 
1913                if (!(syntax & RE_NO_BK_BRACES))
1914                  {
1915                    if (p > pattern  &&  p[-1] == '\\')
1916                      goto normal_backslash;
1917                  }
1918                goto normal_char;
1919 
1920 #ifdef emacs
1921             /* There is no way to specify the before_dot and after_dot
1922                operators.  rms says this is ok.  --karl  */
1923             case '=':
1924               BUF_PUSH (at_dot);
1925               break;
1926 
1927             case 's':   
1928               laststart = b;
1929               PATFETCH (c);
1930               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1931               break;
1932 
1933             case 'S':
1934               laststart = b;
1935               PATFETCH (c);
1936               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1937               break;
1938 #endif /* emacs */
1939 
1940 
1941             case 'w':
1942               laststart = b;
1943               BUF_PUSH (wordchar);
1944               break;
1945 
1946 
1947             case 'W':
1948               laststart = b;
1949               BUF_PUSH (notwordchar);
1950               break;
1951 
1952 
1953             case '<':
1954               BUF_PUSH (wordbeg);
1955               break;
1956 
1957             case '>':
1958               BUF_PUSH (wordend);
1959               break;
1960 
1961             case 'b':
1962               BUF_PUSH (wordbound);
1963               break;
1964 
1965             case 'B':
1966               BUF_PUSH (notwordbound);
1967               break;
1968 
1969             case '`':
1970               BUF_PUSH (begbuf);
1971               break;
1972 
1973             case '\'':
1974               BUF_PUSH (endbuf);
1975               break;
1976 
1977             case '1': case '2': case '3': case '4': case '5':
1978             case '6': case '7': case '8': case '9':
1979               if (syntax & RE_NO_BK_REFS)
1980                 goto normal_char;
1981 
1982               c1 = c - '0';
1983 
1984               if (c1 > regnum)
1985                 return REG_ESUBREG;
1986 
1987               /* Can't back reference to a subexpression if inside of it.  */
1988               if (group_in_compile_stack (compile_stack, c1))
1989                 goto normal_char;
1990 
1991               laststart = b;
1992               BUF_PUSH_2 (duplicate, c1);
1993               break;
1994 
1995 
1996             case '+':
1997             case '?':
1998               if (syntax & RE_BK_PLUS_QM)
1999                 goto handle_plus;
2000               else
2001                 goto normal_backslash;
2002 
2003             default:
2004             normal_backslash:
2005               /* You might think it would be useful for \ to mean
2006                  not to translate; but if we don't translate it
2007                  it will never match anything.  */
2008               c = TRANSLATE (c);
2009               goto normal_char;
2010             }
2011           break;
2012 
2013 
2014         default:
2015         /* Expects the character in `c'.  */
2016         normal_char:
2017               /* If no exactn currently being built.  */
2018           if (!pending_exact 
2019 
2020               /* If last exactn not at current position.  */
2021               || pending_exact + *pending_exact + 1 != b
2022               
2023               /* We have only one byte following the exactn for the count.  */
2024               || *pending_exact == (1 << BYTEWIDTH) - 1
2025 
2026               /* If followed by a repetition operator.  */
2027               || *p == '*' || *p == '^'
2028               || ((syntax & RE_BK_PLUS_QM)
2029                   ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2030                   : (*p == '+' || *p == '?'))
2031               || ((syntax & RE_INTERVALS)
2032                   && ((syntax & RE_NO_BK_BRACES)
2033                       ? *p == '{'
2034                       : (p[0] == '\\' && p[1] == '{'))))
2035             {
2036               /* Start building a new exactn.  */
2037               
2038               laststart = b;
2039 
2040               BUF_PUSH_2 (exactn, 0);
2041               pending_exact = b - 1;
2042             }
2043             
2044           BUF_PUSH (c);
2045           (*pending_exact)++;
2046           break;
2047         } /* switch (c) */
2048     } /* while p != pend */
2049 
2050   
2051   /* Through the pattern now.  */
2052   
2053   if (fixup_alt_jump)
2054     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2055 
2056   if (!COMPILE_STACK_EMPTY) 
2057     return REG_EPAREN;
2058 
2059   free (compile_stack.stack);
2060 
2061   /* We have succeeded; set the length of the buffer.  */
2062   bufp->used = b - bufp->buffer;
2063 
2064 #ifdef DEBUG
2065   if (debug)
2066     {
2067       DEBUG_PRINT1 ("\nCompiled pattern: ");
2068       print_compiled_pattern (bufp);
2069     }
2070 #endif /* DEBUG */
2071 
2072   return REG_NOERROR;
2073 } /* regex_compile */
2074 
2075 /* Subroutines for `regex_compile'.  */
2076 
2077 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2078 
2079 static void
2080 store_op1 (op, loc, arg)
     /* [<][>][^][v][top][bottom][index][help] */
2081     re_opcode_t op;
2082     unsigned char *loc;
2083     int arg;
2084 {
2085   *loc = (unsigned char) op;
2086   STORE_NUMBER (loc + 1, arg);
2087 }
2088 
2089 
2090 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2091 
2092 static void
2093 store_op2 (op, loc, arg1, arg2)
     /* [<][>][^][v][top][bottom][index][help] */
2094     re_opcode_t op;
2095     unsigned char *loc;
2096     int arg1, arg2;
2097 {
2098   *loc = (unsigned char) op;
2099   STORE_NUMBER (loc + 1, arg1);
2100   STORE_NUMBER (loc + 3, arg2);
2101 }
2102 
2103 
2104 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2105    for OP followed by two-byte integer parameter ARG.  */
2106 
2107 static void
2108 insert_op1 (op, loc, arg, end)
     /* [<][>][^][v][top][bottom][index][help] */
2109     re_opcode_t op;
2110     unsigned char *loc;
2111     int arg;
2112     unsigned char *end;    
2113 {
2114   register unsigned char *pfrom = end;
2115   register unsigned char *pto = end + 3;
2116 
2117   while (pfrom != loc)
2118     *--pto = *--pfrom;
2119     
2120   store_op1 (op, loc, arg);
2121 }
2122 
2123 
2124 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2125 
2126 static void
2127 insert_op2 (op, loc, arg1, arg2, end)
     /* [<][>][^][v][top][bottom][index][help] */
2128     re_opcode_t op;
2129     unsigned char *loc;
2130     int arg1, arg2;
2131     unsigned char *end;    
2132 {
2133   register unsigned char *pfrom = end;
2134   register unsigned char *pto = end + 5;
2135 
2136   while (pfrom != loc)
2137     *--pto = *--pfrom;
2138     
2139   store_op2 (op, loc, arg1, arg2);
2140 }
2141 
2142 
2143 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2144    after an alternative or a begin-subexpression.  We assume there is at
2145    least one character before the ^.  */
2146 
2147 static boolean
2148 at_begline_loc_p (pattern, p, syntax)
     /* [<][>][^][v][top][bottom][index][help] */
2149     const char *pattern, *p;
2150     reg_syntax_t syntax;
2151 {
2152   const char *prev = p - 2;
2153   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2154   
2155   return
2156        /* After a subexpression?  */
2157        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2158        /* After an alternative?  */
2159     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2160 }
2161 
2162 
2163 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2164    at least one character after the $, i.e., `P < PEND'.  */
2165 
2166 static boolean
2167 at_endline_loc_p (p, pend, syntax)
     /* [<][>][^][v][top][bottom][index][help] */
2168     const char *p, *pend;
2169     int syntax;
2170 {
2171   const char *next = p;
2172   boolean next_backslash = *next == '\\';
2173   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2174   
2175   return
2176        /* Before a subexpression?  */
2177        (syntax & RE_NO_BK_PARENS ? *next == ')'
2178         : next_backslash && next_next && *next_next == ')')
2179        /* Before an alternative?  */
2180     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2181         : next_backslash && next_next && *next_next == '|');
2182 }
2183 
2184 
2185 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 
2186    false if it's not.  */
2187 
2188 static boolean
2189 group_in_compile_stack (compile_stack, regnum)
     /* [<][>][^][v][top][bottom][index][help] */
2190     compile_stack_type compile_stack;
2191     regnum_t regnum;
2192 {
2193   int this_element;
2194 
2195   for (this_element = compile_stack.avail - 1;  
2196        this_element >= 0; 
2197        this_element--)
2198     if (compile_stack.stack[this_element].regnum == regnum)
2199       return true;
2200 
2201   return false;
2202 }
2203 
2204 
2205 /* Read the ending character of a range (in a bracket expression) from the
2206    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2207    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2208    Then we set the translation of all bits between the starting and
2209    ending characters (inclusive) in the compiled pattern B.
2210    
2211    Return an error code.
2212    
2213    We use these short variable names so we can use the same macros as
2214    `regex_compile' itself.  */
2215 
2216 static reg_errcode_t
2217 compile_range (p_ptr, pend, translate, syntax, b)
     /* [<][>][^][v][top][bottom][index][help] */
2218     const char **p_ptr, *pend;
2219     char *translate;
2220     reg_syntax_t syntax;
2221     unsigned char *b;
2222 {
2223   unsigned this_char;
2224 
2225   const char *p = *p_ptr;
2226   int range_start, range_end;
2227   
2228   if (p == pend)
2229     return REG_ERANGE;
2230 
2231   /* Even though the pattern is a signed `char *', we need to fetch
2232      with unsigned char *'s; if the high bit of the pattern character
2233      is set, the range endpoints will be negative if we fetch using a
2234      signed char *.
2235 
2236      We also want to fetch the endpoints without translating them; the 
2237      appropriate translation is done in the bit-setting loop below.  */
2238   range_start = ((unsigned char *) p)[-2];
2239   range_end   = ((unsigned char *) p)[0];
2240 
2241   /* Have to increment the pointer into the pattern string, so the
2242      caller isn't still at the ending character.  */
2243   (*p_ptr)++;
2244 
2245   /* If the start is after the end, the range is empty.  */
2246   if (range_start > range_end)
2247     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2248 
2249   /* Here we see why `this_char' has to be larger than an `unsigned
2250      char' -- the range is inclusive, so if `range_end' == 0xff
2251      (assuming 8-bit characters), we would otherwise go into an infinite
2252      loop, since all characters <= 0xff.  */
2253   for (this_char = range_start; this_char <= range_end; this_char++)
2254     {
2255       SET_LIST_BIT (TRANSLATE (this_char));
2256     }
2257   
2258   return REG_NOERROR;
2259 }
2260 
2261 /* Failure stack declarations and macros; both re_compile_fastmap and
2262    re_match_2 use a failure stack.  These have to be macros because of
2263    REGEX_ALLOCATE.  */
2264    
2265 
2266 /* Number of failure points for which to initially allocate space
2267    when matching.  If this number is exceeded, we allocate more
2268    space, so it is not a hard limit.  */
2269 #ifndef INIT_FAILURE_ALLOC
2270 #define INIT_FAILURE_ALLOC 5
2271 #endif
2272 
2273 /* Roughly the maximum number of failure points on the stack.  Would be
2274    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2275    This is a variable only so users of regex can assign to it; we never
2276    change it ourselves.  */
2277 int re_max_failures = 2000;
2278 
2279 typedef const unsigned char *fail_stack_elt_t;
2280 
2281 typedef struct
2282 {
2283   fail_stack_elt_t *stack;
2284   unsigned size;
2285   unsigned avail;                       /* Offset of next open position.  */
2286 } fail_stack_type;
2287 
2288 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
     /* [<][>][^][v][top][bottom][index][help] */
2289 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
     /* [<][>][^][v][top][bottom][index][help] */
2290 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
     /* [<][>][^][v][top][bottom][index][help] */
2291 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
     /* [<][>][^][v][top][bottom][index][help] */
2292 
2293 
2294 /* Initialize `fail_stack' (a variable of that name must be in scope):
        allocate INIT_FAILURE_ALLOC slots with REGEX_ALLOCATE and mark the
        stack empty.  Do `return -2' -- from the function in which the macro
        is expanded -- if the alloc fails.  */
2295 
2296 #define INIT_FAIL_STACK()                                               \
     /* [<][>][^][v][top][bottom][index][help] */
2297   do {                                                                  \
2298     fail_stack.stack = (fail_stack_elt_t *)                             \
2299       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
2300                                                                         \
2301     if (fail_stack.stack == NULL)                                       \
2302       return -2;                                                        \
2303                                                                         \
2304     fail_stack.size = INIT_FAILURE_ALLOC;                               \
2305     fail_stack.avail = 0;                                               \
2306   } while (0)
2307 
2308 
2309 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2310 
2311    Return 1 if succeeds, and 0 if either ran out of memory
2312    allocating space for it or it was already too large.  
2313    
2314    REGEX_REALLOCATE requires `destination' be declared.

        The size limit is re_max_failures * MAX_FAILURE_ITEMS, so `num_regs'
        (read by MAX_FAILURE_ITEMS) must also be in scope.  FAIL_STACK is
        evaluated several times and must not have side effects.

        The result of REGEX_REALLOCATE is stored straight into the `stack'
        member, so after a failed reallocation that member is NULL and the
        previous pointer is no longer reachable through FAIL_STACK.  `size'
        is only doubled when the reallocation succeeded.  */
2315 
2316 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
     /* [<][>][^][v][top][bottom][index][help] */
2317   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
2318    ? 0                                                                  \
2319    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
2320         REGEX_REALLOCATE ((fail_stack).stack,                           \
2321           (fail_stack).size * sizeof (fail_stack_elt_t),                \
2322           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
2323                                                                         \
2324       (fail_stack).stack == NULL                                        \
2325       ? 0                                                               \
2326       : ((fail_stack).size <<= 1,                                       \
2327          1)))
2328 
2329 
/* Push PATTERN_OP on FAIL_STACK, doubling the stack first if it is full.

   Return 1 if was able to do so and 0 if the stack could not be grown
   (out of memory, or it had already reached its size limit).

   The fullness test uses the FAIL_STACK argument itself rather than
   FAIL_STACK_FULL (), which always looks at the variable literally named
   `fail_stack'; PATTERN_OP is parenthesized so that an expression such
   as `p + j' is stored as a whole.  FAIL_STACK is evaluated more than
   once and must not have side effects.  */
#define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
  ((((fail_stack).avail == (fail_stack).size)                           \
    && !DOUBLE_FAIL_STACK (fail_stack))                                 \
    ? 0                                                                 \
    : ((fail_stack).stack[(fail_stack).avail++] = (pattern_op),         \
       1))
2340 
/* This pushes an item onto the failure stack.  ITEM is converted to
   `fail_stack_elt_t', so it must be a value that survives a round trip
   through that pointer type.  Assumes the variable `fail_stack', and
   that a free slot is available (no bounds check is made here).
   Probably should only be called from within `PUSH_FAILURE_POINT'.

   ITEM is parenthesized so the cast applies to the whole argument, not
   just to its first operand.  */
#define PUSH_FAILURE_ITEM(item)                                         \
  (fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (item))

/* The complement operation.  Assumes `fail_stack' is nonempty.  */
#define POP_FAILURE_ITEM() (fail_stack.stack[--fail_stack.avail])
     /* [<][>][^][v][top][bottom][index][help] */
2349 
2350 /* Used to omit pushing failure point id's when we're not debugging.
        With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_ITEM and DEBUG_POP
        stores the popped item through ITEM_ADDR; otherwise both expand to
        nothing, so the stack layout differs by one item between the two
        builds (see NUM_NONREG_ITEMS).  */
2351 #ifdef DEBUG
2352 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2353 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
     /* [<][>][^][v][top][bottom][index][help] */
2354 #else
2355 #define DEBUG_PUSH(item)
     /* [<][>][^][v][top][bottom][index][help] */
2356 #define DEBUG_POP(item_addr)
     /* [<][>][^][v][top][bottom][index][help] */
2357 #endif
2358 
2359 
2360 /* Push the information about the state we will need
2361    if we ever fail back to it.  
2362    
2363    Requires variables fail_stack, regstart, regend, reg_info, and
2364    num_regs be declared; lowest_active_reg and highest_active_reg are
        read as well.  DOUBLE_FAIL_STACK requires `destination' be
2365    declared, which this macro does itself.
2366    
        Items are pushed in this order: for each register from
        lowest_active_reg to highest_active_reg its start, end and info
        word; then lowest_active_reg, highest_active_reg, PATTERN_PLACE,
        STRING_PLACE and (only with DEBUG) the failure id.
        POP_FAILURE_POINT pops them in the reverse order.

2367    Does `return FAILURE_CODE' if runs out of memory.  */
2368 
2369 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
     /* [<][>][^][v][top][bottom][index][help] */
2370   do {                                                                  \
2371     char *destination;                                                  \
2372     /* Must be int, so when we don't save any registers, the arithmetic \
2373        of 0 + -1 isn't done as unsigned.  */                            \
2374     int this_reg;                                                       \
2375                                                                         \
2376     DEBUG_STATEMENT (failure_id++);                                     \
2377     DEBUG_STATEMENT (nfailure_points_pushed++);                         \
2378     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
2379     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2380     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2381                                                                         \
2382     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
2383     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
2384                                                                         \
2385     /* Ensure we have enough space allocated for what we will push.  */ \
2386     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
2387       {                                                                 \
2388         if (!DOUBLE_FAIL_STACK (fail_stack))                    \
2389           return failure_code;                                          \
2390                                                                         \
2391         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
2392                        (fail_stack).size);                              \
2393         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2394       }                                                                 \
2395                                                                         \
2396     /* Push the info, starting with the registers.  */                  \
2397     DEBUG_PRINT1 ("\n");                                                \
2398                                                                         \
2399     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
2400          this_reg++)                                                    \
2401       {                                                                 \
2402         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
2403         DEBUG_STATEMENT (num_regs_pushed++);                            \
2404                                                                         \
2405         DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
2406         PUSH_FAILURE_ITEM (regstart[this_reg]);                         \
2407                                                                         \
2408         DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
2409         PUSH_FAILURE_ITEM (regend[this_reg]);                           \
2410                                                                         \
2411         DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
2412         DEBUG_PRINT2 (" match_null=%d",                                 \
2413                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
2414         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
2415         DEBUG_PRINT2 (" matched_something=%d",                          \
2416                       MATCHED_SOMETHING (reg_info[this_reg]));          \
2417         DEBUG_PRINT2 (" ever_matched=%d",                               \
2418                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));     \
2419         DEBUG_PRINT1 ("\n");                                            \
2420         PUSH_FAILURE_ITEM (reg_info[this_reg].word);                    \
2421       }                                                                 \
2422                                                                         \
2423     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2424     PUSH_FAILURE_ITEM (lowest_active_reg);                              \
2425                                                                         \
2426     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2427     PUSH_FAILURE_ITEM (highest_active_reg);                             \
2428                                                                         \
2429     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);           \
2430     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
2431     PUSH_FAILURE_ITEM (pattern_place);                                  \
2432                                                                         \
2433     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
2434     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2435                                  size2);                                \
2436     DEBUG_PRINT1 ("'\n");                                               \
2437     PUSH_FAILURE_ITEM (string_place);                                   \
2438                                                                         \
2439     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
2440     DEBUG_PUSH (failure_id);                                            \
2441   } while (0)
2442 
2443 /* This is the number of items that are pushed and popped on the stack
2444    for each register (its start, its end and its info word).  */
2445 #define NUM_REG_ITEMS  3
2446 
2447 /* Individual items aside from the registers: the lowest and highest
        active register numbers, the pattern position and the string
        position, plus the failure point id when debugging.  */
2448 #ifdef DEBUG
2449 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2450 #else
2451 #define NUM_NONREG_ITEMS 4
2452 #endif
2453 
2454 /* We push at most this many items on the stack.  Reads the variable
        `num_regs' at the point of use.  */
2455 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2456 
2457 /* We actually push this many items.  Reads the variables
        `highest_active_reg' and `lowest_active_reg' at the point of use.  */
2458 #define NUM_FAILURE_ITEMS                                               \
2459   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
2460     + NUM_NONREG_ITEMS)
2461 
2462 /* How many items can still be added to the stack without overflowing it.  */
2463 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2464 
2465 
2466 /* Pops what PUSH_FAILURE_POINT pushes, in the reverse order.
2467 
2468    We restore into the parameters, all of which should be lvalues:
2469      STR -- the saved data position.
2470      PAT -- the saved pattern position.
2471      LOW_REG, HIGH_REG -- the lowest and highest active registers.
2472      REGSTART, REGEND -- arrays of string positions.
2473      REG_INFO -- array of information about each subexpression.
2474    
        STR is left unchanged when the saved string position is NULL.

2475    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2476    `pend', `string1', `size1', `string2', and `size2'.

        Unlike PUSH_FAILURE_POINT this expands to a bare brace block, not a
        `do { ... } while (0)' statement.  */
2477 
2478 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
     /* [<][>][^][v][top][bottom][index][help] */
2479 {                                                                       \
2480   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
2481   int this_reg;                                                         \
2482   const unsigned char *string_temp;                                     \
2483                                                                         \
2484   assert (!FAIL_STACK_EMPTY ());                                        \
2485                                                                         \
2486   /* Remove failure points and point to how many regs pushed.  */       \
2487   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
2488   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
2489   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
2490                                                                         \
2491   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
2492                                                                         \
2493   DEBUG_POP (&failure_id);                                              \
2494   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
2495                                                                         \
2496   /* If the saved string location is NULL, it came from an              \
2497      on_failure_keep_string_jump opcode, and we want to throw away the  \
2498      saved NULL, thus retaining our current position in the string.  */ \
2499   string_temp = POP_FAILURE_ITEM ();                                    \
2500   if (string_temp != NULL)                                              \
2501     str = (const char *) string_temp;                                   \
2502                                                                         \
2503   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
2504   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
2505   DEBUG_PRINT1 ("'\n");                                                 \
2506                                                                         \
2507   pat = (unsigned char *) POP_FAILURE_ITEM ();                          \
2508   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                       \
2509   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
2510                                                                         \
2511   /* Restore register info.  */                                         \
2512   high_reg = (unsigned) POP_FAILURE_ITEM ();                            \
2513   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
2514                                                                         \
2515   low_reg = (unsigned) POP_FAILURE_ITEM ();                             \
2516   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
2517                                                                         \
2518   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
2519     {                                                                   \
2520       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                 \
2521                                                                         \
2522       reg_info[this_reg].word = POP_FAILURE_ITEM ();                    \
2523       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);          \
2524                                                                         \
2525       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();            \
2526       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);             \
2527                                                                         \
2528       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();          \
2529       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);         \
2530     }                                                                   \
2531                                                                         \
2532   DEBUG_STATEMENT (nfailure_points_popped++);                           \
2533 } /* POP_FAILURE_POINT */
2534 
2535 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2536    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2537    characters can start a string that matches the pattern.  This fastmap
2538    is used by re_search to skip quickly over impossible starting points.
2539 
2540    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2541    area as BUFP->fastmap.
2542    
2543    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2544    the pattern buffer.
2545 
2546    Returns 0 if we succeed, -2 if an internal error.   */
2547 
2548 int
2549 re_compile_fastmap (bufp)
     /* [<][>][^][v][top][bottom][index][help] */
2550      struct re_pattern_buffer *bufp;
2551 {
2552   int j, k;
2553   fail_stack_type fail_stack;
2554 #ifndef REGEX_MALLOC
2555   char *destination;
2556 #endif
2557   /* We don't push any register information onto the failure stack.  */
2558   unsigned num_regs = 0;
2559   
2560   register char *fastmap = bufp->fastmap;
2561   unsigned char *pattern = bufp->buffer;
2562   unsigned long size = bufp->used;
2563   const unsigned char *p = pattern;
2564   register unsigned char *pend = pattern + size;
2565 
2566   /* Assume that each path through the pattern can be null until
2567      proven otherwise.  We set this false at the bottom of switch
2568      statement, to which we get only if a particular path doesn't
2569      match the empty string.  */
2570   boolean path_can_be_null = true;
2571 
2572   /* We aren't doing a `succeed_n' to begin with.  */
2573   boolean succeed_n_p = false;
2574 
2575   assert (fastmap != NULL && p != NULL);
2576   
2577   INIT_FAIL_STACK ();
2578   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2579   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
2580   bufp->can_be_null = 0;
2581       
2582   while (p != pend || !FAIL_STACK_EMPTY ())
2583     {
2584       if (p == pend)
2585         {
2586           bufp->can_be_null |= path_can_be_null;
2587           
2588           /* Reset for next path.  */
2589           path_can_be_null = true;
2590           
2591           p = fail_stack.stack[--fail_stack.avail];
2592         }
2593 
2594       /* We should never be about to go beyond the end of the pattern.  */
2595       assert (p < pend);
2596       
2597 #ifdef SWITCH_ENUM_BUG
2598       switch ((int) ((re_opcode_t) *p++))
2599 #else
2600       switch ((re_opcode_t) *p++)
2601 #endif
2602         {
2603 
2604         /* I guess the idea here is to simply not bother with a fastmap
2605            if a backreference is used, since it's too hard to figure out
2606            the fastmap for the corresponding group.  Setting
2607            `can_be_null' stops `re_search_2' from using the fastmap, so
2608            that is all we do.  */
2609         case duplicate:
2610           bufp->can_be_null = 1;
2611           return 0;
2612 
2613 
2614       /* Following are the cases which match a character.  These end
2615          with `break'.  */
2616 
2617         case exactn:
2618           fastmap[p[1]] = 1;
2619           break;
2620 
2621 
2622         case charset:
2623           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2624             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2625               fastmap[j] = 1;
2626           break;
2627 
2628 
2629         case charset_not:
2630           /* Chars beyond end of map must be allowed.  */
2631           for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2632             fastmap[j] = 1;
2633 
2634           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2635             if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2636               fastmap[j] = 1;
2637           break;
2638 
2639 
2640         case wordchar:
2641           for (j = 0; j < (1 << BYTEWIDTH); j++)
2642             if (SYNTAX (j) == Sword)
2643               fastmap[j] = 1;
2644           break;
2645 
2646 
2647         case notwordchar:
2648           for (j = 0; j < (1 << BYTEWIDTH); j++)
2649             if (SYNTAX (j) != Sword)
2650               fastmap[j] = 1;
2651           break;
2652 
2653 
2654         case anychar:
2655           /* `.' matches anything ...  */
2656           for (j = 0; j < (1 << BYTEWIDTH); j++)
2657             fastmap[j] = 1;
2658 
2659           /* ... except perhaps newline.  */
2660           if (!(bufp->syntax & RE_DOT_NEWLINE))
2661             fastmap['\n'] = 0;
2662 
2663           /* Return if we have already set `can_be_null'; if we have,
2664              then the fastmap is irrelevant.  Something's wrong here.  */
2665           else if (bufp->can_be_null)
2666             return 0;
2667 
2668           /* Otherwise, have to check alternative paths.  */
2669           break;
2670 
2671 
2672 #ifdef emacs
2673         case syntaxspec:
2674           k = *p++;
2675           for (j = 0; j < (1 << BYTEWIDTH); j++)
2676             if (SYNTAX (j) == (enum syntaxcode) k)
2677               fastmap[j] = 1;
2678           break;
2679 
2680 
2681         case notsyntaxspec:
2682           k = *p++;
2683           for (j = 0; j < (1 << BYTEWIDTH); j++)
2684             if (SYNTAX (j) != (enum syntaxcode) k)
2685               fastmap[j] = 1;
2686           break;
2687 
2688 
2689       /* All cases after this match the empty string.  These end with
2690          `continue'.  */
2691 
2692 
2693         case before_dot:
2694         case at_dot:
2695         case after_dot:
2696           continue;
2697 #endif /* not emacs */
2698 
2699 
2700         case no_op:
2701         case begline:
2702         case endline:
2703         case begbuf:
2704         case endbuf:
2705         case wordbound:
2706         case notwordbound:
2707         case wordbeg:
2708         case wordend:
2709         case push_dummy_failure:
2710           continue;
2711 
2712 
2713         case jump_n:
2714         case pop_failure_jump:
2715         case maybe_pop_jump:
2716         case jump:
2717         case jump_past_alt:
2718         case dummy_failure_jump:
2719           EXTRACT_NUMBER_AND_INCR (j, p);
2720           p += j;       
2721           if (j > 0)
2722             continue;
2723             
2724           /* Jump backward implies we just went through the body of a
2725              loop and matched nothing.  Opcode jumped to should be
2726              `on_failure_jump' or `succeed_n'.  Just treat it like an
2727              ordinary jump.  For a * loop, it has pushed its failure
2728              point already; if so, discard that as redundant.  */
2729           if ((re_opcode_t) *p != on_failure_jump
2730               && (re_opcode_t) *p != succeed_n)
2731             continue;
2732 
2733           p++;
2734           EXTRACT_NUMBER_AND_INCR (j, p);
2735           p += j;               
2736           
2737           /* If what's on the stack is where we are now, pop it.  */
2738           if (!FAIL_STACK_EMPTY () 
2739               && fail_stack.stack[fail_stack.avail - 1] == p)
2740             fail_stack.avail--;
2741 
2742           continue;
2743 
2744 
2745         case on_failure_jump:
2746         case on_failure_keep_string_jump:
2747         handle_on_failure_jump:
2748           EXTRACT_NUMBER_AND_INCR (j, p);
2749 
2750           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2751              end of the pattern.  We don't want to push such a point,
2752              since when we restore it above, entering the switch will
2753              increment `p' past the end of the pattern.  We don't need
2754              to push such a point since we obviously won't find any more
2755              fastmap entries beyond `pend'.  Such a pattern can match
2756              the null string, though.  */
2757           if (p + j < pend)
2758             {
2759               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2760                 return -2;
2761             }
2762           else
2763             bufp->can_be_null = 1;
2764 
2765           if (succeed_n_p)
2766             {
2767               EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
2768               succeed_n_p = false;
2769             }
2770 
2771           continue;
2772 
2773 
2774         case succeed_n:
2775           /* Get to the number of times to succeed.  */
2776           p += 2;               
2777 
2778           /* Increment p past the n for when k != 0.  */
2779           EXTRACT_NUMBER_AND_INCR (k, p);
2780           if (k == 0)
2781             {
2782               p -= 4;
2783               succeed_n_p = true;  /* Spaghetti code alert.  */
2784               goto handle_on_failure_jump;
2785             }
2786           continue;
2787 
2788 
2789         case set_number_at:
2790           p += 4;
2791           continue;
2792 
2793 
2794         case start_memory:
2795         case stop_memory:
2796           p += 2;
2797           continue;
2798 
2799 
2800         default:
2801           abort (); /* We have listed all the cases.  */
2802         } /* switch *p++ */
2803 
2804       /* Getting here means we have found the possible starting
2805          characters for one path of the pattern -- and that the empty
2806          string does not match.  We need not follow this path further.
2807          Instead, look at the next alternative (remembered on the
2808          stack), or quit if no more.  The test at the top of the loop
2809          does these things.  */
2810       path_can_be_null = false;
2811       p = pend;
2812     } /* while p */
2813 
2814   /* Set `can_be_null' for the last path (also the first path, if the
2815      pattern is empty).  */
2816   bufp->can_be_null |= path_can_be_null;
2817   return 0;
2818 } /* re_compile_fastmap */
2819 
2820 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2821    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2822    this memory for recording register information.  STARTS and ENDS
2823    must be allocated using the malloc library routine, and must each
2824    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2825 
2826    If NUM_REGS == 0, then subsequent matches should allocate their own
2827    register data.
2828 
2829    Unless this function is called, the first search or match using
2830    PATTERN_BUFFER will allocate its own register data, without
2831    freeing the old data.  */
2832 
2833 void
2834 re_set_registers (bufp, regs, num_regs, starts, ends)
     /* [<][>][^][v][top][bottom][index][help] */
2835     struct re_pattern_buffer *bufp;
2836     struct re_registers *regs;
2837     unsigned num_regs;
2838     regoff_t *starts, *ends;
2839 {
2840   if (num_regs)
2841     {
2842       bufp->regs_allocated = REGS_REALLOCATE;
2843       regs->num_regs = num_regs;
2844       regs->start = starts;
2845       regs->end = ends;
2846     }
2847   else
2848     {
2849       bufp->regs_allocated = REGS_UNALLOCATED;
2850       regs->num_regs = 0;
2851       regs->start = regs->end = (regoff_t) 0;
2852     }
2853 }
2854 
2855 /* Searching routines.  */
2856 
/* Single-string front end to re_search_2: STRING (of length SIZE) is
   searched as the second string of an empty/absent first string, and
   matching is allowed to run to the very end of STRING, so the caller
   cannot say where to stop.  The result is whatever re_search_2
   returns.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  /* Matching may extend through the last character of STRING.  */
  int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size,
                      startpos, range, regs, stop);
}
2870 
2871 
2872 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2873    virtual concatenation of STRING1 and STRING2, starting first at index
2874    STARTPOS, then at STARTPOS + 1, and so on.
2875    
2876    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2877    
2878    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2879    only at STARTPOS; in general, the last start tried is STARTPOS +
2880    RANGE.
2881    
2882    In REGS, return the indices of the virtual concatenation of STRING1
2883    and STRING2 that matched the entire BUFP->buffer and its contained
2884    subexpressions.
2885    
2886    Do not consider matching one past the index STOP in the virtual
2887    concatenation of STRING1 and STRING2.
2888 
2889    We return either the position in the strings at which the match was
2890    found, -1 if no match, or -2 if error (such as failure
2891    stack overflow).  */
2892 
2893 int
2894 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     /* [<][>][^][v][top][bottom][index][help] */
2895      struct re_pattern_buffer *bufp;
2896      const char *string1, *string2;
2897      int size1, size2;
2898      int startpos;
2899      int range;
2900      struct re_registers *regs;
2901      int stop;
2902 {
2903   int val;
2904   register char *fastmap = bufp->fastmap;
2905   register char *translate = bufp->translate;
2906   int total_size = size1 + size2;
2907   int endpos = startpos + range;
2908 
2909   /* Check for out-of-range STARTPOS.  */
2910   if (startpos < 0 || startpos > total_size)
2911     return -1;
2912     
2913   /* Fix up RANGE if it might eventually take us outside
2914      the virtual concatenation of STRING1 and STRING2.  */
2915   if (endpos < -1)
2916     range = -1 - startpos;
2917   else if (endpos > total_size)
2918     range = total_size - startpos;
2919 
2920   /* If the search isn't to be a backwards one, don't waste time in a
2921      search for a pattern that must be anchored.  */
2922   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2923     {
2924       if (startpos > 0)
2925         return -1;
2926       else
2927         range = 1;
2928     }
2929 
2930   /* Update the fastmap now if not correct already.  */
2931   if (fastmap && !bufp->fastmap_accurate)
2932     if (re_compile_fastmap (bufp) == -2)
2933       return -2;
2934   
2935   /* Loop through the string, looking for a place to start matching.  */
2936   for (;;)
2937     { 
2938       /* If a fastmap is supplied, skip quickly over characters that
2939          cannot be the start of a match.  If the pattern can match the
2940          null string, however, we don't need to skip characters; we want
2941          the first null string.  */
2942       if (fastmap && startpos < total_size && !bufp->can_be_null)
2943         {
2944           if (range > 0)        /* Searching forwards.  */
2945             {
2946               register const char *d;
2947               register int lim = 0;
2948               int irange = range;
2949 
2950               if (startpos < size1 && startpos + range >= size1)
2951                 lim = range - (size1 - startpos);
2952 
2953               d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2954    
2955               /* Written out as an if-else to avoid testing `translate'
2956                  inside the loop.  */
2957               if (translate)
2958                 while (range > lim
2959                        && !fastmap[(unsigned char)
2960                                    translate[(unsigned char) *d++]])
2961                   range--;
2962               else
2963                 while (range > lim && !fastmap[(unsigned char) *d++])
2964                   range--;
2965 
2966               startpos += irange - range;
2967             }
2968           else                          /* Searching backwards.  */
2969             {
2970               register char c = (size1 == 0 || startpos >= size1
2971                                  ? string2[startpos - size1] 
2972                                  : string1[startpos]);
2973 
2974               if (!fastmap[(unsigned char) TRANSLATE (c)])
2975                 goto advance;
2976             }
2977         }
2978 
2979       /* If can't match the null string, and that's all we have left, fail.  */
2980       if (range >= 0 && startpos == total_size && fastmap
2981           && !bufp->can_be_null)
2982         return -1;
2983 
2984       val = re_match_2 (bufp, string1, size1, string2, size2,
2985                         startpos, regs, stop);
2986       if (val >= 0)
2987         return startpos;
2988         
2989       if (val == -2)
2990         return -2;
2991 
2992     advance:
2993       if (!range) 
2994         break;
2995       else if (range > 0) 
2996         {
2997           range--; 
2998           startpos++;
2999         }
3000       else
3001         {
3002           range++; 
3003           startpos--;
3004         }
3005     }
3006   return -1;
3007 } /* re_search_2 */
3008 
3009 /* Declarations and macros for re_match_2.  */
3010 
3011 static int bcmp_translate ();
3012 static boolean alt_match_null_string_p (),
3013                common_op_match_null_string_p (),
3014                group_match_null_string_p ();
3015 
3016 /* Structure for per-register (a.k.a. per-group) information.
3017    This must not be longer than one word, because we push this value
3018    onto the failure stack.  Other register information, such as the
3019    starting and ending positions (which are addresses), and the list of
3020    inner groups (which is a bits list) are maintained in separate
3021    variables.  
3022    
3023    We are making a (strictly speaking) nonportable assumption here: that
3024    the compiler will pack our bit fields into something that fits into
3025    the type of `word', i.e., is something that fits into one item on the
3026    failure stack.  */
3027 typedef union
3028 {
       /* The same storage viewed as a single failure-stack element.  */
3029   fail_stack_elt_t word;
3030   struct
3031   {
3032       /* This field is one if this group can match the empty string,
3033          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3034 #define MATCH_NULL_UNSET_VALUE 3
3035     unsigned match_null_string_p : 2;
         /* Set to 1 when re_match_2 executes the group's start_memory
            and back to 0 at its stop_memory.  */
3036     unsigned is_active : 1;
         /* Set to 1 by SET_REGS_MATCHED; re_match_2 clears it again each
            time it executes the group's start_memory.  */
3037     unsigned matched_something : 1;
         /* Set to 1 by SET_REGS_MATCHED together with `matched_something',
            but not cleared by start_memory.  */
3038     unsigned ever_matched_something : 1;
3039   } bits;
3040 } register_info_type;
3041 
3042 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
     /* [<][>][^][v][top][bottom][index][help] */
3043 #define IS_ACTIVE(R)  ((R).bits.is_active)
     /* [<][>][^][v][top][bottom][index][help] */
3044 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
     /* [<][>][^][v][top][bottom][index][help] */
3045 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
     /* [<][>][^][v][top][bottom][index][help] */
3046 
3047 
3048 /* Call this when we have matched a real character; it sets `matched' flags
3049    for the subexpressions which we are currently inside.  Also records
3050    that those subexprs have matched.
        The expansion uses `reg_info', `lowest_active_reg' and
        `highest_active_reg' from the enclosing scope and marks every
        register in that inclusive range.  While the two bounds hold
        NO_LOWEST_ACTIVE_REG and NO_HIGHEST_ACTIVE_REG the lower bound is
        greater than the upper one, so the loop body does not run.  */
3051 #define SET_REGS_MATCHED()                                              \
     /* [<][>][^][v][top][bottom][index][help] */
3052   do                                                                    \
3053     {                                                                   \
3054       unsigned r;                                                       \
3055       for (r = lowest_active_reg; r <= highest_active_reg; r++)         \
3056         {                                                               \
3057           MATCHED_SOMETHING (reg_info[r])                               \
3058             = EVER_MATCHED_SOMETHING (reg_info[r])                      \
3059             = 1;                                                        \
3060         }                                                               \
3061     }                                                                   \
3062   while (0)
3063 
3064 
3065 /* This converts PTR, a pointer into one of the search strings `string1'
3066    and `string2' into an offset from the beginning of that string.  */
3067 #define POINTER_TO_OFFSET(ptr)                                          \
     /* [<][>][^][v][top][bottom][index][help] */
3068   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3069 
3070 /* Registers are set to a sentinel when they haven't yet matched.  */
3071 #define REG_UNSET_VALUE ((char *) -1)
3072 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
     /* [<][>][^][v][top][bottom][index][help] */
3073 
3074 
3075 /* Macros for dealing with the split strings in re_match_2.  */
3076 
3077 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3078 
3079 /* Call before fetching a character with *d.  This switches over to
3080    string2 if necessary.
        The expansion reads and assigns `d' and `dend' and reads `string2'
        and `end_match_2' from the enclosing scope.  When `d' has reached
        `dend' and `dend' is already `end_match_2', control leaves through
        `goto fail', so the enclosing function must provide a `fail' label.
        It is a `while' rather than an `if' so that, after switching to
        string2, the test is repeated: if `string2 == end_match_2' the
        second pass takes the `goto fail' branch.  */
3081 #define PREFETCH()                                                      \
     /* [<][>][^][v][top][bottom][index][help] */
3082   while (d == dend)                                                     \
3083     {                                                                   \
3084       /* End of string2 => fail.  */                                    \
3085       if (dend == end_match_2)                                          \
3086         goto fail;                                                      \
3087       /* End of string1 => advance to string2.  */                      \
3088       d = string2;                                                      \
3089       dend = end_match_2;                                               \
3090     }
3091 
3092 
3093 /* Test if at very beginning or at very end of the virtual concatenation
3094    of `string1' and `string2'.  If only one string, it's `string2'.
        Both macros read `size1', `size2', `string1', `string2' and `end2'
        from the enclosing scope.  Note that the `|| !size2' term makes
        AT_STRINGS_BEG true for every D whenever `size2' is zero.  */
3095 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
     /* [<][>][^][v][top][bottom][index][help] */
3096 #define AT_STRINGS_END(d) ((d) == end2) 
     /* [<][>][^][v][top][bottom][index][help] */
3097 
3098 
3099 /* Test if D points to a character which is word-constituent.  We have
3100    two special cases to check for: if past the end of string1, look at
3101    the first character in string2; and if before the beginning of
3102    string2, look at the last character in string1.  */
3103 #define WORDCHAR_P(d)                                                   \
     /* [<][>][^][v][top][bottom][index][help] */
3104   (SYNTAX ((d) == end1 ? *string2                                       \
3105            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
3106    == Sword)
3107 
3108 /* Test if the character before D and the one at D differ with respect
3109    to being word-constituent.  */
3110 #define AT_WORD_BOUNDARY(d)                                             \
     /* [<][>][^][v][top][bottom][index][help] */
3111   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
3112    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3113 
3114 
3115 /* Free everything we malloc.
        With REGEX_MALLOC defined, FREE_VARIABLES frees `fail_stack.stack'
        and each of the register work arrays named below, and sets every
        one of those pointers to NULL.
        NOTE(review): FREE_VAR expands to two statements (an `if' and an
        assignment) and is not wrapped in `do ... while (0)'; it is only
        safe as a complete statement inside a braced block, which is how
        FREE_VARIABLES uses it.  */
3116 #ifdef REGEX_MALLOC
3117 #define FREE_VAR(var) if (var) free (var); var = NULL
     /* [<][>][^][v][top][bottom][index][help] */
3118 #define FREE_VARIABLES()                                                \
     /* [<][>][^][v][top][bottom][index][help] */
3119   do {                                                                  \
3120     FREE_VAR (fail_stack.stack);                                        \
3121     FREE_VAR (regstart);                                                \
3122     FREE_VAR (regend);                                                  \
3123     FREE_VAR (old_regstart);                                            \
3124     FREE_VAR (old_regend);                                              \
3125     FREE_VAR (best_regstart);                                           \
3126     FREE_VAR (best_regend);                                             \
3127     FREE_VAR (reg_info);                                                \
3128     FREE_VAR (reg_dummy);                                               \
3129     FREE_VAR (reg_info_dummy);                                          \
3130   } while (0)
3131 #else /* not REGEX_MALLOC */
3132 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3133 #define FREE_VARIABLES() alloca (0)
     /* [<][>][^][v][top][bottom][index][help] */
3134 #endif /* not REGEX_MALLOC */
3135 
3136 
3137 /* These values must meet several constraints.  They must not be valid
3138    register values; since we have a limit of 255 registers (because
3139    we use only one byte in the pattern for the register number), we can
3140    use numbers larger than 255.  They must differ by 1, because of
3141    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3142    be larger than the value for the highest register, so we do not try
3143    to actually save any registers when none are active.  */
3144 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3145 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3146 
3147 /* Matching routines.  */
3148 
3149 #ifndef emacs   /* Emacs never uses this.  */
3150 /* re_match is like re_match_2 except it takes only a single string.
        STRING, of length SIZE, is handed to re_match_2 as its second
        string, with NULL and 0 as the first string and its length.
        Matching starts at POS and SIZE is also passed as the stop
        position.  BUFP and REGS are passed through unchanged, and the
        value re_match_2 returns is returned as is.  */
3151 
3152 int
3153 re_match (bufp, string, size, pos, regs)
     /* [<][>][^][v][top][bottom][index][help] */
3154      struct re_pattern_buffer *bufp;
3155      const char *string;
3156      int size, pos;
3157      struct re_registers *regs;
3158  {
3159   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); 
3160 }
3161 #endif /* not emacs */
3162 
3163 
3164 /* re_match_2 matches the compiled pattern in BUFP against the
3165    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3166    and SIZE2, respectively).  We start matching at POS, and stop
3167    matching at STOP.
3168    
3169    If REGS is non-null and the `no_sub' field of BUFP is zero, we
3170    store offsets for the substring each group matched in REGS.  See the
3171    documentation for exactly how many groups we fill.
3172 
3173    We return -1 if no match, -2 if an internal error (such as the
3174    failure stack overflowing).  Otherwise, we return the length of the
3175    matched substring.  */
3176 
3177 int
3178 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     /* [<][>][^][v][top][bottom][index][help] */
3179      struct re_pattern_buffer *bufp;
3180      const char *string1, *string2;
3181      int size1, size2;
3182      int pos;
3183      struct re_registers *regs;
3184      int stop;
3185 {
3186   /* General temporaries.  */
3187   int mcnt;
3188   unsigned char *p1;
3189 
3190   /* Just past the end of the corresponding string.  */
3191   const char *end1, *end2;
3192 
3193   /* Pointers into string1 and string2, just past the last characters in
3194      each to consider matching.  */
3195   const char *end_match_1, *end_match_2;
3196 
3197   /* Where we are in the data, and the end of the current string.  */
3198   const char *d, *dend;
3199   
3200   /* Where we are in the pattern, and the end of the pattern.  */
3201   unsigned char *p = bufp->buffer;
3202   register unsigned char *pend = p + bufp->used;
3203 
3204   /* We use this to map every character in the string.  */
3205   char *translate = bufp->translate;
3206 
3207   /* Failure point stack.  Each place that can handle a failure further
3208      down the line pushes a failure point on this stack.  It consists of
3209      restart, regend, and reg_info for all registers corresponding to
3210      the subexpressions we're currently inside, plus the number of such
3211      registers, and, finally, two char *'s.  The first char * is where
3212      to resume scanning the pattern; the second one is where to resume
3213      scanning the strings.  If the latter is zero, the failure point is
3214      a ``dummy''; if a failure happens and the failure point is a dummy,
3215      it gets discarded and the next one is tried.  */
3216   fail_stack_type fail_stack;
3217 #ifdef DEBUG
3218   static unsigned failure_id = 0;
3219   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3220 #endif
3221 
3222   /* We fill all the registers internally, independent of what we
3223      return, for use in backreferences.  The number here includes
3224      an element for register zero.  */
3225   unsigned num_regs = bufp->re_nsub + 1;
3226   
3227   /* The currently active registers.  */
3228   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3229   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3230 
3231   /* Information on the contents of registers. These are pointers into
3232      the input strings; they record just what was matched (on this
3233      attempt) by a subexpression part of the pattern, that is, the
3234      regnum-th regstart pointer points to where in the pattern we began
3235      matching and the regnum-th regend points to right after where we
3236      stopped matching the regnum-th subexpression.  (The zeroth register
3237      keeps track of what the whole pattern matches.)  */
3238   const char **regstart, **regend;
3239 
3240   /* If a group that's operated upon by a repetition operator fails to
3241      match anything, then the register for its start will need to be
3242      restored because it will have been set to wherever in the string we
3243      are when we last see its open-group operator.  Similarly for a
3244      register's end.  */
3245   const char **old_regstart, **old_regend;
3246 
3247   /* The is_active field of reg_info helps us keep track of which (possibly
3248      nested) subexpressions we are currently in. The matched_something
3249      field of reg_info[reg_num] helps us tell whether or not we have
3250      matched any of the pattern so far this time through the reg_num-th
3251      subexpression.  These two fields get reset each time through any
3252      loop their register is in.  */
3253   register_info_type *reg_info; 
3254 
3255   /* The following record the register info as found in the above
3256      variables when we find a match better than any we've seen before. 
3257      This happens as we backtrack through the failure points, which in
3258      turn happens only if we have not yet matched the entire string. */
3259   unsigned best_regs_set = false;
3260   const char **best_regstart, **best_regend;
3261   
3262   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3263      allocate space for that if we're not allocating space for anything
3264      else (see below).  Also, we never need info about register 0 for
3265      any of the other register vectors, and it seems rather a kludge to
3266      treat `best_regend' differently than the rest.  So we keep track of
3267      the end of the best match so far in a separate variable.  We
3268      initialize this to NULL so that when we backtrack the first time
3269      and need to test it, it's not garbage.  */
3270   const char *match_end = NULL;
3271 
3272   /* Used when we pop values we don't care about.  */
3273   const char **reg_dummy;
3274   register_info_type *reg_info_dummy;
3275 
3276 #ifdef DEBUG
3277   /* Counts the total number of registers pushed.  */
3278   unsigned num_regs_pushed = 0;         
3279 #endif
3280 
3281   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3282   
3283   INIT_FAIL_STACK ();
3284   
3285   /* Do not bother to initialize all the register variables if there are
3286      no groups in the pattern, as it takes a fair amount of time.  If
3287      there are groups, we include space for register 0 (the whole
3288      pattern), even though we never use it, since it simplifies the
3289      array indexing.  We should fix this.  */
3290   if (bufp->re_nsub)
3291     {
3292       regstart = REGEX_TALLOC (num_regs, const char *);
3293       regend = REGEX_TALLOC (num_regs, const char *);
3294       old_regstart = REGEX_TALLOC (num_regs, const char *);
3295       old_regend = REGEX_TALLOC (num_regs, const char *);
3296       best_regstart = REGEX_TALLOC (num_regs, const char *);
3297       best_regend = REGEX_TALLOC (num_regs, const char *);
3298       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3299       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3300       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3301 
3302       if (!(regstart && regend && old_regstart && old_regend && reg_info 
3303             && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 
3304         {
3305           FREE_VARIABLES ();
3306           return -2;
3307         }
3308     }
3309 #ifdef REGEX_MALLOC
3310   else
3311     {
3312       /* We must initialize all our variables to NULL, so that
3313          `FREE_VARIABLES' doesn't try to free them.  */
3314       regstart = regend = old_regstart = old_regend = best_regstart
3315         = best_regend = reg_dummy = NULL;
3316       reg_info = reg_info_dummy = (register_info_type *) NULL;
3317     }
3318 #endif /* REGEX_MALLOC */
3319 
3320   /* The starting position is bogus.  */
3321   if (pos < 0 || pos > size1 + size2)
3322     {
3323       FREE_VARIABLES ();
3324       return -1;
3325     }
3326     
3327   /* Initialize subexpression text positions to -1 to mark ones that no
3328      start_memory/stop_memory has been seen for. Also initialize the
3329      register information struct.  */
3330   for (mcnt = 1; mcnt < num_regs; mcnt++)
3331     {
3332       regstart[mcnt] = regend[mcnt] 
3333         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3334         
3335       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3336       IS_ACTIVE (reg_info[mcnt]) = 0;
3337       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3338       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3339     }
3340   
3341   /* We move `string1' into `string2' if the latter's empty -- but not if
3342      `string1' is null.  */
3343   if (size2 == 0 && string1 != NULL)
3344     {
3345       string2 = string1;
3346       size2 = size1;
3347       string1 = 0;
3348       size1 = 0;
3349     }
3350   end1 = string1 + size1;
3351   end2 = string2 + size2;
3352 
3353   /* Compute where to stop matching, within the two strings.  */
3354   if (stop <= size1)
3355     {
3356       end_match_1 = string1 + stop;
3357       end_match_2 = string2;
3358     }
3359   else
3360     {
3361       end_match_1 = end1;
3362       end_match_2 = string2 + stop - size1;
3363     }
3364 
3365   /* `p' scans through the pattern as `d' scans through the data. 
3366      `dend' is the end of the input string that `d' points within.  `d'
3367      is advanced into the following input string whenever necessary, but
3368      this happens before fetching; therefore, at the beginning of the
3369      loop, `d' can be pointing at the end of a string, but it cannot
3370      equal `string2'.  */
3371   if (size1 > 0 && pos <= size1)
3372     {
3373       d = string1 + pos;
3374       dend = end_match_1;
3375     }
3376   else
3377     {
3378       d = string2 + pos - size1;
3379       dend = end_match_2;
3380     }
3381 
3382   DEBUG_PRINT1 ("The compiled pattern is: ");
3383   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3384   DEBUG_PRINT1 ("The string to match is: `");
3385   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3386   DEBUG_PRINT1 ("'\n");
3387   
3388   /* This loops over pattern commands.  It exits by returning from the
3389      function if the match is complete, or it drops through if the match
3390      fails at this starting point in the input data.  */
3391   for (;;)
3392     {
3393       DEBUG_PRINT2 ("\n0x%x: ", p);
3394 
3395       if (p == pend)
3396         { /* End of pattern means we might have succeeded.  */
3397           DEBUG_PRINT1 ("end of pattern ... ");
3398           
3399           /* If we haven't matched the entire string, and we want the
3400              longest match, try backtracking.  */
3401           if (d != end_match_2)
3402             {
3403               DEBUG_PRINT1 ("backtracking.\n");
3404               
3405               if (!FAIL_STACK_EMPTY ())
3406                 { /* More failure points to try.  */
3407                   boolean same_str_p = (FIRST_STRING_P (match_end) 
3408                                         == MATCHING_IN_FIRST_STRING);
3409 
3410                   /* If exceeds best match so far, save it.  */
3411                   if (!best_regs_set
3412                       || (same_str_p && d > match_end)
3413                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3414                     {
3415                       best_regs_set = true;
3416                       match_end = d;
3417                       
3418                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3419                       
3420                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3421                         {
3422                           best_regstart[mcnt] = regstart[mcnt];
3423                           best_regend[mcnt] = regend[mcnt];
3424                         }
3425                     }
3426                   goto fail;           
3427                 }
3428 
3429               /* If no failure points, don't restore garbage.  */
3430               else if (best_regs_set)   
3431                 {
3432                 restore_best_regs:
3433                   /* Restore best match.  It may happen that `dend ==
3434                      end_match_1' while the restored d is in string2.
3435                      For example, the pattern `x.*y.*z' against the
3436                      strings `x-' and `y-z-', if the two strings are
3437                      not consecutive in memory.  */
3438                   DEBUG_PRINT1 ("Restoring best registers.\n");
3439                   
3440                   d = match_end;
3441                   dend = ((d >= string1 && d <= end1)
3442                            ? end_match_1 : end_match_2);
3443 
3444                   for (mcnt = 1; mcnt < num_regs; mcnt++)
3445                     {
3446                       regstart[mcnt] = best_regstart[mcnt];
3447                       regend[mcnt] = best_regend[mcnt];
3448                     }
3449                 }
3450             } /* d != end_match_2 */
3451 
3452           DEBUG_PRINT1 ("Accepting match.\n");
3453 
3454           /* If caller wants register contents data back, do it.  */
3455           if (regs && !bufp->no_sub)
3456             {
3457               /* Have the register data arrays been allocated?  */
3458               if (bufp->regs_allocated == REGS_UNALLOCATED)
3459                 { /* No.  So allocate them with malloc.  We need one
3460                      extra element beyond `num_regs' for the `-1' marker
3461                      GNU code uses.  */
3462                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3463                   regs->start = TALLOC (regs->num_regs, regoff_t);
3464                   regs->end = TALLOC (regs->num_regs, regoff_t);
3465                   if (regs->start == NULL || regs->end == NULL)
3466                     return -2;
3467                   bufp->regs_allocated = REGS_REALLOCATE;
3468                 }
3469               else if (bufp->regs_allocated == REGS_REALLOCATE)
3470                 { /* Yes.  If we need more elements than were already
3471                      allocated, reallocate them.  If we need fewer, just
3472                      leave it alone.  */
3473                   if (regs->num_regs < num_regs + 1)
3474                     {
3475                       regs->num_regs = num_regs + 1;
3476                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3477                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3478                       if (regs->start == NULL || regs->end == NULL)
3479                         return -2;
3480                     }
3481                 }
3482               else
3483                 assert (bufp->regs_allocated == REGS_FIXED);
3484 
3485               /* Convert the pointer data in `regstart' and `regend' to
3486                  indices.  Register zero has to be set differently,
3487                  since we haven't kept track of any info for it.  */
3488               if (regs->num_regs > 0)
3489                 {
3490                   regs->start[0] = pos;
3491                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3492                                   : d - string2 + size1);
3493                 }
3494               
3495               /* Go through the first `min (num_regs, regs->num_regs)'
3496                  registers, since that is all we initialized.  */
3497               for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3498                 {
3499                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3500                     regs->start[mcnt] = regs->end[mcnt] = -1;
3501                   else
3502                     {
3503                       regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3504                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3505                     }
3506                 }
3507               
3508               /* If the regs structure we return has more elements than
3509                  were in the pattern, set the extra elements to -1.  If
3510                  we (re)allocated the registers, this is the case,
3511                  because we always allocate enough to have at least one
3512                  -1 at the end.  */
3513               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3514                 regs->start[mcnt] = regs->end[mcnt] = -1;
3515             } /* regs && !bufp->no_sub */
3516 
3517           FREE_VARIABLES ();
3518           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3519                         nfailure_points_pushed, nfailure_points_popped,
3520                         nfailure_points_pushed - nfailure_points_popped);
3521           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3522 
3523           mcnt = d - pos - (MATCHING_IN_FIRST_STRING 
3524                             ? string1 
3525                             : string2 - size1);
3526 
3527           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3528 
3529           return mcnt;
3530         }
3531 
3532       /* Otherwise match next pattern command.  */
3533 #ifdef SWITCH_ENUM_BUG
3534       switch ((int) ((re_opcode_t) *p++))
3535 #else
3536       switch ((re_opcode_t) *p++)
3537 #endif
3538         {
3539         /* Ignore these.  Used to ignore the n of succeed_n's which
3540            currently have n == 0.  */
3541         case no_op:
3542           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3543           break;
3544 
3545 
3546         /* Match the next n pattern characters exactly.  The following
3547            byte in the pattern defines n, and the n bytes after that
3548            are the characters to match.  */
3549         case exactn:
3550           mcnt = *p++;
3551           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3552 
3553           /* This is written out as an if-else so we don't waste time
3554              testing `translate' inside the loop.  */
3555           if (translate)
3556             {
3557               do
3558                 {
3559                   PREFETCH ();
3560                   if (translate[(unsigned char) *d++] != (char) *p++)
3561                     goto fail;
3562                 }
3563               while (--mcnt);
3564             }
3565           else
3566             {
3567               do
3568                 {
3569                   PREFETCH ();
3570                   if (*d++ != (char) *p++) goto fail;
3571                 }
3572               while (--mcnt);
3573             }
3574           SET_REGS_MATCHED ();
3575           break;
3576 
3577 
3578         /* Match any character except possibly a newline or a null.  */
3579         case anychar:
3580           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3581 
3582           PREFETCH ();
3583 
3584           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3585               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3586             goto fail;
3587 
3588           SET_REGS_MATCHED ();
3589           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3590           d++;
3591           break;
3592 
3593 
3594         case charset:
3595         case charset_not:
3596           {
3597             register unsigned char c;
3598             boolean not = (re_opcode_t) *(p - 1) == charset_not;
3599 
3600             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3601 
3602             PREFETCH ();
3603             c = TRANSLATE (*d); /* The character to match.  */
3604 
3605             /* Cast to `unsigned' instead of `unsigned char' in case the
3606                bit list is a full 32 bytes long.  */
3607             if (c < (unsigned) (*p * BYTEWIDTH)
3608                 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3609               not = !not;
3610 
3611             p += 1 + *p;
3612 
3613             if (!not) goto fail;
3614             
3615             SET_REGS_MATCHED ();
3616             d++;
3617             break;
3618           }
3619 
3620 
3621         /* The beginning of a group is represented by start_memory.
3622            The arguments are the register number in the next byte, and the
3623            number of groups inner to this one in the next.  The text
3624            matched within the group is recorded (in the internal
3625            registers data structure) under the register number.  */
3626         case start_memory:
3627           DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3628 
3629           /* Find out if this group can match the empty string.  */
3630           p1 = p;               /* To send to group_match_null_string_p.  */
3631           
3632           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3633             REG_MATCH_NULL_STRING_P (reg_info[*p]) 
3634               = group_match_null_string_p (&p1, pend, reg_info);
3635 
3636           /* Save the position in the string where we were the last time
3637              we were at this open-group operator in case the group is
3638              operated upon by a repetition operator, e.g., with `(a*)*b'
3639              against `ab'; then we want to ignore where we are now in
3640              the string in case this attempt to match fails.  */
3641           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3642                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3643                              : regstart[*p];
3644           DEBUG_PRINT2 ("  old_regstart: %d\n", 
3645                          POINTER_TO_OFFSET (old_regstart[*p]));
3646 
3647           regstart[*p] = d;
3648           DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3649 
3650           IS_ACTIVE (reg_info[*p]) = 1;
3651           MATCHED_SOMETHING (reg_info[*p]) = 0;
3652           
3653           /* This is the new highest active register.  */
3654           highest_active_reg = *p;
3655           
3656           /* If nothing was active before, this is the new lowest active
3657              register.  */
3658           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3659             lowest_active_reg = *p;
3660 
3661           /* Move past the register number and inner group count.  */
3662           p += 2;
3663           break;
3664 
3665 
3666         /* The stop_memory opcode represents the end of a group.  Its
3667            arguments are the same as start_memory's: the register
3668            number, and the number of inner groups.  */
3669         case stop_memory:
3670           DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3671              
3672           /* We need to save the string position the last time we were at
3673              this close-group operator in case the group is operated
3674              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3675              against `aba'; then we want to ignore where we are now in
3676              the string in case this attempt to match fails.  */
3677           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3678                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3679                            : regend[*p];
3680           DEBUG_PRINT2 ("      old_regend: %d\n", 
3681                          POINTER_TO_OFFSET (old_regend[*p]));
3682 
3683           regend[*p] = d;
3684           DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3685 
3686           /* This register isn't active anymore.  */
3687           IS_ACTIVE (reg_info[*p]) = 0;
3688           
3689           /* If this was the only register active, nothing is active
3690              anymore.  */
3691           if (lowest_active_reg == highest_active_reg)
3692             {
3693               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3694               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3695             }
3696           else
3697             { /* We must scan for the new highest active register, since
3698                  it isn't necessarily one less than now: consider
3699                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3700                  new highest active register is 1.  */
3701               unsigned char r = *p - 1;
3702               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3703                 r--;
3704               
3705               /* If we end up at register zero, that means that we saved
3706                  the registers as the result of an `on_failure_jump', not
3707                  a `start_memory', and we jumped to past the innermost
3708                  `stop_memory'.  For example, in ((.)*) we save
3709                  registers 1 and 2 as a result of the *, but when we pop
3710                  back to the second ), we are at the stop_memory 1.
3711                  Thus, nothing is active.  */
3712               if (r == 0)
3713                 {
3714                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3715                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3716                 }
3717               else
3718                 highest_active_reg = r;
3719             }
3720           
3721           /* If just failed to match something this time around with a
3722              group that's operated on by a repetition operator, try to
3723              force exit from the ``loop'', and restore the register
3724              information for this group that we had before trying this
3725              last match.  */
3726           if ((!MATCHED_SOMETHING (reg_info[*p])
3727                || (re_opcode_t) p[-3] == start_memory)
3728               && (p + 2) < pend)              
3729             {
3730               boolean is_a_jump_n = false;
3731               
3732               p1 = p + 2;
3733               mcnt = 0;
3734               switch ((re_opcode_t) *p1++)
3735                 {
3736                   case jump_n:
3737                     is_a_jump_n = true;
3738                   case pop_failure_jump:
3739                   case maybe_pop_jump:
3740                   case jump:
3741                   case dummy_failure_jump:
3742                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3743                     if (is_a_jump_n)
3744                       p1 += 2;
3745                     break;
3746                   
3747                   default:
3748                     /* do nothing */ ;
3749                 }
3750               p1 += mcnt;
3751         
3752               /* If the next operation is a jump backwards in the pattern
3753                  to an on_failure_jump right before the start_memory
3754                  corresponding to this stop_memory, exit from the loop
3755                  by forcing a failure after pushing on the stack the
3756                  on_failure_jump's jump in the pattern, and d.  */
3757               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3758                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3759                 {
3760                   /* If this group ever matched anything, then restore
3761                      what its registers were before trying this last
3762                      failed match, e.g., with `(a*)*b' against `ab' for
3763                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3764                      against `aba' for regend[3].
3765                      
3766                      Also restore the registers for inner groups for,
3767                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3768                      otherwise get trashed).  */
3769                      
3770                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3771                     {
3772                       unsigned r; 
3773         
3774                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3775                       
3776                       /* Restore this and inner groups' (if any) registers.  */
3777                       for (r = *p; r < *p + *(p + 1); r++)
3778                         {
3779                           regstart[r] = old_regstart[r];
3780 
3781                           /* xx why this test?  */
3782                           if ((int) old_regend[r] >= (int) regstart[r])
3783                             regend[r] = old_regend[r];
3784                         }     
3785                     }
3786                   p1++;
3787                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3788                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3789 
3790                   goto fail;
3791                 }
3792             }
3793           
3794           /* Move past the register number and the inner group count.  */
3795           p += 2;
3796           break;
3797 
3798 
3799         /* \<digit> has been turned into a `duplicate' command which is
3800            followed by the numeric value of <digit> as the register number.  */
3801         case duplicate:
3802           {
3803             register const char *d2, *dend2;
3804             int regno = *p++;   /* Get which register to match against.  */
3805             DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3806 
3807             /* Can't back reference a group which we've never matched.  */
3808             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3809               goto fail;
3810               
3811             /* Where in input to try to start matching.  */
3812             d2 = regstart[regno];
3813             
3814             /* Where to stop matching; if both the place to start and
3815                the place to stop matching are in the same string, then
3816                set to the place to stop, otherwise, for now have to use
3817                the end of the first string.  */
3818 
3819             dend2 = ((FIRST_STRING_P (regstart[regno]) 
3820                       == FIRST_STRING_P (regend[regno]))
3821                      ? regend[regno] : end_match_1);
3822             for (;;)
3823               {
3824                 /* If necessary, advance to next segment in register
3825                    contents.  */
3826                 while (d2 == dend2)
3827                   {
3828                     if (dend2 == end_match_2) break;
3829                     if (dend2 == regend[regno]) break;
3830 
3831                     /* End of string1 => advance to string2. */
3832                     d2 = string2;
3833                     dend2 = regend[regno];
3834                   }
3835                 /* At end of register contents => success */
3836                 if (d2 == dend2) break;
3837 
3838                 /* If necessary, advance to next segment in data.  */
3839                 PREFETCH ();
3840 
3841                 /* How many characters left in this segment to match.  */
3842                 mcnt = dend - d;
3843                 
3844                 /* Want how many consecutive characters we can match in
3845                    one shot, so, if necessary, adjust the count.  */
3846                 if (mcnt > dend2 - d2)
3847                   mcnt = dend2 - d2;
3848                   
3849                 /* Compare that many; failure if mismatch, else move
3850                    past them.  */
3851                 if (translate 
3852                     ? bcmp_translate (d, d2, mcnt, translate) 
3853                     : bcmp (d, d2, mcnt))
3854                   goto fail;
3855                 d += mcnt, d2 += mcnt;
3856               }
3857           }
3858           break;
3859 
3860 
3861         /* begline matches the empty string at the beginning of the string
3862            (unless `not_bol' is set in `bufp'), and, if
3863            `newline_anchor' is set, after newlines.  */
3864         case begline:
3865           DEBUG_PRINT1 ("EXECUTING begline.\n");
3866           
3867           if (AT_STRINGS_BEG (d))
3868             {
3869               if (!bufp->not_bol) break;
3870             }
3871           else if (d[-1] == '\n' && bufp->newline_anchor)
3872             {
3873               break;
3874             }
3875           /* In all other cases, we fail.  */
3876           goto fail;
3877 
3878 
3879         /* endline is the dual of begline.  */
3880         case endline:
3881           DEBUG_PRINT1 ("EXECUTING endline.\n");
3882 
3883           if (AT_STRINGS_END (d))
3884             {
3885               if (!bufp->not_eol) break;
3886             }
3887           
3888           /* We have to ``prefetch'' the next character.  */
3889           else if ((d == end1 ? *string2 : *d) == '\n'
3890                    && bufp->newline_anchor)
3891             {
3892               break;
3893             }
3894           goto fail;
3895 
3896 
3897         /* Match at the very beginning of the data.  */
3898         case begbuf:
3899           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3900           if (AT_STRINGS_BEG (d))
3901             break;
3902           goto fail;
3903 
3904 
3905         /* Match at the very end of the data.  */
3906         case endbuf:
3907           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3908           if (AT_STRINGS_END (d))
3909             break;
3910           goto fail;
3911 
3912 
3913         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3914            pushes NULL as the value for the string on the stack.  Then
3915            `pop_failure_point' will keep the current value for the
3916            string, instead of restoring it.  To see why, consider
3917            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3918            then the . fails against the \n.  But the next thing we want
3919            to do is match the \n against the \n; if we restored the
3920            string value, we would be back at the foo.
3921            
3922            Because this is used only in specific cases, we don't need to
3923            check all the things that `on_failure_jump' does, to make
3924            sure the right things get saved on the stack.  Hence we don't
3925            share its code.  The only reason to push anything on the
3926            stack at all is that otherwise we would have to change
3927            `anychar's code to do something besides goto fail in this
3928            case; that seems worse than this.  */
3929         case on_failure_keep_string_jump:
3930           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3931           
3932           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3933           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3934 
3935           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3936           break;
3937 
3938 
3939         /* Uses of on_failure_jump:
3940         
3941            Each alternative starts with an on_failure_jump that points
3942            to the beginning of the next alternative.  Each alternative
3943            except the last ends with a jump that in effect jumps past
3944            the rest of the alternatives.  (They really jump to the
3945            ending jump of the following alternative, because tensioning
3946            these jumps is a hassle.)
3947 
3948            Repeats start with an on_failure_jump that points past both
3949            the repetition text and either the following jump or
3950            pop_failure_jump back to this on_failure_jump.  */
3951         case on_failure_jump:
3952         on_failure:
3953           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3954 
3955           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3956           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3957 
3958           /* If this on_failure_jump comes right before a group (i.e.,
3959              the original * applied to a group), save the information
3960              for that group and all inner ones, so that if we fail back
3961              to this point, the group's information will be correct.
3962              For example, in \(a*\)*\1, we need the preceding group,
3963              and in \(\(a*\)b*\)\2, we need the inner group.  */
3964 
3965           /* We can't use `p' to check ahead because we push
3966              a failure point to `p + mcnt' after we do this.  */
3967           p1 = p;
3968 
3969           /* We need to skip no_op's before we look for the
3970              start_memory in case this on_failure_jump is happening as
3971              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3972              against aba.  */
3973           while (p1 < pend && (re_opcode_t) *p1 == no_op)
3974             p1++;
3975 
3976           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3977             {
3978               /* We have a new highest active register now.  This will
3979                  get reset at the start_memory we are about to get to,
3980                  but we will have saved all the registers relevant to
3981                  this repetition op, as described above.  */
3982               highest_active_reg = *(p1 + 1) + *(p1 + 2);
3983               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3984                 lowest_active_reg = *(p1 + 1);
3985             }
3986 
3987           DEBUG_PRINT1 (":\n");
3988           PUSH_FAILURE_POINT (p + mcnt, d, -2);
3989           break;
3990 
3991 
3992         /* A smart repeat ends with `maybe_pop_jump'.
3993            We change it to either `pop_failure_jump' or `jump'.  */
3994         case maybe_pop_jump:
3995           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3996           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3997           {
3998             register unsigned char *p2 = p;
3999 
4000             /* Compare the beginning of the repeat with what in the
4001                pattern follows its end.  If we can establish that there
4002                is nothing that they would both match (unlike, e.g.,
4003                `a*a', where both would match an `a' and we might have
4004                to backtrack), then we can change to pop_failure_jump,
4005                because we'll never have to backtrack.
4006                
4007                This is not true in the case of alternatives: in
4008                `(a|ab)*' we do need to backtrack to the `ab' alternative
4009                (e.g., if the string was `ab').  But instead of trying to
4010                detect that here, the alternative has put on a dummy
4011                failure point which is what we will end up popping.  */
4012 
4013             /* Skip over open/close-group commands.  */
4014             while (p2 + 2 < pend
4015                    && ((re_opcode_t) *p2 == stop_memory
4016                        || (re_opcode_t) *p2 == start_memory))
4017               p2 += 3;                  /* Skip over args, too.  */
4018 
4019             /* If we're at the end of the pattern, we can change.  */
4020             if (p2 == pend)
4021               {
4022                 /* Consider what happens when matching ":\(.*\)"
4023                    against ":/".  I don't really understand this code
4024                    yet.  */
4025                 p[-3] = (unsigned char) pop_failure_jump;
4026                 DEBUG_PRINT1
4027                   ("  End of pattern: change to `pop_failure_jump'.\n");
4028               }
4029 
4030             else if ((re_opcode_t) *p2 == exactn
4031                      || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4032               {
4033                 register unsigned char c
4034                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4035                 p1 = p + mcnt;
4036 
4037                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4038                    to the `maybe_pop_jump' of this case.  Examine what 
4039                    follows.  */
4040                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4041                   {
4042                     p[-3] = (unsigned char) pop_failure_jump;
4043                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4044                                   c, p1[5]);
4045                   }
4046                   
4047                 else if ((re_opcode_t) p1[3] == charset
4048                          || (re_opcode_t) p1[3] == charset_not)
4049                   {
4050                     int not = (re_opcode_t) p1[3] == charset_not;
4051                     
4052                     if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4053                         && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4054                       not = !not;
4055 
4056                     /* `not' is equal to 1 if c would match, which means
4057                         that we can't change to pop_failure_jump.  */
4058                     if (!not)
4059                       {
4060                         p[-3] = (unsigned char) pop_failure_jump;
4061                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4062                       }
4063                   }
4064               }
4065           }
4066           p -= 2;               /* Point at relative address again.  */
4067           if ((re_opcode_t) p[-1] != pop_failure_jump)
4068             {
4069               p[-1] = (unsigned char) jump;
4070               DEBUG_PRINT1 ("  Match => jump.\n");
4071               goto unconditional_jump;
4072             }
4073         /* Note fall through.  */
4074 
4075 
4076         /* The end of a simple repeat has a pop_failure_jump back to
4077            its matching on_failure_jump, where the latter will push a
4078            failure point.  The pop_failure_jump takes off failure
4079            points put on by this pop_failure_jump's matching
4080            on_failure_jump; we got through the pattern to here from the
4081            matching on_failure_jump, so didn't fail.  */
4082         case pop_failure_jump:
4083           {
4084             /* We need to pass separate storage for the lowest and
4085                highest registers, even though we don't care about the
4086                actual values.  Otherwise, we will restore only one
4087                register from the stack, since lowest will == highest in
4088                `pop_failure_point'.  */
4089             unsigned dummy_low_reg, dummy_high_reg;
4090             unsigned char *pdummy;
4091             const char *sdummy;
4092 
4093             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4094             POP_FAILURE_POINT (sdummy, pdummy,
4095                                dummy_low_reg, dummy_high_reg,
4096                                reg_dummy, reg_dummy, reg_info_dummy);
4097           }
4098           /* Note fall through.  */
4099 
4100           
4101         /* Unconditionally jump (without popping any failure points).  */
4102         case jump:
4103         unconditional_jump:
4104           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
4105           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4106           p += mcnt;                            /* Do the jump.  */
4107           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4108           break;
4109 
4110         
4111         /* We need this opcode so we can detect where alternatives end
4112            in `group_match_null_string_p' et al.  */
4113         case jump_past_alt:
4114           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4115           goto unconditional_jump;
4116 
4117 
4118         /* Normally, the on_failure_jump pushes a failure point, which
4119            then gets popped at pop_failure_jump.  We will end up at
4120            pop_failure_jump, also, and with a pattern of, say, `a+', we
4121            are skipping over the on_failure_jump, so we have to push
4122            something meaningless for pop_failure_jump to pop.  */
4123         case dummy_failure_jump:
4124           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4125           /* It doesn't matter what we push for the string here.  What
4126              the code at `fail' tests is the value for the pattern.  */
4127           PUSH_FAILURE_POINT (0, 0, -2);
4128           goto unconditional_jump;
4129 
4130 
4131         /* At the end of an alternative, we need to push a dummy failure
4132            point in case we are followed by a `pop_failure_jump', because
4133            we don't want the failure point for the alternative to be
4134            popped.  For example, matching `(a|ab)*' against `aab'
4135            requires that we match the `ab' alternative.  */
4136         case push_dummy_failure:
4137           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4138           /* See comments just above at `dummy_failure_jump' about the
4139              two zeroes.  */
4140           PUSH_FAILURE_POINT (0, 0, -2);
4141           break;
4142 
4143         /* Have to succeed matching what follows at least n times.
4144            After that, handle like `on_failure_jump'.  */
4145         case succeed_n: 
4146           EXTRACT_NUMBER (mcnt, p + 2);
4147           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4148 
4149           assert (mcnt >= 0);
4150           /* Originally, this is how many times we HAVE to succeed.  */
4151           if (mcnt > 0)
4152             {
4153                mcnt--;
4154                p += 2;
4155                STORE_NUMBER_AND_INCR (p, mcnt);
4156                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4157             }
4158           else if (mcnt == 0)
4159             {
4160               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4161               p[2] = (unsigned char) no_op;
4162               p[3] = (unsigned char) no_op;
4163               goto on_failure;
4164             }
4165           break;
4166         
4167         case jump_n: 
4168           EXTRACT_NUMBER (mcnt, p + 2);
4169           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4170 
4171           /* Originally, this is how many times we CAN jump.  */
4172           if (mcnt)
4173             {
4174                mcnt--;
4175                STORE_NUMBER (p + 2, mcnt);
4176                goto unconditional_jump;      
4177             }
4178           /* If don't have to jump any more, skip over the rest of command.  */
4179           else      
4180             p += 4;                  
4181           break;
4182         
4183         case set_number_at:
4184           {
4185             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4186 
4187             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4188             p1 = p + mcnt;
4189             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4190             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4191             STORE_NUMBER (p1, mcnt);
4192             break;
4193           }
4194 
4195         case wordbound:
4196           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4197           if (AT_WORD_BOUNDARY (d))
4198             break;
4199           goto fail;
4200 
4201         case notwordbound:
4202           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4203           if (AT_WORD_BOUNDARY (d))
4204             goto fail;
4205           break;
4206 
4207         case wordbeg:
4208           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4209           if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4210             break;
4211           goto fail;
4212 
4213         case wordend:
4214           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4215           if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4216               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4217             break;
4218           goto fail;
4219 
4220 #ifdef emacs
4221 #ifdef emacs19
4222         case before_dot:
4223           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4224           if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4225             goto fail;
4226           break;
4227   
4228         case at_dot:
4229           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4230           if (PTR_CHAR_POS ((unsigned char *) d) != point)
4231             goto fail;
4232           break;
4233   
4234         case after_dot:
4235           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4236           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4237             goto fail;
4238           break;
4239 #else /* not emacs19 */
4240         case at_dot:
4241           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4242           if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4243             goto fail;
4244           break;
4245 #endif /* not emacs19 */
4246 
4247         case syntaxspec:
4248           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4249           mcnt = *p++;
4250           goto matchsyntax;
4251 
4252         case wordchar:
4253           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4254           mcnt = (int) Sword;
4255         matchsyntax:
4256           PREFETCH ();
4257           if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4258             goto fail;
4259           SET_REGS_MATCHED ();
4260           break;
4261 
4262         case notsyntaxspec:
4263           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4264           mcnt = *p++;
4265           goto matchnotsyntax;
4266 
4267         case notwordchar:
4268           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4269           mcnt = (int) Sword;
4270         matchnotsyntax:
4271           PREFETCH ();
4272           if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4273             goto fail;
4274           SET_REGS_MATCHED ();
4275           break;
4276 
4277 #else /* not emacs */
4278         case wordchar:
4279           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4280           PREFETCH ();
4281           if (!WORDCHAR_P (d))
4282             goto fail;
4283           SET_REGS_MATCHED ();
4284           d++;
4285           break;
4286           
4287         case notwordchar:
4288           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4289           PREFETCH ();
4290           if (WORDCHAR_P (d))
4291             goto fail;
4292           SET_REGS_MATCHED ();
4293           d++;
4294           break;
4295 #endif /* not emacs */
4296           
4297         default:
4298           abort ();
4299         }
4300       continue;  /* Successfully executed one pattern command; keep going.  */
4301 
4302 
4303     /* We goto here if a matching operation fails. */
4304     fail:
4305       if (!FAIL_STACK_EMPTY ())
4306         { /* A restart point is known.  Restore to that state.  */
4307           DEBUG_PRINT1 ("\nFAIL:\n");
4308           POP_FAILURE_POINT (d, p,
4309                              lowest_active_reg, highest_active_reg,
4310                              regstart, regend, reg_info);
4311 
4312           /* If this failure point is a dummy, try the next one.  */
4313           if (!p)
4314             goto fail;
4315 
4316           /* If we failed to the end of the pattern, don't examine *p.  */
4317           assert (p <= pend);
4318           if (p < pend)
4319             {
4320               boolean is_a_jump_n = false;
4321               
4322               /* If failed to a backwards jump that's part of a repetition
4323                  loop, need to pop this failure point and use the next one.  */
4324               switch ((re_opcode_t) *p)
4325                 {
4326                 case jump_n:
4327                   is_a_jump_n = true;
4328                 case maybe_pop_jump:
4329                 case pop_failure_jump:
4330                 case jump:
4331                   p1 = p + 1;
4332                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4333                   p1 += mcnt;   
4334 
4335                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4336                       || (!is_a_jump_n
4337                           && (re_opcode_t) *p1 == on_failure_jump))
4338                     goto fail;
4339                   break;
4340                 default:
4341                   /* do nothing */ ;
4342                 }
4343             }
4344 
4345           if (d >= string1 && d <= end1)
4346             dend = end_match_1;
4347         }
4348       else
4349         break;   /* Matching at this starting point really fails.  */
4350     } /* for (;;) */
4351 
4352   if (best_regs_set)
4353     goto restore_best_regs;
4354 
4355   FREE_VARIABLES ();
4356 
4357   return -1;                            /* Failure to match.  */
4358 } /* re_match_2 */
4359 
4360 /* Subroutine definitions for re_match_2.  */
4361 
4362 
4363 /* We are passed P pointing to a register number after a start_memory.
4364    
4365    Return true if the pattern up to the corresponding stop_memory can
4366    match the empty string, and false otherwise.
4367    
4368    If we find the matching stop_memory, sets *P to point to one past its
        register number (i.e., at that stop_memory's second argument byte).
4369    Otherwise the contract leaves *P unspecified (some byte less than or
        equal to END); this implementation does not modify *P in that case.
4370 
4371    We don't handle duplicates properly (yet).

        Arguments:
          P        - in/out; on entry *P points at the register number that
                     follows a start_memory opcode.
          END      - scanning stops once the cursor reaches END; if no
                     stop_memory has been found by then, the result is false.
          REG_INFO - not examined here; only passed through to
                     alt_match_null_string_p and
                     common_op_match_null_string_p.  */
4372 
4373 static boolean
4374 group_match_null_string_p (p, end, reg_info)
     /* [<][>][^][v][top][bottom][index][help] */
4375     unsigned char **p, *end;
4376     register_info_type *reg_info;
4377 {
       /* Scratch for the numeric operands read out of the pattern.  */
4378   int mcnt;
4379   /* Scan cursor: start just after the two argument bytes of the
          start_memory (the byte *P points at, plus the one after it).  */
4380   unsigned char *p1 = *p + 2;
4381   
4382   while (p1 < end)
4383     {
4384       /* Skip over opcodes that can match nothing, and return true or
4385          false, as appropriate, when we get to one that can't, or to the
4386          matching stop_memory.  */
4387       
4388       switch ((re_opcode_t) *p1)
4389         {
4390         /* Could be either a loop or a series of alternatives.  */
4391         case on_failure_jump:
4392           p1++;
4393           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
               /* mcnt now holds the jump's operand.  NOTE(review): the macro
                  is defined outside this chunk; it presumably leaves p1 just
                  past a two-byte operand (the `p1 -= 3' further down relies
                  on opcode + operand being three bytes) -- confirm.  */
4394           
4395           /* If the next operation is not a jump backwards in the
4396              pattern.  */
4397 
4398           if (mcnt >= 0)
4399             {
4400               /* Go through the on_failure_jumps of the alternatives,
4401                  seeing if any of the alternatives cannot match nothing.
4402                  The last alternative starts with only a jump,
4403                  whereas the rest start with on_failure_jump and end
4404                  with a jump, e.g., here is the pattern for `a|b|c':
4405 
4406                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4407                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4408                  /exactn/1/c                                            
4409 
4410                  So, we have to first go through the first (n-1)
4411                  alternatives and then deal with the last one separately.  */
4412 
4413 
4414               /* Deal with the first (n-1) alternatives, which start
4415                  with an on_failure_jump (see above) that jumps to right
4416                  past a jump_past_alt.  */
4417 
                   /* p1[mcnt-3] is the byte three before the jump target, which
                      is where such a closing jump_past_alt opcode would sit.  */
4418               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4419                 {
4420                   /* `mcnt' holds how many bytes long the alternative
4421                      is, including the ending `jump_past_alt' and
4422                      its number.  */
4423 
                       /* Check only the alternative's own opcodes: the range
                          passed stops before the trailing jump_past_alt.  */
4424                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 
4425                                                       reg_info))
4426                     return false;
4427 
4428                   /* Move to right after this alternative, including the
4429                      jump_past_alt.  */
4430                   p1 += mcnt;   
4431 
4432                   /* Break if it's the beginning of an n-th alternative
4433                      that doesn't begin with an on_failure_jump.  */
4434                   if ((re_opcode_t) *p1 != on_failure_jump)
4435                     break;
4436                 
4437                   /* Still have to check that it's not an n-th
4438                      alternative that starts with an on_failure_jump.  */
4439                   p1++;
4440                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4441                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4442                     {
4443                       /* Get to the beginning of the n-th alternative.  */
4444                       p1 -= 3;
4445                       break;
4446                     }
4447                 }
4448 
4449               /* Deal with the last alternative: go back and get number
4450                  of the `jump_past_alt' just before it.  `mcnt' contains
4451                  the length of the alternative.  */
                   /* NOTE(review): when the loop above never ran, p1 - 2 appears
                      to be the operand of the on_failure_jump just read, so
                      mcnt would simply be re-read -- confirm against the
                      EXTRACT_NUMBER definition.  */
4452               EXTRACT_NUMBER (mcnt, p1 - 2);
4453 
4454               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4455                 return false;
4456 
4457               p1 += mcnt;       /* Get past the n-th alternative.  */
4458             } /* if mcnt >= 0 */
               /* A negative operand gets no further treatment here: we leave
                  the switch with p1 already past this on_failure_jump and
                  keep scanning.  */
4459           break;
4460 
4461           
4462         case stop_memory:
               /* The register number after this stop_memory must equal the
                  one *P pointed at on entry, i.e., this closes our group.  */
4463           assert (p1[1] == **p);
               /* Report the position just past the register number.  This
                  is the only place *P is written.  */
4464           *p = p1 + 2;
4465           return true;
4466 
4467         
4468         default: 
               /* Nothing in this branch advances p1 itself; the helper gets
                  &p1, so it presumably moves the cursor past the opcode it
                  examined -- TODO confirm.  */
4469           if (!common_op_match_null_string_p (&p1, end, reg_info))
4470             return false;
4471         }
4472     } /* while p1 < end */
4473 
       /* Reached END without meeting a stop_memory.  */
4474   return false;
4475 } /* group_match_null_string_p */
4476 
4477 
4478 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4479    It expects P to be the first byte of a single alternative and END one
4480    byte past the last. The alternative can contain groups.  */
4481    
4482 static boolean
4483 alt_match_null_string_p (p, end, reg_info)
     /* [<][>][^][v][top][bottom][index][help] */
4484     unsigned char *p, *end;
4485     register_info_type *reg_info;
4486 {
4487   int mcnt;
4488   unsigned char *p1 = p;
4489   
4490   while (p1 < end)
4491     {
4492       /* Skip over opcodes that can match nothing, and break when we get 
4493          to one that can't.  */
4494       
4495       switch ((re_opcode_t) *p1)
4496         {
4497         /* It's a loop.  */
4498         case on_failure_jump:
4499           p1++;
4500           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4501           p1 += mcnt;
4502           break;
4503           
4504         default: 
4505           if (!common_op_match_null_string_p (&p1, end, reg_info))
4506             return false;
4507         }
4508     }  /* while p1 < end */
4509 
4510   return true;
4511 } /* alt_match_null_string_p */
4512 
4513 
4514 /* Deals with the ops common to group_match_null_string_p and
4515    alt_match_null_string_p.  
4516    
4517    Sets P to one after the op and its arguments, if any.  */
4518 
4519 static boolean
4520 common_op_match_null_string_p (p, end, reg_info)
     /* [<][>][^][v][top][bottom][index][help] */
4521     unsigned char **p, *end;
4522     register_info_type *reg_info;
4523 {
4524   int mcnt;
4525   boolean ret;
4526   int reg_no;
4527   unsigned char *p1 = *p;
4528 
4529   switch ((re_opcode_t) *p1++)
4530     {
4531     case no_op:
4532     case begline:
4533     case endline:
4534     case begbuf:
4535     case endbuf:
4536     case wordbeg:
4537     case wordend:
4538     case wordbound:
4539     case notwordbound:
4540 #ifdef emacs
4541     case before_dot:
4542     case at_dot:
4543     case after_dot:
4544 #endif
4545       break;
4546 
4547     case start_memory:
4548       reg_no = *p1;
4549       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4550       ret = group_match_null_string_p (&p1, end, reg_info);
4551       
4552       /* Have to set this here in case we're checking a group which
4553          contains a group and a back reference to it.  */
4554 
4555       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4556         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4557 
4558       if (!ret)
4559         return false;
4560       break;
4561           
4562     /* If this is an optimized succeed_n for zero times, make the jump.  */
4563     case jump:
4564       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4565       if (mcnt >= 0)
4566         p1 += mcnt;
4567       else
4568         return false;
4569       break;
4570 
4571     case succeed_n:
4572       /* Get to the number of times to succeed.  */
4573       p1 += 2;          
4574       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4575 
4576       if (mcnt == 0)
4577         {
4578           p1 -= 4;
4579           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4580           p1 += mcnt;
4581         }
4582       else
4583         return false;
4584       break;
4585 
4586     case duplicate: 
4587       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4588         return false;
4589       break;
4590 
4591     case set_number_at:
4592       p1 += 4;
4593 
4594     default:
4595       /* All other opcodes mean we cannot match the empty string.  */
4596       return false;
4597   }
4598 
4599   *p = p1;
4600   return true;
4601 } /* common_op_match_null_string_p */
4602 
4603 
/* Compare the first LEN bytes of S1 and S2 after mapping each byte
   through the TRANSLATE table.  Returns 0 when all LEN translated
   bytes agree and 1 at the first pair that differs.  */

static int
bcmp_translate (s1, s2, len, translate)
     unsigned char *s1, *s2;
     register int len;
     char *translate;
{
  register int remaining;

  for (remaining = len; remaining != 0; remaining--, s1++, s2++)
    {
      if (translate[*s1] != translate[*s2])
        return 1;
    }

  return 0;
}
4621 
4622 /* Entry points for GNU code.  */
4623 
4624 /* re_compile_pattern is the GNU regular expression compiler: it
4625    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4626    Returns 0 if the pattern was valid, otherwise an error string.
4627    
4628    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4629    are set in BUFP on entry.
4630    
4631    We call regex_compile to do the actual compilation.  */
4632 
4633 const char *
4634 re_compile_pattern (pattern, length, bufp)
     /* [<][>][^][v][top][bottom][index][help] */
4635      const char *pattern;
4636      int length;
4637      struct re_pattern_buffer *bufp;
4638 {
4639   reg_errcode_t ret;
4640   
4641   /* GNU code is written to assume at least RE_NREGS registers will be set
4642      (and at least one extra will be -1).  */
4643   bufp->regs_allocated = REGS_UNALLOCATED;
4644   
4645   /* And GNU code determines whether or not to get register information
4646      by passing null for the REGS argument to re_match, etc., not by
4647      setting no_sub.  */
4648   bufp->no_sub = 0;
4649   
4650   /* Match anchors at newline.  */
4651   bufp->newline_anchor = 1;
4652   
4653   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4654 
4655   return re_error_msg[(int) ret];
4656 }     
4657 
4658 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4659    them if this is an Emacs or POSIX compilation.  */
4660 
4661 #if !defined (emacs) && !defined (_POSIX_SOURCE)
4662 
4663 /* BSD has one and only one pattern buffer.  */
4664 static struct re_pattern_buffer re_comp_buf;
4665 
4666 char *
4667 re_comp (s)
     /* [<][>][^][v][top][bottom][index][help] */
4668     const char *s;
4669 {
4670   reg_errcode_t ret;
4671   
4672   if (!s)
4673     {
4674       if (!re_comp_buf.buffer)
4675         return "No previous regular expression";
4676       return 0;
4677     }
4678 
4679   if (!re_comp_buf.buffer)
4680     {
4681       re_comp_buf.buffer = (unsigned char *) malloc (200);
4682       if (re_comp_buf.buffer == NULL)
4683         return "Memory exhausted";
4684       re_comp_buf.allocated = 200;
4685 
4686       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4687       if (re_comp_buf.fastmap == NULL)
4688         return "Memory exhausted";
4689     }
4690 
4691   /* Since `re_exec' always passes NULL for the `regs' argument, we
4692      don't need to initialize the pattern buffer fields which affect it.  */
4693 
4694   /* Match anchors at newlines.  */
4695   re_comp_buf.newline_anchor = 1;
4696 
4697   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4698   
4699   /* Yes, we're discarding `const' here.  */
4700   return (char *) re_error_msg[(int) ret];
4701 }
4702 
4703 
4704 int
4705 re_exec (s)
     /* [<][>][^][v][top][bottom][index][help] */
4706     const char *s;
4707 {
4708   const int len = strlen (s);
4709   return
4710     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4711 }
4712 #endif /* not emacs and not _POSIX_SOURCE */
4713 
4714 /* POSIX.2 functions.  Don't define these for Emacs.  */
4715 
4716 #ifndef emacs
4717 
4718 /* regcomp takes a regular expression as a string and compiles it.
4719 
4720    PREG is a regex_t *.  We do not expect any fields to be initialized,
4721    since POSIX says we shouldn't.  Thus, we set
4722 
4723      `buffer' to the compiled pattern;
4724      `used' to the length of the compiled pattern;
4725      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4726        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4727        RE_SYNTAX_POSIX_BASIC;
4728      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4729      `fastmap' and `fastmap_accurate' to zero;
4730      `re_nsub' to the number of subexpressions in PATTERN.
4731 
4732    PATTERN is the address of the pattern string.
4733 
4734    CFLAGS is a series of bits which affect compilation.
4735 
4736      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4737      use POSIX basic syntax.
4738 
4739      If REG_NEWLINE is set, then . and [^...] don't match newline.
4740      Also, regexec will try a match beginning after every newline.
4741 
4742      If REG_ICASE is set, then we considers upper- and lowercase
4743      versions of letters to be equivalent when matching.
4744 
4745      If REG_NOSUB is set, then when PREG is passed to regexec, that
4746      routine will report only success or failure, and nothing about the
4747      registers.
4748 
4749    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4750    the return codes and their meanings.)  */
4751 
4752 int
4753 regcomp (preg, pattern, cflags)
     /* [<][>][^][v][top][bottom][index][help] */
4754     regex_t *preg;
4755     const char *pattern; 
4756     int cflags;
4757 {
4758   reg_errcode_t ret;
4759   unsigned syntax
4760     = (cflags & REG_EXTENDED) ?
4761       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4762 
4763   /* regex_compile will allocate the space for the compiled pattern.  */
4764   preg->buffer = 0;
4765   preg->allocated = 0;
4766   
4767   /* Don't bother to use a fastmap when searching.  This simplifies the
4768      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4769      characters after newlines into the fastmap.  This way, we just try
4770      every character.  */
4771   preg->fastmap = 0;
4772   
4773   if (cflags & REG_ICASE)
4774     {
4775       unsigned i;
4776       
4777       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4778       if (preg->translate == NULL)
4779         return (int) REG_ESPACE;
4780 
4781       /* Map uppercase characters to corresponding lowercase ones.  */
4782       for (i = 0; i < CHAR_SET_SIZE; i++)
4783         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4784     }
4785   else
4786     preg->translate = NULL;
4787 
4788   /* If REG_NEWLINE is set, newlines are treated differently.  */
4789   if (cflags & REG_NEWLINE)
4790     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4791       syntax &= ~RE_DOT_NEWLINE;
4792       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4793       /* It also changes the matching behavior.  */
4794       preg->newline_anchor = 1;
4795     }
4796   else
4797     preg->newline_anchor = 0;
4798 
4799   preg->no_sub = !!(cflags & REG_NOSUB);
4800 
4801   /* POSIX says a null character in the pattern terminates it, so we 
4802      can use strlen here in compiling the pattern.  */
4803   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4804   
4805   /* POSIX doesn't distinguish between an unmatched open-group and an
4806      unmatched close-group: both are REG_EPAREN.  */
4807   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4808   
4809   return (int) ret;
4810 }
4811 
4812 
4813 /* regexec searches for a given pattern, specified by PREG, in the
4814    string STRING.
4815    
4816    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4817    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4818    least NMATCH elements, and we set them to the offsets of the
4819    corresponding matched substrings.
4820    
4821    EFLAGS specifies `execution flags' which affect matching: if
4822    REG_NOTBOL is set, then ^ does not match at the beginning of the
4823    string; if REG_NOTEOL is set, then $ does not match at the end.
4824    
4825    We return 0 if we find a match and REG_NOMATCH if not.  */
4826 
4827 int
4828 regexec (preg, string, nmatch, pmatch, eflags)
     /* [<][>][^][v][top][bottom][index][help] */
4829     const regex_t *preg;
4830     const char *string; 
4831     size_t nmatch; 
4832     regmatch_t pmatch[]; 
4833     int eflags;
4834 {
4835   int ret;
4836   struct re_registers regs;
4837   regex_t private_preg;
4838   int len = strlen (string);
4839   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4840 
4841   private_preg = *preg;
4842   
4843   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4844   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4845   
4846   /* The user has told us exactly how many registers to return
4847      information about, via `nmatch'.  We have to pass that on to the
4848      matching routines.  */
4849   private_preg.regs_allocated = REGS_FIXED;
4850   
4851   if (want_reg_info)
4852     {
4853       regs.num_regs = nmatch;
4854       regs.start = TALLOC (nmatch, regoff_t);
4855       regs.end = TALLOC (nmatch, regoff_t);
4856       if (regs.start == NULL || regs.end == NULL)
4857         return (int) REG_NOMATCH;
4858     }
4859 
4860   /* Perform the searching operation.  */
4861   ret = re_search (&private_preg, string, len,
4862                    /* start: */ 0, /* range: */ len,
4863                    want_reg_info ? &regs : (struct re_registers *) 0);
4864   
4865   /* Copy the register information to the POSIX structure.  */
4866   if (want_reg_info)
4867     {
4868       if (ret >= 0)
4869         {
4870           unsigned r;
4871 
4872           for (r = 0; r < nmatch; r++)
4873             {
4874               pmatch[r].rm_so = regs.start[r];
4875               pmatch[r].rm_eo = regs.end[r];
4876             }
4877         }
4878 
4879       /* If we needed the temporary register info, free the space now.  */
4880       free (regs.start);
4881       free (regs.end);
4882     }
4883 
4884   /* We want zero return to mean success, unlike `re_search'.  */
4885   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4886 }
4887 
4888 
4889 /* Returns a message corresponding to an error code, ERRCODE, returned
4890    from either regcomp or regexec.   We don't use PREG here.  */
4891 
4892 size_t
4893 regerror (errcode, preg, errbuf, errbuf_size)
     /* [<][>][^][v][top][bottom][index][help] */
4894     int errcode;
4895     const regex_t *preg;
4896     char *errbuf;
4897     size_t errbuf_size;
4898 {
4899   const char *msg;
4900   size_t msg_size;
4901 
4902   if (errcode < 0
4903       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4904     /* Only error codes returned by the rest of the code should be passed 
4905        to this routine.  If we are given anything else, or if other regex
4906        code generates an invalid error code, then the program has a bug.
4907        Dump core so we can fix it.  */
4908     abort ();
4909 
4910   msg = re_error_msg[errcode];
4911 
4912   /* POSIX doesn't require that we do anything in this case, but why
4913      not be nice.  */
4914   if (! msg)
4915     msg = "Success";
4916 
4917   msg_size = strlen (msg) + 1; /* Includes the null.  */
4918   
4919   if (errbuf_size != 0)
4920     {
4921       if (msg_size > errbuf_size)
4922         {
4923           strncpy (errbuf, msg, errbuf_size - 1);
4924           errbuf[errbuf_size - 1] = 0;
4925         }
4926       else
4927         strcpy (errbuf, msg);
4928     }
4929 
4930   return msg_size;
4931 }
4932 
4933 
4934 /* Free dynamically allocated space used by PREG.  */
4935 
4936 void
4937 regfree (preg)
     /* [<][>][^][v][top][bottom][index][help] */
4938     regex_t *preg;
4939 {
4940   if (preg->buffer != NULL)
4941     free (preg->buffer);
4942   preg->buffer = NULL;
4943   
4944   preg->allocated = 0;
4945   preg->used = 0;
4946 
4947   if (preg->fastmap != NULL)
4948     free (preg->fastmap);
4949   preg->fastmap = NULL;
4950   preg->fastmap_accurate = 0;
4951 
4952   if (preg->translate != NULL)
4953     free (preg->translate);
4954   preg->translate = NULL;
4955 }
4956 
4957 #endif /* not emacs  */
4958 
4959 /*
4960 Local variables:
4961 make-backup-files: t
4962 version-control: t
4963 trim-versions-without-asking: nil
4964 End:
4965 */

/* [<][>][^][v][top][bottom][index][help] */