modules/up/src/Core/gnu/regex.c

/* [<][>]
[^][v][top][bottom][index][help] */

FUNCTIONS

This source file includes following functions.
  1. bcmp
  2. bcopy
  3. bzero
  4. init_syntax_once
  5. SYNTAX
  6. isascii
  7. ISBLANK
  8. ISBLANK
  9. ISGRAPH
  10. ISGRAPH
  11. ISPRINT
  12. ISDIGIT
  13. ISALNUM
  14. ISALPHA
  15. ISCNTRL
  16. ISLOWER
  17. ISPUNCT
  18. ISSPACE
  19. ISUPPER
  20. ISXDIGIT
  21. SIGN_EXTEND_CHAR
  22. SIGN_EXTEND_CHAR
  23. REGEX_REALLOCATE
  24. REGEX_REALLOCATE
  25. FIRST_STRING_P
  26. TALLOC
  27. RETALLOC
  28. REGEX_TALLOC
  29. STREQ
  30. MAX
  31. MIN
  32. STORE_NUMBER
  33. STORE_NUMBER_AND_INCR
  34. EXTRACT_NUMBER
  35. extract_number
  36. EXTRACT_NUMBER
  37. EXTRACT_NUMBER_AND_INCR
  38. extract_number_and_incr
  39. EXTRACT_NUMBER_AND_INCR
  40. DEBUG_STATEMENT
  41. DEBUG_PRINT1
  42. DEBUG_PRINT2
  43. DEBUG_PRINT3
  44. DEBUG_PRINT4
  45. DEBUG_PRINT_COMPILED_PATTERN
  46. DEBUG_PRINT_DOUBLE_STRING
  47. printchar
  48. print_fastmap
  49. print_partial_compiled_pattern
  50. print_compiled_pattern
  51. print_double_string
  52. assert
  53. DEBUG_STATEMENT
  54. DEBUG_PRINT1
  55. DEBUG_PRINT2
  56. DEBUG_PRINT3
  57. DEBUG_PRINT4
  58. DEBUG_PRINT_COMPILED_PATTERN
  59. DEBUG_PRINT_DOUBLE_STRING
  60. re_set_syntax
  61. PATFETCH
  62. PATFETCH_RAW
  63. TRANSLATE
  64. GET_BUFFER_SPACE
  65. BUF_PUSH
  66. BUF_PUSH_2
  67. BUF_PUSH_3
  68. STORE_JUMP
  69. STORE_JUMP2
  70. INSERT_JUMP
  71. INSERT_JUMP2
  72. EXTEND_BUFFER
  73. SET_LIST_BIT
  74. GET_UNSIGNED_NUMBER
  75. IS_CHAR_CLASS
  76. regex_compile
  77. store_op1
  78. store_op2
  79. insert_op1
  80. insert_op2
  81. at_begline_loc_p
  82. at_endline_loc_p
  83. group_in_compile_stack
  84. compile_range
  85. FAIL_STACK_EMPTY
  86. FAIL_STACK_PTR_EMPTY
  87. FAIL_STACK_FULL
  88. FAIL_STACK_TOP
  89. INIT_FAIL_STACK
  90. DOUBLE_FAIL_STACK
  91. PUSH_PATTERN_OP
  92. PUSH_FAILURE_ITEM
  93. POP_FAILURE_ITEM
  94. DEBUG_POP
  95. DEBUG_PUSH
  96. DEBUG_POP
  97. PUSH_FAILURE_POINT
  98. POP_FAILURE_POINT
  99. re_compile_fastmap
  100. re_set_registers
  101. re_search
  102. re_search_2
  103. REG_MATCH_NULL_STRING_P
  104. IS_ACTIVE
  105. MATCHED_SOMETHING
  106. EVER_MATCHED_SOMETHING
  107. SET_REGS_MATCHED
  108. POINTER_TO_OFFSET
  109. REG_UNSET
  110. PREFETCH
  111. AT_STRINGS_BEG
  112. AT_STRINGS_END
  113. WORDCHAR_P
  114. AT_WORD_BOUNDARY
  115. FREE_VAR
  116. FREE_VARIABLES
  117. FREE_VARIABLES
  118. re_match
  119. re_match_2
  120. group_match_null_string_p
  121. alt_match_null_string_p
  122. common_op_match_null_string_p
  123. bcmp_translate
  124. re_compile_pattern
  125. re_comp
  126. re_exec
  127. regcomp
  128. regexec
  129. regerror
  130. regfree

   1 /* Extended regular expression matching and search library,
   2    version 0.12.
   3    (Implements POSIX draft P10003.2/D11.2, except for
   4    internationalization features.)
   5 
   6    Copyright (C) 1993 Free Software Foundation, Inc.
   7 
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 2, or (at your option)
  11    any later version.
  12 
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17 
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  21 
  22 /* AIX requires this to be the first thing in the file. */
  23 #if defined (_AIX) && !defined (REGEX_MALLOC)
  24   #pragma alloca
  25 #endif
  26 
  27 #define _GNU_SOURCE
  28 
  29 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
  30 #include <sys/types.h>
  31 
  32 #ifdef HAVE_CONFIG_H
  33 #include "config.h"
  34 #endif
  35 
  36 /* The `emacs' switch turns on certain matching commands
  37    that make sense only in Emacs. */
  38 #ifdef emacs
  39 
  40 #include "lisp.h"
  41 #include "buffer.h"
  42 #include "syntax.h"
  43 
  44 /* Emacs uses `NULL' as a predicate.  */
  45 #undef NULL
  46 
  47 #else  /* not emacs */
  48 
  49 /* We used to test for `BSTRING' here, but only GCC and Emacs define
  50    `BSTRING', as far as I know, and neither of them use this code.  */
  51 #if HAVE_STRING_H || STDC_HEADERS
  52 #include <string.h>
  53 #ifndef bcmp
  54 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
     /* [<][>][^][v][top][bottom][index][help] */
  55 #endif
  56 #ifndef bcopy
  57 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
     /* [<][>][^][v][top][bottom][index][help] */
  58 #endif
  59 #ifndef bzero
  60 #define bzero(s, n)     memset ((s), 0, (n))
     /* [<][>][^][v][top][bottom][index][help] */
  61 #endif
  62 #else
  63 #include <strings.h>
  64 #endif
  65 
  66 #ifdef STDC_HEADERS
  67 #include <stdlib.h>
  68 #else
  69 char *malloc ();
  70 char *realloc ();
  71 #endif
  72 
  73 
  74 /* Define the syntax stuff for \<, \>, etc.  */
  75 
  76 /* This must be nonzero for the wordchar and notwordchar pattern
  77    commands in re_match_2.  */
  78 #ifndef Sword 
  79 #define Sword 1
  80 #endif
  81 
  82 #ifdef SYNTAX_TABLE
  83 
  84 extern char *re_syntax_table;
  85 
  86 #else /* not SYNTAX_TABLE */
  87 
  88 /* How many characters in the character set.  */
  89 #define CHAR_SET_SIZE 256
  90 
  91 static char re_syntax_table[CHAR_SET_SIZE];
  92 
  93 static void
  94 init_syntax_once ()
     /* [<][>][^][v][top][bottom][index][help] */
  95 {
  96    register int c;
  97    static int done = 0;
  98 
  99    if (done)
 100      return;
 101 
 102    bzero (re_syntax_table, sizeof re_syntax_table);
 103 
 104    for (c = 'a'; c <= 'z'; c++)
 105      re_syntax_table[c] = Sword;
 106 
 107    for (c = 'A'; c <= 'Z'; c++)
 108      re_syntax_table[c] = Sword;
 109 
 110    for (c = '0'; c <= '9'; c++)
 111      re_syntax_table[c] = Sword;
 112 
 113    re_syntax_table['_'] = Sword;
 114 
 115    done = 1;
 116 }
 117 
 118 #endif /* not SYNTAX_TABLE */
 119 
 120 #define SYNTAX(c) re_syntax_table[c]
     /* [<][>][^][v][top][bottom][index][help] */
 121 
 122 #endif /* not emacs */
 123 
 124 /* Get the interface, including the syntax bits.  */
 125 #include "regex.h"
 126 
 127 /* isalpha etc. are used for the character classes.  */
 128 #include <ctype.h>
 129 
 130 #ifndef isascii
 131 #define isascii(c) 1
     /* [<][>][^][v][top][bottom][index][help] */
 132 #endif
 133 
 134 #ifdef isblank
 135 #define ISBLANK(c) (isascii (c) && isblank (c))
     /* [<][>][^][v][top][bottom][index][help] */
 136 #else
 137 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
     /* [<][>][^][v][top][bottom][index][help] */
 138 #endif
 139 #ifdef isgraph
 140 #define ISGRAPH(c) (isascii (c) && isgraph (c))
     /* [<][>][^][v][top][bottom][index][help] */
 141 #else
 142 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
     /* [<][>][^][v][top][bottom][index][help] */
 143 #endif
 144 
 145 #define ISPRINT(c) (isascii (c) && isprint (c))
     /* [<][>][^][v][top][bottom][index][help] */
 146 #define ISDIGIT(c) (isascii (c) && isdigit (c))
     /* [<][>][^][v][top][bottom][index][help] */
 147 #define ISALNUM(c) (isascii (c) && isalnum (c))
     /* [<][>][^][v][top][bottom][index][help] */
 148 #define ISALPHA(c) (isascii (c) && isalpha (c))
     /* [<][>][^][v][top][bottom][index][help] */
 149 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
     /* [<][>][^][v][top][bottom][index][help] */
 150 #define ISLOWER(c) (isascii (c) && islower (c))
     /* [<][>][^][v][top][bottom][index][help] */
 151 #define ISPUNCT(c) (isascii (c) && ispunct (c))
     /* [<][>][^][v][top][bottom][index][help] */
 152 #define ISSPACE(c) (isascii (c) && isspace (c))
     /* [<][>][^][v][top][bottom][index][help] */
 153 #define ISUPPER(c) (isascii (c) && isupper (c))
     /* [<][>][^][v][top][bottom][index][help] */
 154 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
     /* [<][>][^][v][top][bottom][index][help] */
 155 
 156 #ifndef NULL
 157 #define NULL 0
 158 #endif
 159 
 160 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
 161    since ours (we hope) works properly with all combinations of
 162    machines, compilers, `char' and `unsigned char' argument types.
 163    (Per Bothner suggested the basic approach.)  */
 164 #undef SIGN_EXTEND_CHAR
 165 #if __STDC__
 166 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
     /* [<][>][^][v][top][bottom][index][help] */
 167 #else  /* not __STDC__ */
 168 /* As in Harbison and Steele.  */
 169 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
     /* [<][>][^][v][top][bottom][index][help] */
 170 #endif
 171 
 172 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
 173    use `alloca' instead of `malloc'.  This is because using malloc in
 174    re_search* or re_match* could cause memory leaks when C-g is used in
 175    Emacs; also, malloc is slower and causes storage fragmentation.  On
 176    the other hand, malloc is more portable, and easier to debug.  
 177    
 178    Because we sometimes use alloca, some routines have to be macros,
 179    not functions -- `alloca'-allocated space disappears at the end of the
 180    function it is called in.  */
 181 
 182 #ifdef REGEX_MALLOC
 183 
 184 #define REGEX_ALLOCATE malloc
 185 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
     /* [<][>][^][v][top][bottom][index][help] */
 186 
 187 #else /* not REGEX_MALLOC  */
 188 
 189 /* Emacs already defines alloca, sometimes.  */
 190 #ifndef alloca
 191 
 192 /* Make alloca work the best possible way.  */
 193 #ifdef __GNUC__
 194 #define alloca __builtin_alloca
 195 #else /* not __GNUC__ */
 196 #if HAVE_ALLOCA_H
 197 #include <alloca.h>
 198 #else /* not __GNUC__ or HAVE_ALLOCA_H */
 199 #ifndef _AIX /* Already did AIX, up at the top.  */
 200 char *alloca ();
 201 #endif /* not _AIX */
 202 #endif /* not HAVE_ALLOCA_H */ 
 203 #endif /* not __GNUC__ */
 204 
 205 #endif /* not alloca */
 206 
 207 #define REGEX_ALLOCATE alloca
 208 
 209 /* Assumes a `char *destination' variable.  */
 210 #define REGEX_REALLOCATE(source, osize, nsize)                          \
     /* [<][>][^][v][top][bottom][index][help] */
 211   (destination = (char *) alloca (nsize),                               \
 212    bcopy (source, destination, osize),                                  \
 213    destination)
 214 
 215 #endif /* not REGEX_MALLOC */
 216 
 217 
 218 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
 219    `string1' or just past its end.  This works if PTR is NULL, which is
 220    a good thing.  */
 221 #define FIRST_STRING_P(ptr)                                     \
     /* [<][>][^][v][top][bottom][index][help] */
 222   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 223 
 224 /* (Re)Allocate N items of type T using malloc, or fail.  */
 225 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
     /* [<][>][^][v][top][bottom][index][help] */
 226 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
     /* [<][>][^][v][top][bottom][index][help] */
 227 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
     /* [<][>][^][v][top][bottom][index][help] */
 228 
 229 #define BYTEWIDTH 8 /* In bits.  */
 230 
 231 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
     /* [<][>][^][v][top][bottom][index][help] */
 232 
 233 #define MAX(a, b) ((a) > (b) ? (a) : (b))
     /* [<][>][^][v][top][bottom][index][help] */
 234 #define MIN(a, b) ((a) < (b) ? (a) : (b))
     /* [<][>][^][v][top][bottom][index][help] */
 235 
 236 typedef char boolean;
 237 #define false 0
 238 #define true 1
 239 
 240 /* These are the command codes that appear in compiled regular
 241    expressions.  Some opcodes are followed by argument bytes.  A
 242    command code can specify any interpretation whatsoever for its
 243    arguments.  Zero bytes may appear in the compiled regular expression.
 244 
 245    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
 246    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
 247    `exactn' we use here must also be 1.  */
 248 
 249 typedef enum
 250 {
 251   no_op = 0,
 252 
 253         /* Followed by one byte giving n, then by n literal bytes.  */
 254   exactn = 1,
 255 
 256         /* Matches any (more or less) character.  */
 257   anychar,
 258 
 259         /* Matches any one char belonging to specified set.  First
 260            following byte is number of bitmap bytes.  Then come bytes
 261            for a bitmap saying which chars are in.  Bits in each byte
 262            are ordered low-bit-first.  A character is in the set if its
 263            bit is 1.  A character too large to have a bit in the map is
 264            automatically not in the set.  */
 265   charset,
 266 
 267         /* Same parameters as charset, but match any character that is
 268            not one of those specified.  */
 269   charset_not,
 270 
 271         /* Start remembering the text that is matched, for storing in a
 272            register.  Followed by one byte with the register number, in
 273            the range 0 to one less than the pattern buffer's re_nsub
 274            field.  Then followed by one byte with the number of groups
 275            inner to this one.  (This last has to be part of the
 276            start_memory only because we need it in the on_failure_jump
 277            of re_match_2.)  */
 278   start_memory,
 279 
 280         /* Stop remembering the text that is matched and store it in a
 281            memory register.  Followed by one byte with the register
 282            number, in the range 0 to one less than `re_nsub' in the
 283            pattern buffer, and one byte with the number of inner groups,
 284            just like `start_memory'.  (We need the number of inner
 285            groups here because we don't have any easy way of finding the
 286            corresponding start_memory when we're at a stop_memory.)  */
 287   stop_memory,
 288 
 289         /* Match a duplicate of something remembered. Followed by one
 290            byte containing the register number.  */
 291   duplicate,
 292 
 293         /* Fail unless at beginning of line.  */
 294   begline,
 295 
 296         /* Fail unless at end of line.  */
 297   endline,
 298 
 299         /* Succeeds if at beginning of buffer (if emacs) or at beginning
 300            of string to be matched (if not).  */
 301   begbuf,
 302 
 303         /* Analogously, for end of buffer/string.  */
 304   endbuf,
 305  
 306         /* Followed by two byte relative address to which to jump.  */
 307   jump, 
 308 
 309         /* Same as jump, but marks the end of an alternative.  */
 310   jump_past_alt,
 311 
 312         /* Followed by two-byte relative address of place to resume at
 313            in case of failure.  */
 314   on_failure_jump,
 315         
 316         /* Like on_failure_jump, but pushes a placeholder instead of the
 317            current string position when executed.  */
 318   on_failure_keep_string_jump,
 319   
 320         /* Throw away latest failure point and then jump to following
 321            two-byte relative address.  */
 322   pop_failure_jump,
 323 
 324         /* Change to pop_failure_jump if know won't have to backtrack to
 325            match; otherwise change to jump.  This is used to jump
 326            back to the beginning of a repeat.  If what follows this jump
 327            clearly won't match what the repeat does, such that we can be
 328            sure that there is no use backtracking out of repetitions
 329            already matched, then we change it to a pop_failure_jump.
 330            Followed by two-byte address.  */
 331   maybe_pop_jump,
 332 
 333         /* Jump to following two-byte address, and push a dummy failure
 334            point. This failure point will be thrown away if an attempt
 335            is made to use it for a failure.  A `+' construct makes this
 336            before the first repeat.  Also used as an intermediary kind
 337            of jump when compiling an alternative.  */
 338   dummy_failure_jump,
 339 
 340         /* Push a dummy failure point and continue.  Used at the end of
 341            alternatives.  */
 342   push_dummy_failure,
 343 
 344         /* Followed by two-byte relative address and two-byte number n.
 345            After matching N times, jump to the address upon failure.  */
 346   succeed_n,
 347 
 348         /* Followed by two-byte relative address, and two-byte number n.
 349            Jump to the address N times, then fail.  */
 350   jump_n,
 351 
 352         /* Set the following two-byte relative address to the
 353            subsequent two-byte number.  The address *includes* the two
 354            bytes of number.  */
 355   set_number_at,
 356 
 357   wordchar,     /* Matches any word-constituent character.  */
 358   notwordchar,  /* Matches any char that is not a word-constituent.  */
 359 
 360   wordbeg,      /* Succeeds if at word beginning.  */
 361   wordend,      /* Succeeds if at word end.  */
 362 
 363   wordbound,    /* Succeeds if at a word boundary.  */
 364   notwordbound  /* Succeeds if not at a word boundary.  */
 365 
 366 #ifdef emacs
 367   ,before_dot,  /* Succeeds if before point.  */
 368   at_dot,       /* Succeeds if at point.  */
 369   after_dot,    /* Succeeds if after point.  */
 370 
 371         /* Matches any character whose syntax is specified.  Followed by
 372            a byte which contains a syntax code, e.g., Sword.  */
 373   syntaxspec,
 374 
 375         /* Matches any character whose syntax is not that specified.  */
 376   notsyntaxspec
 377 #endif /* emacs */
 378 } re_opcode_t;
 379 
 380 /* Common operations on the compiled pattern.  */
 381 
 382 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
 383 
 384 #define STORE_NUMBER(destination, number)                               \
     /* [<][>][^][v][top][bottom][index][help] */
 385   do {                                                                  \
 386     (destination)[0] = (number) & 0377;                                 \
 387     (destination)[1] = (number) >> 8;                                   \
 388   } while (0)
 389 
 390 /* Same as STORE_NUMBER, except increment DESTINATION to
 391    the byte after where the number is stored.  Therefore, DESTINATION
 392    must be an lvalue.  */
 393 
 394 #define STORE_NUMBER_AND_INCR(destination, number)                      \
     /* [<][>][^][v][top][bottom][index][help] */
 395   do {                                                                  \
 396     STORE_NUMBER (destination, number);                                 \
 397     (destination) += 2;                                                 \
 398   } while (0)
 399 
 400 /* Put into DESTINATION a number stored in two contiguous bytes starting
 401    at SOURCE.  */
 402 
 403 #define EXTRACT_NUMBER(destination, source)                             \
     /* [<][>][^][v][top][bottom][index][help] */
 404   do {                                                                  \
 405     (destination) = *(source) & 0377;                                   \
 406     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
 407   } while (0)
 408 
 409 #ifdef DEBUG
 410 static void
 411 extract_number (dest, source)
     /* [<][>][^][v][top][bottom][index][help] */
 412     int *dest;
 413     unsigned char *source;
 414 {
 415   int temp = SIGN_EXTEND_CHAR (*(source + 1)); 
 416   *dest = *source & 0377;
 417   *dest += temp << 8;
 418 }
 419 
 420 #ifndef EXTRACT_MACROS /* To debug the macros.  */
 421 #undef EXTRACT_NUMBER
 422 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
     /* [<][>][^][v][top][bottom][index][help] */
 423 #endif /* not EXTRACT_MACROS */
 424 
 425 #endif /* DEBUG */
 426 
 427 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 428    SOURCE must be an lvalue.  */
 429 
 430 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
     /* [<][>][^][v][top][bottom][index][help] */
 431   do {                                                                  \
 432     EXTRACT_NUMBER (destination, source);                               \
 433     (source) += 2;                                                      \
 434   } while (0)
 435 
 436 #ifdef DEBUG
 437 static void
 438 extract_number_and_incr (destination, source)
     /* [<][>][^][v][top][bottom][index][help] */
 439     int *destination;
 440     unsigned char **source;
 441 { 
 442   extract_number (destination, *source);
 443   *source += 2;
 444 }
 445 
 446 #ifndef EXTRACT_MACROS
 447 #undef EXTRACT_NUMBER_AND_INCR
 448 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
     /* [<][>][^][v][top][bottom][index][help] */
 449   extract_number_and_incr (&dest, &src)
 450 #endif /* not EXTRACT_MACROS */
 451 
 452 #endif /* DEBUG */
 453 
 454 /* If DEBUG is defined, Regex prints many voluminous messages about what
 455    it is doing (if the variable `debug' is nonzero).  If linked with the
 456    main program in `iregex.c', you can enter patterns and strings
 457    interactively.  And if linked with the main program in `main.c' and
 458    the other test files, you can run the already-written tests.  */
 459 
 460 #ifdef DEBUG
 461 
 462 /* We use standard I/O for debugging.  */
 463 #include <stdio.h>
 464 
 465 /* It is useful to test things that ``must'' be true when debugging.  */
 466 #include <assert.h>
 467 
 468 static int debug = 0;
 469 
 470 #define DEBUG_STATEMENT(e) e
     /* [<][>][^][v][top][bottom][index][help] */
 471 #define DEBUG_PRINT1(x) if (debug) printf (x)
     /* [<][>][^][v][top][bottom][index][help] */
 472 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
     /* [<][>][^][v][top][bottom][index][help] */
 473 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
     /* [<][>][^][v][top][bottom][index][help] */
 474 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
     /* [<][>][^][v][top][bottom][index][help] */
 475 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
     /* [<][>][^][v][top][bottom][index][help] */
 476   if (debug) print_partial_compiled_pattern (s, e)
 477 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
     /* [<][>][^][v][top][bottom][index][help] */
 478   if (debug) print_double_string (w, s1, sz1, s2, sz2)
 479 
 480 // Commented out by wlee@isi.edu
 481 //extern void printchar ();
 482 
 483 // Instead, put the following in
 484 static void
 485 printchar (c)
     /* [<][>][^][v][top][bottom][index][help] */
 486      char c;
 487 {
 488   if (c < 040 || c >= 0177)
 489     {
 490       putchar ('\\');
 491       putchar (((c >> 6) & 3) + '0');
 492       putchar (((c >> 3) & 7) + '0');
 493       putchar ((c & 7) + '0');
 494     }
 495   else
 496     putchar (c);
 497 }
 498 
 499 
 500 /* Print the fastmap in human-readable form.  */
 501 
 502 void
 503 print_fastmap (fastmap)
     /* [<][>][^][v][top][bottom][index][help] */
 504     char *fastmap;
 505 {
 506   unsigned was_a_range = 0;
 507   unsigned i = 0;  
 508   
 509   while (i < (1 << BYTEWIDTH))
 510     {
 511       if (fastmap[i++])
 512         {
 513           was_a_range = 0;
 514           printchar (i - 1);
 515           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
 516             {
 517               was_a_range = 1;
 518               i++;
 519             }
 520           if (was_a_range)
 521             {
 522               printf ("-");
 523               printchar (i - 1);
 524             }
 525         }
 526     }
 527   putchar ('\n'); 
 528 }
 529 
 530 
 531 /* Print a compiled pattern string in human-readable form, starting at
 532    the START pointer into it and ending just before the pointer END.  */
 533 
 534 void
 535 print_partial_compiled_pattern (start, end)
     /* [<][>][^][v][top][bottom][index][help] */
 536     unsigned char *start;
 537     unsigned char *end;
 538 {
 539   int mcnt, mcnt2;
 540   unsigned char *p = start;
 541   unsigned char *pend = end;
 542 
 543   if (start == NULL)
 544     {
 545       printf ("(null)\n");
 546       return;
 547     }
 548     
 549   /* Loop over pattern commands.  */
 550   while (p < pend)
 551     {
 552       switch ((re_opcode_t) *p++)
 553         {
 554         case no_op:
 555           printf ("/no_op");
 556           break;
 557 
 558         case exactn:
 559           mcnt = *p++;
 560           printf ("/exactn/%d", mcnt);
 561           do
 562             {
 563               putchar ('/');
 564               printchar (*p++);
 565             }
 566           while (--mcnt);
 567           break;
 568 
 569         case start_memory:
 570           mcnt = *p++;
 571           printf ("/start_memory/%d/%d", mcnt, *p++);
 572           break;
 573 
 574         case stop_memory:
 575           mcnt = *p++;
 576           printf ("/stop_memory/%d/%d", mcnt, *p++);
 577           break;
 578 
 579         case duplicate:
 580           printf ("/duplicate/%d", *p++);
 581           break;
 582 
 583         case anychar:
 584           printf ("/anychar");
 585           break;
 586 
 587         case charset:
 588         case charset_not:
 589           {
 590             register int c;
 591 
 592             printf ("/charset%s",
 593                     (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
 594             
 595             assert (p + *p < pend);
 596 
 597             for (c = 0; c < *p; c++)
 598               {
 599                 unsigned bit;
 600                 unsigned char map_byte = p[1 + c];
 601                 
 602                 putchar ('/');
 603 
 604                 for (bit = 0; bit < BYTEWIDTH; bit++)
 605                   if (map_byte & (1 << bit))
 606                     printchar (c * BYTEWIDTH + bit);
 607               }
 608             p += 1 + *p;
 609             break;
 610           }
 611 
 612         case begline:
 613           printf ("/begline");
 614           break;
 615 
 616         case endline:
 617           printf ("/endline");
 618           break;
 619 
 620         case on_failure_jump:
 621           extract_number_and_incr (&mcnt, &p);
 622           printf ("/on_failure_jump/0/%d", mcnt);
 623           break;
 624 
 625         case on_failure_keep_string_jump:
 626           extract_number_and_incr (&mcnt, &p);
 627           printf ("/on_failure_keep_string_jump/0/%d", mcnt);
 628           break;
 629 
 630         case dummy_failure_jump:
 631           extract_number_and_incr (&mcnt, &p);
 632           printf ("/dummy_failure_jump/0/%d", mcnt);
 633           break;
 634 
 635         case push_dummy_failure:
 636           printf ("/push_dummy_failure");
 637           break;
 638           
 639         case maybe_pop_jump:
 640           extract_number_and_incr (&mcnt, &p);
 641           printf ("/maybe_pop_jump/0/%d", mcnt);
 642           break;
 643 
 644         case pop_failure_jump:
 645           extract_number_and_incr (&mcnt, &p);
 646           printf ("/pop_failure_jump/0/%d", mcnt);
 647           break;          
 648           
 649         case jump_past_alt:
 650           extract_number_and_incr (&mcnt, &p);
 651           printf ("/jump_past_alt/0/%d", mcnt);
 652           break;          
 653           
 654         case jump:
 655           extract_number_and_incr (&mcnt, &p);
 656           printf ("/jump/0/%d", mcnt);
 657           break;
 658 
 659         case succeed_n: 
 660           extract_number_and_incr (&mcnt, &p);
 661           extract_number_and_incr (&mcnt2, &p);
 662           printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
 663           break;
 664         
 665         case jump_n: 
 666           extract_number_and_incr (&mcnt, &p);
 667           extract_number_and_incr (&mcnt2, &p);
 668           printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
 669           break;
 670         
 671         case set_number_at: 
 672           extract_number_and_incr (&mcnt, &p);
 673           extract_number_and_incr (&mcnt2, &p);
 674           printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
 675           break;
 676         
 677         case wordbound:
 678           printf ("/wordbound");
 679           break;
 680 
 681         case notwordbound:
 682           printf ("/notwordbound");
 683           break;
 684 
 685         case wordbeg:
 686           printf ("/wordbeg");
 687           break;
 688           
 689         case wordend:
 690           printf ("/wordend");
 691           
 692 #ifdef emacs
 693         case before_dot:
 694           printf ("/before_dot");
 695           break;
 696 
 697         case at_dot:
 698           printf ("/at_dot");
 699           break;
 700 
 701         case after_dot:
 702           printf ("/after_dot");
 703           break;
 704 
 705         case syntaxspec:
 706           printf ("/syntaxspec");
 707           mcnt = *p++;
 708           printf ("/%d", mcnt);
 709           break;
 710           
 711         case notsyntaxspec:
 712           printf ("/notsyntaxspec");
 713           mcnt = *p++;
 714           printf ("/%d", mcnt);
 715           break;
 716 #endif /* emacs */
 717 
 718         case wordchar:
 719           printf ("/wordchar");
 720           break;
 721           
 722         case notwordchar:
 723           printf ("/notwordchar");
 724           break;
 725 
 726         case begbuf:
 727           printf ("/begbuf");
 728           break;
 729 
 730         case endbuf:
 731           printf ("/endbuf");
 732           break;
 733 
 734         default:
 735           printf ("?%d", *(p-1));
 736         }
 737     }
 738   printf ("/\n");
 739 }
 740 
 741 
 742 void
 743 print_compiled_pattern (bufp)
     /* [<][>][^][v][top][bottom][index][help] */
 744     struct re_pattern_buffer *bufp;
 745 {
 746   unsigned char *buffer = bufp->buffer;
 747 
 748   print_partial_compiled_pattern (buffer, buffer + bufp->used);
 749   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
 750 
 751   if (bufp->fastmap_accurate && bufp->fastmap)
 752     {
 753       printf ("fastmap: ");
 754       print_fastmap (bufp->fastmap);
 755     }
 756 
 757   printf ("re_nsub: %d\t", bufp->re_nsub);
 758   printf ("regs_alloc: %d\t", bufp->regs_allocated);
 759   printf ("can_be_null: %d\t", bufp->can_be_null);
 760   printf ("newline_anchor: %d\n", bufp->newline_anchor);
 761   printf ("no_sub: %d\t", bufp->no_sub);
 762   printf ("not_bol: %d\t", bufp->not_bol);
 763   printf ("not_eol: %d\t", bufp->not_eol);
 764   printf ("syntax: %d\n", bufp->syntax);
 765   /* Perhaps we should print the translate table?  */
 766 }
 767 
 768 
 769 void
 770 print_double_string (where, string1, size1, string2, size2)
     /* [<][>][^][v][top][bottom][index][help] */
 771     const char *where;
 772     const char *string1;
 773     const char *string2;
 774     int size1;
 775     int size2;
 776 {
 777   unsigned this_char;
 778   
 779   if (where == NULL)
 780     printf ("(null)");
 781   else
 782     {
 783       if (FIRST_STRING_P (where))
 784         {
 785           for (this_char = where - string1; this_char < size1; this_char++)
 786             printchar (string1[this_char]);
 787 
 788           where = string2;    
 789         }
 790 
 791       for (this_char = where - string2; this_char < size2; this_char++)
 792         printchar (string2[this_char]);
 793     }
 794 }
 795 
 796 #else /* not DEBUG */
 797 
 798 #undef assert
 799 #define assert(e)
     /* [<][>][^][v][top][bottom][index][help] */
 800 
 801 #define DEBUG_STATEMENT(e)
     /* [<][>][^][v][top][bottom][index][help] */
 802 #define DEBUG_PRINT1(x)
     /* [<][>][^][v][top][bottom][index][help] */
 803 #define DEBUG_PRINT2(x1, x2)
     /* [<][>][^][v][top][bottom][index][help] */
 804 #define DEBUG_PRINT3(x1, x2, x3)
     /* [<][>][^][v][top][bottom][index][help] */
 805 #define DEBUG_PRINT4(x1, x2, x3, x4)
     /* [<][>][^][v][top][bottom][index][help] */
 806 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
     /* [<][>][^][v][top][bottom][index][help] */
 807 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
     /* [<][>][^][v][top][bottom][index][help] */
 808 
 809 #endif /* not DEBUG */
 810 
 811 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
 812    also be assigned to arbitrarily: each pattern buffer stores its own
 813    syntax, so it can be changed between regex compilations.  */
 814 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
 815 
 816 
 817 /* Specify the precise syntax of regexps for compilation.  This provides
 818    for compatibility for various utilities which historically have
 819    different, incompatible syntaxes.
 820 
 821    The argument SYNTAX is a bit mask comprised of the various bits
 822    defined in regex.h.  We return the old syntax.  */
 823 
 824 reg_syntax_t
 825 re_set_syntax (syntax)
     /* [<][>][^][v][top][bottom][index][help] */
 826     reg_syntax_t syntax;
 827 {
 828   reg_syntax_t ret = re_syntax_options;
 829   
 830   re_syntax_options = syntax;
 831   return ret;
 832 }
 833 
 834 /* This table gives an error message for each of the error codes listed
 835    in regex.h.  Obviously the order here has to be same as there.  */
 836 
 837 static const char *re_error_msg[] =
 838   { NULL,                                       /* REG_NOERROR */
 839     "No match",                                 /* REG_NOMATCH */
 840     "Invalid regular expression",               /* REG_BADPAT */
 841     "Invalid collation character",              /* REG_ECOLLATE */
 842     "Invalid character class name",             /* REG_ECTYPE */
 843     "Trailing backslash",                       /* REG_EESCAPE */
 844     "Invalid back reference",                   /* REG_ESUBREG */
 845     "Unmatched [ or [^",                        /* REG_EBRACK */
 846     "Unmatched ( or \\(",                       /* REG_EPAREN */
 847     "Unmatched \\{",                            /* REG_EBRACE */
 848     "Invalid content of \\{\\}",                /* REG_BADBR */
 849     "Invalid range end",                        /* REG_ERANGE */
 850     "Memory exhausted",                         /* REG_ESPACE */
 851     "Invalid preceding regular expression",     /* REG_BADRPT */
 852     "Premature end of regular expression",      /* REG_EEND */
 853     "Regular expression too big",               /* REG_ESIZE */
 854     "Unmatched ) or \\)",                       /* REG_ERPAREN */
 855   };
 856 
 857 /* Subroutine declarations and macros for regex_compile.  */
 858 
 859 static void store_op1 (), store_op2 ();
 860 static void insert_op1 (), insert_op2 ();
 861 static boolean at_begline_loc_p (), at_endline_loc_p ();
 862 static boolean group_in_compile_stack ();
 863 static reg_errcode_t compile_range ();
 864 
 865 /* Fetch the next character in the uncompiled pattern---translating it 
 866    if necessary.  Also cast from a signed character in the constant
 867    string passed to us by the user to an unsigned char that we can use
 868    as an array index (in, e.g., `translate').  */
 869 #define PATFETCH(c)                                                     \
     /* [<][>][^][v][top][bottom][index][help] */
 870   do {if (p == pend) return REG_EEND;                                   \
 871     c = (unsigned char) *p++;                                           \
 872     if (translate) c = translate[c];                                    \
 873   } while (0)
 874 
 875 /* Fetch the next character in the uncompiled pattern, with no
 876    translation.  */
 877 #define PATFETCH_RAW(c)                                                 \
     /* [<][>][^][v][top][bottom][index][help] */
 878   do {if (p == pend) return REG_EEND;                                   \
 879     c = (unsigned char) *p++;                                           \
 880   } while (0)
 881 
 882 /* Go backwards one character in the pattern.  */
 883 #define PATUNFETCH p--
 884 
 885 
 886 /* If `translate' is non-null, return translate[D], else just D.  We
 887    cast the subscript to translate because some data is declared as
 888    `char *', to avoid warnings when a string constant is passed.  But
 889    when we use a character as a subscript we must make it unsigned.  */
 890 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
     /* [<][>][^][v][top][bottom][index][help] */
 891 
 892 
 893 /* Macros for outputting the compiled pattern into `buffer'.  */
 894 
 895 /* If the buffer isn't allocated when it comes in, use this.  */
 896 #define INIT_BUF_SIZE  32
 897 
 898 /* Make sure we have at least N more bytes of space in buffer.  */
 899 #define GET_BUFFER_SPACE(n)                                             \
     /* [<][>][^][v][top][bottom][index][help] */
 900     while (b - bufp->buffer + (n) > bufp->allocated)                    \
 901       EXTEND_BUFFER ()
 902 
 903 /* Make sure we have one more byte of buffer space and then add C to it.  */
 904 #define BUF_PUSH(c)                                                     \
     /* [<][>][^][v][top][bottom][index][help] */
 905   do {                                                                  \
 906     GET_BUFFER_SPACE (1);                                               \
 907     *b++ = (unsigned char) (c);                                         \
 908   } while (0)
 909 
 910 
 911 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
 912 #define BUF_PUSH_2(c1, c2)                                              \
     /* [<][>][^][v][top][bottom][index][help] */
 913   do {                                                                  \
 914     GET_BUFFER_SPACE (2);                                               \
 915     *b++ = (unsigned char) (c1);                                        \
 916     *b++ = (unsigned char) (c2);                                        \
 917   } while (0)
 918 
 919 
 920 /* As with BUF_PUSH_2, except for three bytes.  */
 921 #define BUF_PUSH_3(c1, c2, c3)                                          \
     /* [<][>][^][v][top][bottom][index][help] */
 922   do {                                                                  \
 923     GET_BUFFER_SPACE (3);                                               \
 924     *b++ = (unsigned char) (c1);                                        \
 925     *b++ = (unsigned char) (c2);                                        \
 926     *b++ = (unsigned char) (c3);                                        \
 927   } while (0)
 928 
 929 
 930 /* Store a jump with opcode OP at LOC to location TO.  We store a
 931    relative address offset by the three bytes the jump itself occupies.  */
 932 #define STORE_JUMP(op, loc, to) \
     /* [<][>][^][v][top][bottom][index][help] */
 933   store_op1 (op, loc, (to) - (loc) - 3)
 934 
 935 /* Likewise, for a two-argument jump.  */
 936 #define STORE_JUMP2(op, loc, to, arg) \
     /* [<][>][^][v][top][bottom][index][help] */
 937   store_op2 (op, loc, (to) - (loc) - 3, arg)
 938 
 939 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
 940 #define INSERT_JUMP(op, loc, to) \
     /* [<][>][^][v][top][bottom][index][help] */
 941   insert_op1 (op, loc, (to) - (loc) - 3, b)
 942 
 943 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
 944 #define INSERT_JUMP2(op, loc, to, arg) \
     /* [<][>][^][v][top][bottom][index][help] */
 945   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
 946 
 947 
 948 /* This is not an arbitrary limit: the arguments which represent offsets
 949    into the pattern are two bytes long.  So if 2^16 bytes turns out to
 950    be too small, many things would have to change.  */
 951 #define MAX_BUF_SIZE (1L << 16)
 952 
 953 
 954 /* Extend the buffer by twice its current size via realloc and
 955    reset the pointers that pointed into the old block to point to the
 956    correct places in the new one.  If extending the buffer results in it
 957    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
 958 #define EXTEND_BUFFER()                                                 \
     /* [<][>][^][v][top][bottom][index][help] */
 959   do {                                                                  \
 960     unsigned char *old_buffer = bufp->buffer;                           \
 961     if (bufp->allocated == MAX_BUF_SIZE)                                \
 962       return REG_ESIZE;                                                 \
 963     bufp->allocated <<= 1;                                              \
 964     if (bufp->allocated > MAX_BUF_SIZE)                                 \
 965       bufp->allocated = MAX_BUF_SIZE;                                   \
 966     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
 967     if (bufp->buffer == NULL)                                           \
 968       return REG_ESPACE;                                                \
 969     /* If the buffer moved, move all the pointers into it.  */          \
 970     if (old_buffer != bufp->buffer)                                     \
 971       {                                                                 \
 972         b = (b - old_buffer) + bufp->buffer;                            \
 973         begalt = (begalt - old_buffer) + bufp->buffer;                  \
 974         if (fixup_alt_jump)                                             \
 975           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
 976         if (laststart)                                                  \
 977           laststart = (laststart - old_buffer) + bufp->buffer;          \
 978         if (pending_exact)                                              \
 979           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
 980       }                                                                 \
 981   } while (0)
 982 
 983 
 984 /* Since we have one byte reserved for the register number argument to
 985    {start,stop}_memory, the maximum number of groups we can report
 986    things about is what fits in that byte.  */
 987 #define MAX_REGNUM 255
 988 
 989 /* But patterns can have more than `MAX_REGNUM' registers.  We just
 990    ignore the excess.  */
 991 typedef unsigned regnum_t;
 992 
 993 
 994 /* Macros for the compile stack.  */
 995 
 996 /* Since offsets can go either forwards or backwards, this type needs to
 997    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
 998 typedef int pattern_offset_t;
 999 
1000 typedef struct
1001 {
1002   pattern_offset_t begalt_offset;
1003   pattern_offset_t fixup_alt_jump;
1004   pattern_offset_t inner_group_offset;
1005   pattern_offset_t laststart_offset;  
1006   regnum_t regnum;
1007 } compile_stack_elt_t;
1008 
1009 
1010 typedef struct
1011 {
1012   compile_stack_elt_t *stack;
1013   unsigned size;
1014   unsigned avail;                       /* Offset of next open position.  */
1015 } compile_stack_type;
1016 
1017 
1018 #define INIT_COMPILE_STACK_SIZE 32
1019 
1020 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1021 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1022 
1023 /* The next available element.  */
1024 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1025 
1026 
/* Set the bit for character C in the bit list that starts at `b'.  C is
   parenthesized at both uses so that the cast to unsigned char applies
   to the whole argument expression, not just its first operand.  */
#define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
1031 
1032 
/* Get the next unsigned number in the uncompiled pattern.

   Digits are read with PATFETCH into `c' and accumulated into NUM.  A
   negative NUM is reset to zero before the first digit is added, and
   NUM is left untouched when no digit is found.  On exit `c' holds the
   last character fetched.

   Accumulation stops once NUM exceeds MAX_BUF_SIZE, while the remaining
   digits are still consumed, so an arbitrarily long run of digits
   cannot overflow NUM.  Such a value is already too large for a
   two-byte pattern operand.  NOTE(review): the callers are outside this
   excerpt -- confirm they reject out-of-range counts.  */
#define GET_UNSIGNED_NUMBER(num)                                        \
  { if (p != pend)                                                      \
      {                                                                 \
        PATFETCH (c);                                                   \
        while (ISDIGIT (c))                                             \
          {                                                             \
            if (num < 0)                                                \
              num = 0;                                                  \
            if (num <= MAX_BUF_SIZE)                                    \
              num = num * 10 + c - '0';                                 \
            if (p == pend)                                              \
              break;                                                    \
            PATFETCH (c);                                               \
          }                                                             \
      }                                                                 \
  }
1049 
/* Length of the longest character class name, namely `xdigit'.  */
#define CHAR_CLASS_MAX_LENGTH  6

/* Nonzero if STRING is one of the character class names recognized
   inside `[:' ... `:]'.  */
#define IS_CHAR_CLASS(string)                                           \
   (STREQ (string, "alnum") || STREQ (string, "alpha")                  \
    || STREQ (string, "blank") || STREQ (string, "cntrl")               \
    || STREQ (string, "digit") || STREQ (string, "graph")               \
    || STREQ (string, "lower") || STREQ (string, "print")               \
    || STREQ (string, "punct") || STREQ (string, "space")               \
    || STREQ (string, "upper") || STREQ (string, "xdigit"))
1059 
1060 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1061    Returns one of error codes defined in `regex.h', or zero for success.
1062 
1063    Assumes the `allocated' (and perhaps `buffer') and `translate'
1064    fields are set in BUFP on entry.
1065 
1066    If it succeeds, results are put in BUFP (if it returns an error, the
1067    contents of BUFP are undefined):
1068      `buffer' is the compiled pattern;
1069      `syntax' is set to SYNTAX;
1070      `used' is set to the length of the compiled pattern;
1071      `fastmap_accurate' is zero;
1072      `re_nsub' is the number of subexpressions in PATTERN;
1073      `not_bol' and `not_eol' are zero;
1074    
1075    The `fastmap' and `newline_anchor' fields are neither
1076    examined nor set.  */
1077 
1078 static reg_errcode_t
1079 regex_compile (pattern, size, syntax, bufp)
     /* [<][>][^][v][top][bottom][index][help] */
1080      const char *pattern;
1081      int size;
1082      reg_syntax_t syntax;
1083      struct re_pattern_buffer *bufp;
1084 {
1085   /* We fetch characters from PATTERN here.  Even though PATTERN is
1086      `char *' (i.e., signed), we declare these variables as unsigned, so
1087      they can be reliably used as array indices.  */
1088   register unsigned char c, c1;
1089   
1090   /* A random temporary spot in PATTERN.  */
1091   const char *p1;
1092 
1093   /* Points to the end of the buffer, where we should append.  */
1094   register unsigned char *b;
1095   
1096   /* Keeps track of unclosed groups.  */
1097   compile_stack_type compile_stack;
1098 
1099   /* Points to the current (ending) position in the pattern.  */
1100   const char *p = pattern;
1101   const char *pend = pattern + size;
1102   
1103   /* How to translate the characters in the pattern.  */
1104   char *translate = bufp->translate;
1105 
1106   /* Address of the count-byte of the most recently inserted `exactn'
1107      command.  This makes it possible to tell if a new exact-match
1108      character can be added to that command or if the character requires
1109      a new `exactn' command.  */
1110   unsigned char *pending_exact = 0;
1111 
1112   /* Address of start of the most recently finished expression.
1113      This tells, e.g., postfix * where to find the start of its
1114      operand.  Reset at the beginning of groups and alternatives.  */
1115   unsigned char *laststart = 0;
1116 
1117   /* Address of beginning of regexp, or inside of last group.  */
1118   unsigned char *begalt;
1119 
1120   /* Place in the uncompiled pattern (i.e., the {) to
1121      which to go back if the interval is invalid.  */
1122   const char *beg_interval;
1123                 
1124   /* Address of the place where a forward jump should go to the end of
1125      the containing expression.  Each alternative of an `or' -- except the
1126      last -- ends with a forward jump of this sort.  */
1127   unsigned char *fixup_alt_jump = 0;
1128 
1129   /* Counts open-groups as they are encountered.  Remembered for the
1130      matching close-group on the compile stack, so the same register
1131      number is put in the stop_memory as the start_memory.  */
1132   regnum_t regnum = 0;
1133 
1134 #ifdef DEBUG
1135   DEBUG_PRINT1 ("\nCompiling pattern: ");
1136   if (debug)
1137     {
1138       unsigned debug_count;
1139       
1140       for (debug_count = 0; debug_count < size; debug_count++)
1141         printchar (pattern[debug_count]);
1142       putchar ('\n');
1143     }
1144 #endif /* DEBUG */
1145 
1146   /* Initialize the compile stack.  */
1147   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1148   if (compile_stack.stack == NULL)
1149     return REG_ESPACE;
1150 
1151   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1152   compile_stack.avail = 0;
1153 
1154   /* Initialize the pattern buffer.  */
1155   bufp->syntax = syntax;
1156   bufp->fastmap_accurate = 0;
1157   bufp->not_bol = bufp->not_eol = 0;
1158 
1159   /* Set `used' to zero, so that if we return an error, the pattern
1160      printer (for debugging) will think there's no pattern.  We reset it
1161      at the end.  */
1162   bufp->used = 0;
1163   
1164   /* Always count groups, whether or not bufp->no_sub is set.  */
1165   bufp->re_nsub = 0;                            
1166 
1167 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1168   /* Initialize the syntax table.  */
1169    init_syntax_once ();
1170 #endif
1171 
1172   if (bufp->allocated == 0)
1173     {
1174       if (bufp->buffer)
1175         { /* If zero allocated, but buffer is non-null, try to realloc
1176              enough space.  This loses if buffer's address is bogus, but
1177              that is the user's responsibility.  */
1178           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1179         }
1180       else
1181         { /* Caller did not allocate a buffer.  Do it for them.  */
1182           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1183         }
1184       if (!bufp->buffer) return REG_ESPACE;
1185 
1186       bufp->allocated = INIT_BUF_SIZE;
1187     }
1188 
1189   begalt = b = bufp->buffer;
1190 
1191   /* Loop through the uncompiled pattern until we're at the end.  */
1192   while (p != pend)
1193     {
1194       PATFETCH (c);
1195 
1196       switch (c)
1197         {
1198         case '^':
1199           {
1200             if (   /* If at start of pattern, it's an operator.  */
1201                    p == pattern + 1
1202                    /* If context independent, it's an operator.  */
1203                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1204                    /* Otherwise, depends on what's come before.  */
1205                 || at_begline_loc_p (pattern, p, syntax))
1206               BUF_PUSH (begline);
1207             else
1208               goto normal_char;
1209           }
1210           break;
1211 
1212 
1213         case '$':
1214           {
1215             if (   /* If at end of pattern, it's an operator.  */
1216                    p == pend 
1217                    /* If context independent, it's an operator.  */
1218                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1219                    /* Otherwise, depends on what's next.  */
1220                 || at_endline_loc_p (p, pend, syntax))
1221                BUF_PUSH (endline);
1222              else
1223                goto normal_char;
1224            }
1225            break;
1226 
1227 
1228         case '+':
1229         case '?':
1230           if ((syntax & RE_BK_PLUS_QM)
1231               || (syntax & RE_LIMITED_OPS))
1232             goto normal_char;
1233         handle_plus:
1234         case '*':
1235           /* If there is no previous pattern... */
1236           if (!laststart)
1237             {
1238               if (syntax & RE_CONTEXT_INVALID_OPS)
1239                 return REG_BADRPT;
1240               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1241                 goto normal_char;
1242             }
1243 
1244           {
1245             /* Are we optimizing this jump?  */
1246             boolean keep_string_p = false;
1247             
1248             /* 1 means zero (many) matches is allowed.  */
1249             char zero_times_ok = 0, many_times_ok = 0;
1250 
1251             /* If there is a sequence of repetition chars, collapse it
1252                down to just one (the right one).  We can't combine
1253                interval operators with these because of, e.g., `a{2}*',
1254                which should only match an even number of `a's.  */
1255 
1256             for (;;)
1257               {
1258                 zero_times_ok |= c != '+';
1259                 many_times_ok |= c != '?';
1260 
1261                 if (p == pend)
1262                   break;
1263 
1264                 PATFETCH (c);
1265 
1266                 if (c == '*'
1267                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1268                   ;
1269 
1270                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1271                   {
1272                     if (p == pend) return REG_EESCAPE;
1273 
1274                     PATFETCH (c1);
1275                     if (!(c1 == '+' || c1 == '?'))
1276                       {
1277                         PATUNFETCH;
1278                         PATUNFETCH;
1279                         break;
1280                       }
1281 
1282                     c = c1;
1283                   }
1284                 else
1285                   {
1286                     PATUNFETCH;
1287                     break;
1288                   }
1289 
1290                 /* If we get here, we found another repeat character.  */
1291                }
1292 
1293             /* Star, etc. applied to an empty pattern is equivalent
1294                to an empty pattern.  */
1295             if (!laststart)  
1296               break;
1297 
1298             /* Now we know whether or not zero matches is allowed
1299                and also whether or not two or more matches is allowed.  */
1300             if (many_times_ok)
1301               { /* More than one repetition is allowed, so put in at the
1302                    end a backward relative jump from `b' to before the next
1303                    jump we're going to put in below (which jumps from
1304                    laststart to after this jump).  
1305 
1306                    But if we are at the `*' in the exact sequence `.*\n',
1307                    insert an unconditional jump backwards to the .,
1308                    instead of the beginning of the loop.  This way we only
1309                    push a failure point once, instead of every time
1310                    through the loop.  */
1311                 assert (p - 1 > pattern);
1312 
1313                 /* Allocate the space for the jump.  */
1314                 GET_BUFFER_SPACE (3);
1315 
1316                 /* We know we are not at the first character of the pattern,
1317                    because laststart was nonzero.  And we've already
1318                    incremented `p', by the way, to be the character after
1319                    the `*'.  Do we have to do something analogous here
1320                    for null bytes, because of RE_DOT_NOT_NULL?  */
1321                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1322                     && zero_times_ok
1323                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1324                     && !(syntax & RE_DOT_NEWLINE))
1325                   { /* We have .*\n.  */
1326                     STORE_JUMP (jump, b, laststart);
1327                     keep_string_p = true;
1328                   }
1329                 else
1330                   /* Anything else.  */
1331                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1332 
1333                 /* We've added more stuff to the buffer.  */
1334                 b += 3;
1335               }
1336 
1337             /* On failure, jump from laststart to b + 3, which will be the
1338                end of the buffer after this jump is inserted.  */
1339             GET_BUFFER_SPACE (3);
1340             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1341                                        : on_failure_jump,
1342                          laststart, b + 3);
1343             pending_exact = 0;
1344             b += 3;
1345 
1346             if (!zero_times_ok)
1347               {
1348                 /* At least one repetition is required, so insert a
1349                    `dummy_failure_jump' before the initial
1350                    `on_failure_jump' instruction of the loop. This
1351                    effects a skip over that instruction the first time
1352                    we hit that loop.  */
1353                 GET_BUFFER_SPACE (3);
1354                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1355                 b += 3;
1356               }
1357             }
1358           break;
1359 
1360 
1361         case '.':
1362           laststart = b;
1363           BUF_PUSH (anychar);
1364           break;
1365 
1366 
1367         case '[':
1368           {
1369             boolean had_char_class = false;
1370 
1371             if (p == pend) return REG_EBRACK;
1372 
1373             /* Ensure that we have enough space to push a charset: the
1374                opcode, the length count, and the bitset; 34 bytes in all.  */
1375             GET_BUFFER_SPACE (34);
1376 
1377             laststart = b;
1378 
1379             /* We test `*p == '^' twice, instead of using an if
1380                statement, so we only need one BUF_PUSH.  */
1381             BUF_PUSH (*p == '^' ? charset_not : charset); 
1382             if (*p == '^')
1383               p++;
1384 
1385             /* Remember the first position in the bracket expression.  */
1386             p1 = p;
1387 
1388             /* Push the number of bytes in the bitmap.  */
1389             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1390 
1391             /* Clear the whole map.  */
1392             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1393 
1394             /* charset_not matches newline according to a syntax bit.  */
1395             if ((re_opcode_t) b[-2] == charset_not
1396                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1397               SET_LIST_BIT ('\n');
1398 
1399             /* Read in characters and ranges, setting map bits.  */
1400             for (;;)
1401               {
1402                 if (p == pend) return REG_EBRACK;
1403 
1404                 PATFETCH (c);
1405 
1406                 /* \ might escape characters inside [...] and [^...].  */
1407                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1408                   {
1409                     if (p == pend) return REG_EESCAPE;
1410 
1411                     PATFETCH (c1);
1412                     SET_LIST_BIT (c1);
1413                     continue;
1414                   }
1415 
1416                 /* Could be the end of the bracket expression.  If it's
1417                    not (i.e., when the bracket expression is `[]' so
1418                    far), the ']' character bit gets set way below.  */
1419                 if (c == ']' && p != p1 + 1)
1420                   break;
1421 
1422                 /* Look ahead to see if it's a range when the last thing
1423                    was a character class.  */
1424                 if (had_char_class && c == '-' && *p != ']')
1425                   return REG_ERANGE;
1426 
1427                 /* Look ahead to see if it's a range when the last thing
1428                    was a character: if this is a hyphen not at the
1429                    beginning or the end of a list, then it's the range
1430                    operator.  */
1431                 if (c == '-' 
1432                     && !(p - 2 >= pattern && p[-2] == '[') 
1433                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1434                     && *p != ']')
1435                   {
1436                     reg_errcode_t ret
1437                       = compile_range (&p, pend, translate, syntax, b);
1438                     if (ret != REG_NOERROR) return ret;
1439                   }
1440 
1441                 else if (p[0] == '-' && p[1] != ']')
1442                   { /* This handles ranges made up of characters only.  */
1443                     reg_errcode_t ret;
1444 
1445                     /* Move past the `-'.  */
1446                     PATFETCH (c1);
1447                     
1448                     ret = compile_range (&p, pend, translate, syntax, b);
1449                     if (ret != REG_NOERROR) return ret;
1450                   }
1451 
1452                 /* See if we're at the beginning of a possible character
1453                    class.  */
1454 
1455                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1456                   { /* Leave room for the null.  */
1457                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1458 
1459                     PATFETCH (c);
1460                     c1 = 0;
1461 
1462                     /* If pattern is `[[:'.  */
1463                     if (p == pend) return REG_EBRACK;
1464 
1465                     for (;;)
1466                       {
1467                         PATFETCH (c);
1468                         if (c == ':' || c == ']' || p == pend
1469                             || c1 == CHAR_CLASS_MAX_LENGTH)
1470                           break;
1471                         str[c1++] = c;
1472                       }
1473                     str[c1] = '\0';
1474 
1475                     /* If it isn't a word bracketed by `[:' and `:]':
1476                        undo the ending character, the letters, and leave 
1477                        the leading `:' and `[' (but set bits for them).  */
1478                     if (c == ':' && *p == ']')
1479                       {
1480                         int ch;
1481                         boolean is_alnum = STREQ (str, "alnum");
1482                         boolean is_alpha = STREQ (str, "alpha");
1483                         boolean is_blank = STREQ (str, "blank");
1484                         boolean is_cntrl = STREQ (str, "cntrl");
1485                         boolean is_digit = STREQ (str, "digit");
1486                         boolean is_graph = STREQ (str, "graph");
1487                         boolean is_lower = STREQ (str, "lower");
1488                         boolean is_print = STREQ (str, "print");
1489                         boolean is_punct = STREQ (str, "punct");
1490                         boolean is_space = STREQ (str, "space");
1491                         boolean is_upper = STREQ (str, "upper");
1492                         boolean is_xdigit = STREQ (str, "xdigit");
1493                         
1494                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1495 
1496                         /* Throw away the ] at the end of the character
1497                            class.  */
1498                         PATFETCH (c);                                   
1499 
1500                         if (p == pend) return REG_EBRACK;
1501 
1502                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1503                           {
1504                             if (   (is_alnum  && ISALNUM (ch))
1505                                 || (is_alpha  && ISALPHA (ch))
1506                                 || (is_blank  && ISBLANK (ch))
1507                                 || (is_cntrl  && ISCNTRL (ch))
1508                                 || (is_digit  && ISDIGIT (ch))
1509                                 || (is_graph  && ISGRAPH (ch))
1510                                 || (is_lower  && ISLOWER (ch))
1511                                 || (is_print  && ISPRINT (ch))
1512                                 || (is_punct  && ISPUNCT (ch))
1513                                 || (is_space  && ISSPACE (ch))
1514                                 || (is_upper  && ISUPPER (ch))
1515                                 || (is_xdigit && ISXDIGIT (ch)))
1516                             SET_LIST_BIT (ch);
1517                           }
1518                         had_char_class = true;
1519                       }
1520                     else
1521                       {
1522                         c1++;
1523                         while (c1--)    
1524                           PATUNFETCH;
1525                         SET_LIST_BIT ('[');
1526                         SET_LIST_BIT (':');
1527                         had_char_class = false;
1528                       }
1529                   }
1530                 else
1531                   {
1532                     had_char_class = false;
1533                     SET_LIST_BIT (c);
1534                   }
1535               }
1536 
1537             /* Discard any (non)matching list bytes that are all 0 at the
1538                end of the map.  Decrease the map-length byte too.  */
1539             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 
1540               b[-1]--; 
1541             b += b[-1];
1542           }
1543           break;
1544 
1545 
1546         case '(':
1547           if (syntax & RE_NO_BK_PARENS)
1548             goto handle_open;
1549           else
1550             goto normal_char;
1551 
1552 
1553         case ')':
1554           if (syntax & RE_NO_BK_PARENS)
1555             goto handle_close;
1556           else
1557             goto normal_char;
1558 
1559 
1560         case '\n':
1561           if (syntax & RE_NEWLINE_ALT)
1562             goto handle_alt;
1563           else
1564             goto normal_char;
1565 
1566 
1567         case '|':
1568           if (syntax & RE_NO_BK_VBAR)
1569             goto handle_alt;
1570           else
1571             goto normal_char;
1572 
1573 
1574         case '{':
1575            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1576              goto handle_interval;
1577            else
1578              goto normal_char;
1579 
1580 
1581         case '\\':
1582           if (p == pend) return REG_EESCAPE;
1583 
1584           /* Do not translate the character after the \, so that we can
1585              distinguish, e.g., \B from \b, even if we normally would
1586              translate, e.g., B to b.  */
1587           PATFETCH_RAW (c);
1588 
1589           switch (c)
1590             {
1591             case '(':
1592               if (syntax & RE_NO_BK_PARENS)
1593                 goto normal_backslash;
1594 
1595             handle_open:
1596               bufp->re_nsub++;
1597               regnum++;
1598 
1599               if (COMPILE_STACK_FULL)
1600                 { 
1601                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1602                             compile_stack_elt_t);
1603                   if (compile_stack.stack == NULL) return REG_ESPACE;
1604 
1605                   compile_stack.size <<= 1;
1606                 }
1607 
1608               /* These are the values to restore when we hit end of this
1609                  group.  They are all relative offsets, so that if the
1610                  whole pattern moves because of realloc, they will still
1611                  be valid.  */
1612               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1613               COMPILE_STACK_TOP.fixup_alt_jump 
1614                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1615               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1616               COMPILE_STACK_TOP.regnum = regnum;
1617 
1618               /* We will eventually replace the 0 with the number of
1619                  groups inner to this one.  But do not push a
1620                  start_memory for groups beyond the last one we can
1621                  represent in the compiled pattern.  */
1622               if (regnum <= MAX_REGNUM)
1623                 {
1624                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1625                   BUF_PUSH_3 (start_memory, regnum, 0);
1626                 }
1627                 
1628               compile_stack.avail++;
1629 
1630               fixup_alt_jump = 0;
1631               laststart = 0;
1632               begalt = b;
1633               /* If we've reached MAX_REGNUM groups, then this open
1634                  won't actually generate any code, so we'll have to
1635                  clear pending_exact explicitly.  */
1636               pending_exact = 0;
1637               break;
1638 
1639 
1640             case ')':
1641               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1642 
1643               if (COMPILE_STACK_EMPTY) {
1644                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1645                   goto normal_backslash;
1646                 else
1647                   return REG_ERPAREN;
1648               }
1649 
1650             handle_close:
1651               if (fixup_alt_jump)
1652                 { /* Push a dummy failure point at the end of the
1653                      alternative for a possible future
1654                      `pop_failure_jump' to pop.  See comments at
1655                      `push_dummy_failure' in `re_match_2'.  */
1656                   BUF_PUSH (push_dummy_failure);
1657                   
1658                   /* We allocated space for this jump when we assigned
1659                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1660                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1661                 }
1662 
1663               /* See similar code for backslashed right paren above.  */
1664               if (COMPILE_STACK_EMPTY) {
1665                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1666                   goto normal_char;
1667                 else
1668                   return REG_ERPAREN;
1669               }
1670 
1671               /* Since we just checked for an empty stack above, this
1672                  ``can't happen''.  */
1673               assert (compile_stack.avail != 0);
1674               {
1675                 /* We don't just want to restore into `regnum', because
1676                    later groups should continue to be numbered higher,
1677                    as in `(ab)c(de)' -- the second group is #2.  */
1678                 regnum_t this_group_regnum;
1679 
1680                 compile_stack.avail--;          
1681                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1682                 fixup_alt_jump
1683                   = COMPILE_STACK_TOP.fixup_alt_jump
1684                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 
1685                     : 0;
1686                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1687                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1688                 /* If we've reached MAX_REGNUM groups, then this close
1689                    won't actually generate any code, so we'll have to
1690                    clear pending_exact explicitly.  */
1691                 pending_exact = 0;
1692 
1693                 /* We're at the end of the group, so now we know how many
1694                    groups were inside this one.  */
1695                 if (this_group_regnum <= MAX_REGNUM)
1696                   {
1697                     unsigned char *inner_group_loc
1698                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1699                     
1700                     *inner_group_loc = regnum - this_group_regnum;
1701                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1702                                 regnum - this_group_regnum);
1703                   }
1704               }
1705               break;
1706 
1707 
1708             case '|':                                   /* `\|'.  */
1709               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1710                 goto normal_backslash;
1711             handle_alt:
1712               if (syntax & RE_LIMITED_OPS)
1713                 goto normal_char;
1714 
1715               /* Insert before the previous alternative a jump which
1716                  jumps to this alternative if the former fails.  */
1717               GET_BUFFER_SPACE (3);
1718               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1719               pending_exact = 0;
1720               b += 3;
1721 
1722               /* The alternative before this one has a jump after it
1723                  which gets executed if it gets matched.  Adjust that
1724                  jump so it will jump to this alternative's analogous
1725                  jump (put in below, which in turn will jump to the next
1726                  (if any) alternative's such jump, etc.).  The last such
1727                  jump jumps to the correct final destination.  A picture:
1728                           _____ _____ 
1729                           |   | |   |   
1730                           |   v |   v 
1731                          a | b   | c   
1732 
1733                  If we are at `b', then fixup_alt_jump right now points to a
1734                  three-byte space after `a'.  We'll put in the jump, set
1735                  fixup_alt_jump to right after `b', and leave behind three
1736                  bytes which we'll fill in when we get to after `c'.  */
1737 
1738               if (fixup_alt_jump)
1739                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1740 
1741               /* Mark and leave space for a jump after this alternative,
1742                  to be filled in later either by next alternative or
1743                  when know we're at the end of a series of alternatives.  */
1744               fixup_alt_jump = b;
1745               GET_BUFFER_SPACE (3);
1746               b += 3;
1747 
1748               laststart = 0;
1749               begalt = b;
1750               break;
1751 
1752 
1753             case '{': 
1754               /* If \{ is a literal.  */
1755               if (!(syntax & RE_INTERVALS)
1756                      /* If we're at `\{' and it's not the open-interval 
1757                         operator.  */
1758                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1759                   || (p - 2 == pattern  &&  p == pend))
1760                 goto normal_backslash;
1761 
1762             handle_interval:
1763               {
1764                 /* If got here, then the syntax allows intervals.  */
1765 
1766                 /* At least (most) this many matches must be made.  */
1767                 int lower_bound = -1, upper_bound = -1;
1768 
1769                 beg_interval = p - 1;
1770 
1771                 if (p == pend)
1772                   {
1773                     if (syntax & RE_NO_BK_BRACES)
1774                       goto unfetch_interval;
1775                     else
1776                       return REG_EBRACE;
1777                   }
1778 
1779                 GET_UNSIGNED_NUMBER (lower_bound);
1780 
1781                 if (c == ',')
1782                   {
1783                     GET_UNSIGNED_NUMBER (upper_bound);
1784                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1785                   }
1786                 else
1787                   /* Interval such as `{1}' => match exactly once. */
1788                   upper_bound = lower_bound;
1789 
1790                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1791                     || lower_bound > upper_bound)
1792                   {
1793                     if (syntax & RE_NO_BK_BRACES)
1794                       goto unfetch_interval;
1795                     else 
1796                       return REG_BADBR;
1797                   }
1798 
1799                 if (!(syntax & RE_NO_BK_BRACES)) 
1800                   {
1801                     if (c != '\\') return REG_EBRACE;
1802 
1803                     PATFETCH (c);
1804                   }
1805 
1806                 if (c != '}')
1807                   {
1808                     if (syntax & RE_NO_BK_BRACES)
1809                       goto unfetch_interval;
1810                     else 
1811                       return REG_BADBR;
1812                   }
1813 
1814                 /* We just parsed a valid interval.  */
1815 
1816                 /* If it's invalid to have no preceding re.  */
1817                 if (!laststart)
1818                   {
1819                     if (syntax & RE_CONTEXT_INVALID_OPS)
1820                       return REG_BADRPT;
1821                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1822                       laststart = b;
1823                     else
1824                       goto unfetch_interval;
1825                   }
1826 
1827                 /* If the upper bound is zero, don't want to succeed at
1828                    all; jump from `laststart' to `b + 3', which will be
1829                    the end of the buffer after we insert the jump.  */
1830                  if (upper_bound == 0)
1831                    {
1832                      GET_BUFFER_SPACE (3);
1833                      INSERT_JUMP (jump, laststart, b + 3);
1834                      b += 3;
1835                    }
1836 
1837                  /* Otherwise, we have a nontrivial interval.  When
1838                     we're all done, the pattern will look like:
1839                       set_number_at <jump count> <upper bound>
1840                       set_number_at <succeed_n count> <lower bound>
1841                       succeed_n <after jump addr> <succeed_n count>
1842                       <body of loop>
1843                       jump_n <succeed_n addr> <jump count>
1844                     (The upper bound and `jump_n' are omitted if
1845                     `upper_bound' is 1, though.)  */
1846                  else 
1847                    { /* If the upper bound is > 1, we need to insert
1848                         more at the end of the loop.  */
1849                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1850 
1851                      GET_BUFFER_SPACE (nbytes);
1852 
1853                      /* Initialize lower bound of the `succeed_n', even
1854                         though it will be set during matching by its
1855                         attendant `set_number_at' (inserted next),
1856                         because `re_compile_fastmap' needs to know.
1857                         Jump to the `jump_n' we might insert below.  */
1858                      INSERT_JUMP2 (succeed_n, laststart,
1859                                    b + 5 + (upper_bound > 1) * 5,
1860                                    lower_bound);
1861                      b += 5;
1862 
1863                      /* Code to initialize the lower bound.  Insert 
1864                         before the `succeed_n'.  The `5' is the last two
1865                         bytes of this `set_number_at', plus 3 bytes of
1866                         the following `succeed_n'.  */
1867                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1868                      b += 5;
1869 
1870                      if (upper_bound > 1)
1871                        { /* More than one repetition is allowed, so
1872                             append a backward jump to the `succeed_n'
1873                             that starts this interval.
1874                             
1875                             When we've reached this during matching,
1876                             we'll have matched the interval once, so
1877                             jump back only `upper_bound - 1' times.  */
1878                          STORE_JUMP2 (jump_n, b, laststart + 5,
1879                                       upper_bound - 1);
1880                          b += 5;
1881 
1882                          /* The location we want to set is the second
1883                             parameter of the `jump_n'; that is `b-2' as
1884                             an absolute address.  `laststart' will be
1885                             the `set_number_at' we're about to insert;
1886                             `laststart+3' the number to set, the source
1887                             for the relative address.  But we are
1888                             inserting into the middle of the pattern --
1889                             so everything is getting moved up by 5.
1890                             Conclusion: (b - 2) - (laststart + 3) + 5,
1891                             i.e., b - laststart.
1892                             
1893                             We insert this at the beginning of the loop
1894                             so that if we fail during matching, we'll
1895                             reinitialize the bounds.  */
1896                          insert_op2 (set_number_at, laststart, b - laststart,
1897                                      upper_bound - 1, b);
1898                          b += 5;
1899                        }
1900                    }
1901                 pending_exact = 0;
1902                 beg_interval = NULL;
1903               }
1904               break;
1905 
1906             unfetch_interval:
1907               /* If an invalid interval, match the characters as literals.  */
1908                assert (beg_interval);
1909                p = beg_interval;
1910                beg_interval = NULL;
1911 
1912                /* normal_char and normal_backslash need `c'.  */
1913                PATFETCH (c);    
1914 
1915                if (!(syntax & RE_NO_BK_BRACES))
1916                  {
1917                    if (p > pattern  &&  p[-1] == '\\')
1918                      goto normal_backslash;
1919                  }
1920                goto normal_char;
1921 
1922 #ifdef emacs
1923             /* There is no way to specify the before_dot and after_dot
1924                operators.  rms says this is ok.  --karl  */
1925             case '=':
1926               BUF_PUSH (at_dot);
1927               break;
1928 
1929             case 's':   
1930               laststart = b;
1931               PATFETCH (c);
1932               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1933               break;
1934 
1935             case 'S':
1936               laststart = b;
1937               PATFETCH (c);
1938               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1939               break;
1940 #endif /* emacs */
1941 
1942 
1943             case 'w':
1944               laststart = b;
1945               BUF_PUSH (wordchar);
1946               break;
1947 
1948 
1949             case 'W':
1950               laststart = b;
1951               BUF_PUSH (notwordchar);
1952               break;
1953 
1954 
1955             case '<':
1956               BUF_PUSH (wordbeg);
1957               break;
1958 
1959             case '>':
1960               BUF_PUSH (wordend);
1961               break;
1962 
1963             case 'b':
1964               BUF_PUSH (wordbound);
1965               break;
1966 
1967             case 'B':
1968               BUF_PUSH (notwordbound);
1969               break;
1970 
1971             case '`':
1972               BUF_PUSH (begbuf);
1973               break;
1974 
1975             case '\'':
1976               BUF_PUSH (endbuf);
1977               break;
1978 
1979             case '1': case '2': case '3': case '4': case '5':
1980             case '6': case '7': case '8': case '9':
1981               if (syntax & RE_NO_BK_REFS)
1982                 goto normal_char;
1983 
1984               c1 = c - '0';
1985 
1986               if (c1 > regnum)
1987                 return REG_ESUBREG;
1988 
1989               /* Can't back reference to a subexpression if inside of it.  */
1990               if (group_in_compile_stack (compile_stack, c1))
1991                 goto normal_char;
1992 
1993               laststart = b;
1994               BUF_PUSH_2 (duplicate, c1);
1995               break;
1996 
1997 
1998             case '+':
1999             case '?':
2000               if (syntax & RE_BK_PLUS_QM)
2001                 goto handle_plus;
2002               else
2003                 goto normal_backslash;
2004 
2005             default:
2006             normal_backslash:
2007               /* You might think it would be useful for \ to mean
2008                  not to translate; but if we don't translate it
2009                  it will never match anything.  */
2010               c = TRANSLATE (c);
2011               goto normal_char;
2012             }
2013           break;
2014 
2015 
2016         default:
2017         /* Expects the character in `c'.  */
2018         normal_char:
2019               /* If no exactn currently being built.  */
2020           if (!pending_exact 
2021 
2022               /* If last exactn not at current position.  */
2023               || pending_exact + *pending_exact + 1 != b
2024               
2025               /* We have only one byte following the exactn for the count.  */
2026               || *pending_exact == (1 << BYTEWIDTH) - 1
2027 
2028               /* If followed by a repetition operator.  */
2029               || *p == '*' || *p == '^'
2030               || ((syntax & RE_BK_PLUS_QM)
2031                   ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2032                   : (*p == '+' || *p == '?'))
2033               || ((syntax & RE_INTERVALS)
2034                   && ((syntax & RE_NO_BK_BRACES)
2035                       ? *p == '{'
2036                       : (p[0] == '\\' && p[1] == '{'))))
2037             {
2038               /* Start building a new exactn.  */
2039               
2040               laststart = b;
2041 
2042               BUF_PUSH_2 (exactn, 0);
2043               pending_exact = b - 1;
2044             }
2045             
2046           BUF_PUSH (c);
2047           (*pending_exact)++;
2048           break;
2049         } /* switch (c) */
2050     } /* while p != pend */
2051 
2052   
2053   /* Through the pattern now.  */
2054   
2055   if (fixup_alt_jump)
2056     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2057 
2058   if (!COMPILE_STACK_EMPTY) 
2059     return REG_EPAREN;
2060 
2061   free (compile_stack.stack);
2062 
2063   /* We have succeeded; set the length of the buffer.  */
2064   bufp->used = b - bufp->buffer;
2065 
2066 #ifdef DEBUG
2067   if (debug)
2068     {
2069       DEBUG_PRINT1 ("\nCompiled pattern: ");
2070       print_compiled_pattern (bufp);
2071     }
2072 #endif /* DEBUG */
2073 
2074   return REG_NOERROR;
2075 } /* regex_compile */
2076 
2077 /* Subroutines for `regex_compile'.  */
2078 
2079 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
     /* Three bytes are written in total: the opcode at LOC[0], then ARG
        starting at LOC + 1 through the STORE_NUMBER macro.  No bytes are
        moved and no bounds are checked here, so LOC must already have
        room for them.  */
2080 
2081 static void
2082 store_op1 (op, loc, arg)
     /* [<][>][^][v][top][bottom][index][help] */
2083     re_opcode_t op;
2084     unsigned char *loc;
2085     int arg;
2086 {
       /* The opcode is narrowed to a single byte.  */
2087   *loc = (unsigned char) op;
2088   STORE_NUMBER (loc + 1, arg);
2089 }
2090 
2091 
2092 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
     /* Five bytes are written in total: the opcode at LOC[0], ARG1
        starting at LOC + 1 and ARG2 starting at LOC + 3.  As with
        `store_op1', nothing is shifted or bounds-checked here.  */
2093 
2094 static void
2095 store_op2 (op, loc, arg1, arg2)
     /* [<][>][^][v][top][bottom][index][help] */
2096     re_opcode_t op;
2097     unsigned char *loc;
2098     int arg1, arg2;
2099 {
2100   *loc = (unsigned char) op;
2101   STORE_NUMBER (loc + 1, arg1);
2102   STORE_NUMBER (loc + 3, arg2);
2103 }
2104 
2105 
2106 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2107    for OP followed by two-byte integer parameter ARG.  */
     /* END is exclusive: the bytes LOC .. END - 1 are moved up by three,
        so END .. END + 2 are overwritten and must already be usable by
        the caller.  LOC must not lie beyond END, because the copy loop
        only stops when it has walked back down to LOC.  */
2108 
2109 static void
2110 insert_op1 (op, loc, arg, end)
     /* [<][>][^][v][top][bottom][index][help] */
2111     re_opcode_t op;
2112     unsigned char *loc;
2113     int arg;
2114     unsigned char *end;    
2115 {
2116   register unsigned char *pfrom = end;
2117   register unsigned char *pto = end + 3;
2118 
       /* Copy from the highest address downwards; source and destination
          overlap, and this order never overwrites a byte before it has
          been copied.  */
2119   while (pfrom != loc)
2120     *--pto = *--pfrom;
2121     
       /* The three bytes at LOC are now free for the new opcode.  */
2122   store_op1 (op, loc, arg);
2123 }
2124 
2125 
2126 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
     /* The bytes LOC .. END - 1 are moved up by five, so END .. END + 4
        are overwritten and must already be usable by the caller.  The
        opcode and both arguments are then written at LOC by
        `store_op2'.  */
2127 
2128 static void
2129 insert_op2 (op, loc, arg1, arg2, end)
     /* [<][>][^][v][top][bottom][index][help] */
2130     re_opcode_t op;
2131     unsigned char *loc;
2132     int arg1, arg2;
2133     unsigned char *end;    
2134 {
2135   register unsigned char *pfrom = end;
2136   register unsigned char *pto = end + 5;
2137 
       /* Copy from the highest address downwards so the overlapping move
          never overwrites a byte before it has been copied.  */
2138   while (pfrom != loc)
2139     *--pto = *--pfrom;
2140     
2141   store_op2 (op, loc, arg1, arg2);
2142 }
2143 
2144 
2145 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2146    after an alternative or a begin-subexpression.  We assume there is at
2147    least one character before the ^.  */
     /* SYNTAX decides whether `(' and `|' act as operators on their own
        (RE_NO_BK_PARENS / RE_NO_BK_VBAR set) or only when preceded by a
        backslash.
        NOTE(review): when the RE_NO_BK_* bit is set the character before
        the `(' or `|' is not examined, so a backslash-escaped `\(' or `\|'
        in front of the ^ is also accepted -- confirm this is intended.  */
2148 
2149 static boolean
2150 at_begline_loc_p (pattern, p, syntax)
     /* [<][>][^][v][top][bottom][index][help] */
2151     const char *pattern, *p;
2152     reg_syntax_t syntax;
2153 {
       /* P[-1] is the ^ itself, so P - 2 is the character before it.  */
2154   const char *prev = p - 2;
       /* Look one further back only if that position is still inside
          PATTERN.  */
2155   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2156   
2157   return
2158        /* After a subexpression?  */
2159        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2160        /* After an alternative?  */
2161     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2162 }
2163 
2164 
2165 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2166    at least one character after the $, i.e., `P < PEND'.  */
     /* P points at the character that follows the $, and PEND is the end of
        the pattern.  Return true if the $ is immediately followed by a
        close-group or an alternation operator.  SYNTAX selects the
        spelling of those operators: a bare `)' / `|' when RE_NO_BK_PARENS
        / RE_NO_BK_VBAR is set, otherwise the backslashed forms `\)' /
        `\|'.
        SYNTAX is declared as `reg_syntax_t', matching at_begline_loc_p and
        compile_range, so the callee sees the same type the callers hold
        their syntax bits in.  */
2167 
2168 static boolean
2169 at_endline_loc_p (p, pend, syntax)
     /* [<][>][^][v][top][bottom][index][help] */
2170     const char *p, *pend;
2171     reg_syntax_t syntax;
2172 {
2173   const char *next = p;
2174   boolean next_backslash = *next == '\\';
       /* The second character ahead exists only if it lies before PEND;
          NULL marks "no such character" and is tested before every
          dereference below.  */
2175   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2176   
2177   return
2178        /* Before a subexpression?  */
2179        (syntax & RE_NO_BK_PARENS ? *next == ')'
2180         : next_backslash && next_next && *next_next == ')')
2181        /* Before an alternative?  */
2182     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2183         : next_backslash && next_next && *next_next == '|');
2184 }
2185 
2186 
2187 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 
2188    false if it's not.  */
     /* COMPILE_STACK is received by value; only its `avail' count and the
        `regnum' field of each stacked element are read.  An empty stack
        (avail == 0) yields false.  */
2189 
2190 static boolean
2191 group_in_compile_stack (compile_stack, regnum)
     /* [<][>][^][v][top][bottom][index][help] */
2192     compile_stack_type compile_stack;
2193     regnum_t regnum;
2194 {
2195   int this_element;
2196 
       /* Scan from the most recently pushed element down to index 0.  */
2197   for (this_element = compile_stack.avail - 1;  
2198        this_element >= 0; 
2199        this_element--)
2200     if (compile_stack.stack[this_element].regnum == regnum)
2201       return true;
2202 
2203   return false;
2204 }
2205 
2206 
2207 /* Read the ending character of a range (in a bracket expression) from the
2208    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2209    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2210    Then we set the translation of all bits between the starting and
2211    ending characters (inclusive) in the compiled pattern B.
2212    
2213    Return an error code.
2214    
2215    We use these short variable names so we can use the same macros as
2216    `regex_compile' itself.  */
     /* Notes on the code below:
        - TRANSLATE and SET_LIST_BIT are macros defined elsewhere in this
          file; presumably they pick up the `translate' and `b' parameters
          by name, since neither is referenced directly in this body.
        - *P_PTR is advanced past the ending character before the
          empty-range test, so the caller's pointer moves even when the
          range turns out to be empty.
        - REG_ERANGE is returned when the pattern ends right after the `-',
          or for an empty range when RE_NO_EMPTY_RANGES is set in SYNTAX;
          every other path returns REG_NOERROR.  */
2217 
2218 static reg_errcode_t
2219 compile_range (p_ptr, pend, translate, syntax, b)
     /* [<][>][^][v][top][bottom][index][help] */
2220     const char **p_ptr, *pend;
2221     char *translate;
2222     reg_syntax_t syntax;
2223     unsigned char *b;
2224 {
2225   unsigned this_char;
2226 
2227   const char *p = *p_ptr;
2228   int range_start, range_end;
2229   
       /* Nothing follows the `-': there is no ending character to read.  */
2230   if (p == pend)
2231     return REG_ERANGE;
2232 
2233   /* Even though the pattern is a signed `char *', we need to fetch
2234      with unsigned char *'s; if the high bit of the pattern character
2235      is set, the range endpoints will be negative if we fetch using a
2236      signed char *.
2237 
2238      We also want to fetch the endpoints without translating them; the 
2239      appropriate translation is done in the bit-setting loop below.  */
2240   range_start = ((unsigned char *) p)[-2];
2241   range_end   = ((unsigned char *) p)[0];
2242 
2243   /* Have to increment the pointer into the pattern string, so the
2244      caller isn't still at the ending character.  */
2245   (*p_ptr)++;
2246 
2247   /* If the start is after the end, the range is empty.  */
2248   if (range_start > range_end)
2249     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2250 
2251   /* Here we see why `this_char' has to be larger than an `unsigned
2252      char' -- the range is inclusive, so if `range_end' == 0xff
2253      (assuming 8-bit characters), we would otherwise go into an infinite
2254      loop, since all characters <= 0xff.  */
2255   for (this_char = range_start; this_char <= range_end; this_char++)
2256     {
2257       SET_LIST_BIT (TRANSLATE (this_char));
2258     }
2259   
2260   return REG_NOERROR;
2261 }
2262 
2263 /* Failure stack declarations and macros; both re_compile_fastmap and
2264    re_match_2 use a failure stack.  These have to be macros because of
2265    REGEX_ALLOCATE.  */
2266    
2267 
2268 /* Number of failure points for which to initially allocate space
2269    when matching.  If this number is exceeded, we allocate more
2270    space, so it is not a hard limit.  */
     /* The #ifndef guard lets a definition made before this point override
        the default of 5.  */
2271 #ifndef INIT_FAILURE_ALLOC
2272 #define INIT_FAILURE_ALLOC 5
2273 #endif
2274 
2275 /* Roughly the maximum number of failure points on the stack.  Would be
2276    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2277    This is a variable only so users of regex can assign to it; we never
2278    change it ourselves.  */
2279 int re_max_failures = 2000;
2280 
     /* One failure-stack slot: a pointer to constant bytes.  */
2281 typedef const unsigned char *fail_stack_elt_t;
2282 
     /* The failure stack itself: STACK is the slot storage, SIZE the
        number of slots it holds, and AVAIL the index of the next unused
        slot (so AVAIL == 0 means empty and AVAIL == SIZE means full).  */
2283 typedef struct
2284 {
2285   fail_stack_elt_t *stack;
2286   unsigned size;
2287   unsigned avail;                       /* Offset of next open position.  */
2288 } fail_stack_type;
2289 
     /* State tests.  FAIL_STACK_EMPTY, FAIL_STACK_FULL and FAIL_STACK_TOP
        refer to a variable named `fail_stack' in the expanding scope;
        FAIL_STACK_PTR_EMPTY goes through a pointer named `fail_stack_ptr'
        instead.
        NOTE(review): FAIL_STACK_TOP indexes the slot at `avail', which the
        struct comment describes as the next open position rather than the
        last pushed item -- confirm that this is what users of the macro
        expect.  */
2290 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
     /* [<][>][^][v][top][bottom][index][help] */
2291 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
     /* [<][>][^][v][top][bottom][index][help] */
2292 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
     /* [<][>][^][v][top][bottom][index][help] */
2293 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
     /* [<][>][^][v][top][bottom][index][help] */
2294 
2295 
2296 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
     /* The macro operates on a variable named `fail_stack' in the expanding
        scope.  It obtains room for INIT_FAILURE_ALLOC slots through
        REGEX_ALLOCATE, then records that capacity in `size' and marks the
        stack empty (`avail' = 0).  Because the `return' is part of the
        expansion, a failed allocation returns -2 from whichever function
        uses the macro, so that function's return type must accept -2.  */
2297 
2298 #define INIT_FAIL_STACK()                                               \
     /* [<][>][^][v][top][bottom][index][help] */
2299   do {                                                                  \
2300     fail_stack.stack = (fail_stack_elt_t *)                             \
2301       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
2302                                                                         \
2303     if (fail_stack.stack == NULL)                                       \
2304       return -2;                                                        \
2305                                                                         \
2306     fail_stack.size = INIT_FAILURE_ALLOC;                               \
2307     fail_stack.avail = 0;                                               \
2308   } while (0)
2309 
2310 
2311 /* Double the size of FAIL_STACK.  The stack is not grown any further
        once its size exceeds `re_max_failures * MAX_FAILURE_ITEMS' slots.
2312 
2313    Evaluates to 1 if it succeeds, and to 0 if either we ran out of
2314    memory allocating space for it or it was already too large.
2315    
2316    REGEX_REALLOCATE requires `destination' be declared, and the limit
        test expands MAX_FAILURE_ITEMS, which reads `num_regs'; both must
        be in scope where this macro is used.

        NOTE(review): the result of REGEX_REALLOCATE is stored into
        `stack' before it is tested against NULL, so after a failed
        reallocation `stack' is NULL.  Whether the previous block is
        still reachable depends on how REGEX_REALLOCATE is defined --
        confirm.   */
2317 
2318 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
     /* [<][>][^][v][top][bottom][index][help] */
2319   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
2320    ? 0                                                                  \
2321    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
2322         REGEX_REALLOCATE ((fail_stack).stack,                           \
2323           (fail_stack).size * sizeof (fail_stack_elt_t),                \
2324           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
2325                                                                         \
2326       (fail_stack).stack == NULL                                        \
2327       ? 0                                                               \
2328       : ((fail_stack).size <<= 1,                                       \
2329          1)))
2330 
2331 
2332 /* Push PATTERN_OP on FAIL_STACK, doubling the stack first if it is full.
2333 
2334    Evaluates to 1 if it was able to do so and to 0 if DOUBLE_FAIL_STACK
2335    could not make room.

        The fullness test goes through FAIL_STACK_FULL, which always looks
        at the variable named `fail_stack' rather than at the FAIL_STACK
        argument, so the argument has to be that same variable for the
        test and the store to agree.  */
2336 #define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
     /* [<][>][^][v][top][bottom][index][help] */
2337   ((FAIL_STACK_FULL ()                                                  \
2338     && !DOUBLE_FAIL_STACK (fail_stack))                                 \
2339     ? 0                                                                 \
2340     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,           \
2341        1))
2342 
2343 /* This pushes an item onto the failure stack.  ITEM is cast to
2344    `fail_stack_elt_t', so it must be a value that can be stored in one
2345    stack slot.  Assumes the variable `fail_stack'.  No check is made
        here that a free slot exists; the caller must have reserved space
        beforehand.  Probably should only be called from within
        `PUSH_FAILURE_POINT'.  */
2346 #define PUSH_FAILURE_ITEM(item)                                         \
     /* [<][>][^][v][top][bottom][index][help] */
2347   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2348 
2349 /* The complement operation: step `avail' back by one and yield the slot
        it then designates.  Assumes `fail_stack' is nonempty.  */
2350 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
     /* [<][>][^][v][top][bottom][index][help] */
2351 
2352 /* Used to omit pushing failure point id's when we're not debugging.
        With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_ITEM and DEBUG_POP
        stores the popped item through ITEM_ADDR; without DEBUG both
        expand to nothing, so the stack holds one item fewer per failure
        point (see NUM_NONREG_ITEMS).  */
2353 #ifdef DEBUG
2354 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2355 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
     /* [<][>][^][v][top][bottom][index][help] */
2356 #else
2357 #define DEBUG_PUSH(item)
     /* [<][>][^][v][top][bottom][index][help] */
2358 #define DEBUG_POP(item_addr)
     /* [<][>][^][v][top][bottom][index][help] */
2359 #endif
2360 
2361 
2362 /* Push the information about the state we will need
2363    if we ever fail back to it.  
2364    
2365    Requires variables fail_stack, regstart, regend, reg_info,
        lowest_active_reg, highest_active_reg, and
2366    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2367    declared; this macro declares its own.
2368    
2369    Does `return FAILURE_CODE' if runs out of memory.

        Items are pushed in this order: for each register from
        lowest_active_reg up to highest_active_reg its start, end, and
        info word; then lowest_active_reg, highest_active_reg,
        PATTERN_PLACE, STRING_PLACE, and (only when DEBUG is defined) the
        failure id.  POP_FAILURE_POINT pops them in the reverse order.

        The stack is doubled as often as needed before anything is pushed,
        so the individual PUSH_FAILURE_ITEM calls need no space check.  */
2370 
2371 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
     /* [<][>][^][v][top][bottom][index][help] */
2372   do {                                                                  \
2373     char *destination;                                                  \
2374     /* Must be int, so when we don't save any registers, the arithmetic \
2375        of 0 + -1 isn't done as unsigned.  */                            \
2376     int this_reg;                                                       \
2377                                                                         \
2378     DEBUG_STATEMENT (failure_id++);                                     \
2379     DEBUG_STATEMENT (nfailure_points_pushed++);                         \
2380     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
2381     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2382     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2383                                                                         \
2384     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
2385     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
2386                                                                         \
2387     /* Ensure we have enough space allocated for what we will push.  */ \
2388     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
2389       {                                                                 \
2390         if (!DOUBLE_FAIL_STACK (fail_stack))                    \
2391           return failure_code;                                          \
2392                                                                         \
2393         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
2394                        (fail_stack).size);                              \
2395         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2396       }                                                                 \
2397                                                                         \
2398     /* Push the info, starting with the registers.  */                  \
2399     DEBUG_PRINT1 ("\n");                                                \
2400                                                                         \
2401     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
2402          this_reg++)                                                    \
2403       {                                                                 \
2404         DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
2405         DEBUG_STATEMENT (num_regs_pushed++);                            \
2406                                                                         \
2407         DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
2408         PUSH_FAILURE_ITEM (regstart[this_reg]);                         \
2409                                                                         \
2410         DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
2411         PUSH_FAILURE_ITEM (regend[this_reg]);                           \
2412                                                                         \
2413         DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
2414         DEBUG_PRINT2 (" match_null=%d",                                 \
2415                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
2416         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
2417         DEBUG_PRINT2 (" matched_something=%d",                          \
2418                       MATCHED_SOMETHING (reg_info[this_reg]));          \
2419         DEBUG_PRINT2 (" ever_matched=%d",                               \
2420                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));     \
2421         DEBUG_PRINT1 ("\n");                                            \
2422         PUSH_FAILURE_ITEM (reg_info[this_reg].word);                    \
2423       }                                                                 \
2424                                                                         \
2425     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2426     PUSH_FAILURE_ITEM (lowest_active_reg);                              \
2427                                                                         \
2428     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2429     PUSH_FAILURE_ITEM (highest_active_reg);                             \
2430                                                                         \
2431     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);           \
2432     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
2433     PUSH_FAILURE_ITEM (pattern_place);                                  \
2434                                                                         \
2435     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
2436     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2437                                  size2);                                \
2438     DEBUG_PRINT1 ("'\n");                                               \
2439     PUSH_FAILURE_ITEM (string_place);                                   \
2440                                                                         \
2441     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
2442     DEBUG_PUSH (failure_id);                                            \
2443   } while (0)
2444 
2445 /* This is the number of items that are pushed and popped on the stack
2446    for each register: its start, its end, and its info word.  */
2447 #define NUM_REG_ITEMS  3
2448 
2449 /* Individual items aside from the registers: the lowest and highest
        active register numbers, the pattern position, and the string
        position.  */
2450 #ifdef DEBUG
2451 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2452 #else
2453 #define NUM_NONREG_ITEMS 4
2454 #endif
2455 
2456 /* We push at most this many items on the stack.  Reads the variable
        `num_regs' from the expanding scope.  */
2457 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2458 
2459 /* We actually push this many items.  Reads `highest_active_reg' and
        `lowest_active_reg' from the expanding scope.  */
2460 #define NUM_FAILURE_ITEMS                                               \
2461   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
2462     + NUM_NONREG_ITEMS)
2463 
2464 /* How many items can still be added to the stack without overflowing it.  */
2465 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2466 
2467 
2468 /* Pops what PUSH_FAILURE_POINT pushes, in the reverse order.
2469 
2470    We restore into the parameters, all of which should be lvalues:
2471      STR -- the saved data position.
2472      PAT -- the saved pattern position.
2473      LOW_REG, HIGH_REG -- the lowest and highest active registers.
2474      REGSTART, REGEND -- arrays of string positions.
2475      REG_INFO -- array of information about each subexpression.
2476    
2477    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2478    `pend', `string1', `size1', `string2', and `size2'.

        STR is left untouched when the saved string position is NULL.

        Unlike PUSH_FAILURE_POINT this expands to a bare brace block, not
        a `do ... while (0)', so it must be used where a compound
        statement is acceptable.  */
2479 
2480 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
     /* [<][>][^][v][top][bottom][index][help] */
2481 {                                                                       \
2482   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
2483   int this_reg;                                                         \
2484   const unsigned char *string_temp;                                     \
2485                                                                         \
2486   assert (!FAIL_STACK_EMPTY ());                                        \
2487                                                                         \
2488   /* Remove failure points and point to how many regs pushed.  */       \
2489   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
2490   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
2491   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
2492                                                                         \
2493   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
2494                                                                         \
2495   DEBUG_POP (&failure_id);                                              \
2496   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
2497                                                                         \
2498   /* If the saved string location is NULL, it came from an              \
2499      on_failure_keep_string_jump opcode, and we want to throw away the  \
2500      saved NULL, thus retaining our current position in the string.  */ \
2501   string_temp = POP_FAILURE_ITEM ();                                    \
2502   if (string_temp != NULL)                                              \
2503     str = (const char *) string_temp;                                   \
2504                                                                         \
2505   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
2506   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
2507   DEBUG_PRINT1 ("'\n");                                                 \
2508                                                                         \
2509   pat = (unsigned char *) POP_FAILURE_ITEM ();                          \
2510   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                       \
2511   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
2512                                                                         \
2513   /* Restore register info.  */                                         \
2514   high_reg = (unsigned) POP_FAILURE_ITEM ();                            \
2515   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
2516                                                                         \
2517   low_reg = (unsigned) POP_FAILURE_ITEM ();                             \
2518   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
2519                                                                         \
2520   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
2521     {                                                                   \
2522       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                 \
2523                                                                         \
2524       reg_info[this_reg].word = POP_FAILURE_ITEM ();                    \
2525       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);          \
2526                                                                         \
2527       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();            \
2528       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);             \
2529                                                                         \
2530       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();          \
2531       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);         \
2532     }                                                                   \
2533                                                                         \
2534   DEBUG_STATEMENT (nfailure_points_popped++);                           \
2535 } /* POP_FAILURE_POINT */
2536 
2537 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2538    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2539    characters can start a string that matches the pattern.  This fastmap
2540    is used by re_search to skip quickly over impossible starting points.
2541 
2542    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2543    area as BUFP->fastmap.
2544    
2545    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2546    the pattern buffer.
2547 
2548    Returns 0 if we succeed, -2 if the failure stack could not be
        allocated or grown.

        The pattern is walked one path at a time.  Alternatives that still
        have to be explored are remembered on a local failure stack as
        plain pattern positions (no register information is saved).

        NOTE(review): nothing in this function releases `fail_stack.stack'
        on any return path.  If REGEX_ALLOCATE is backed by the heap in
        some configuration, that storage is never freed -- confirm
        against the definition of REGEX_ALLOCATE.   */
2549 
2550 int
2551 re_compile_fastmap (bufp)
     /* [<][>][^][v][top][bottom][index][help] */
2552      struct re_pattern_buffer *bufp;
2553 {
2554   int j, k;
2555   fail_stack_type fail_stack;
2556 #ifndef REGEX_MALLOC
2557   char *destination;
2558 #endif
2559   /* We don't push any register information onto the failure stack.
          (MAX_FAILURE_ITEMS, expanded inside DOUBLE_FAIL_STACK, reads this
          variable.)  */
2560   unsigned num_regs = 0;
2561   
2562   register char *fastmap = bufp->fastmap;
2563   unsigned char *pattern = bufp->buffer;
2564   unsigned long size = bufp->used;
2565   const unsigned char *p = pattern;
2566   register unsigned char *pend = pattern + size;
2567 
2568   /* Assume that each path through the pattern can be null until
2569      proven otherwise.  We set this false at the bottom of switch
2570      statement, to which we get only if a particular path doesn't
2571      match the empty string.  */
2572   boolean path_can_be_null = true;
2573 
2574   /* We aren't doing a `succeed_n' to begin with.  */
2575   boolean succeed_n_p = false;
2576 
2577   assert (fastmap != NULL && p != NULL);
2578   
2579   INIT_FAIL_STACK ();
2580   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2581   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
2582   bufp->can_be_null = 0;
2583       
       /* Keep going while the current path has opcodes left or there are
          saved alternatives still to explore.  */
2584   while (p != pend || !FAIL_STACK_EMPTY ())
2585     {
2586       if (p == pend)
2587         {
2588           bufp->can_be_null |= path_can_be_null;
2589           
2590           /* Reset for next path.  */
2591           path_can_be_null = true;
2592           
               /* Resume at the most recently saved alternative.  */
2593           p = fail_stack.stack[--fail_stack.avail];
2594         }
2595 
2596       /* We should never be about to go beyond the end of the pattern.  */
2597       assert (p < pend);
2598       
2599 #ifdef SWITCH_ENUM_BUG
2600       switch ((int) ((re_opcode_t) *p++))
2601 #else
2602       switch ((re_opcode_t) *p++)
2603 #endif
2604         {
2605 
2606         /* I guess the idea here is to simply not bother with a fastmap
2607            if a backreference is used, since it's too hard to figure out
2608            the fastmap for the corresponding group.  Setting
2609            `can_be_null' stops `re_search_2' from using the fastmap, so
2610            that is all we do.  */
2611         case duplicate:
2612           bufp->can_be_null = 1;
2613           return 0;
2614 
2615 
2616       /* Following are the cases which match a character.  These end
2617          with `break'.  */
2618 
2619         case exactn:
               /* Presumably p[0] is the count byte and p[1] the first
                  literal character -- TODO confirm against the exactn
                  encoding.  */
2620           fastmap[p[1]] = 1;
2621           break;
2622 
2623 
2624         case charset:
               /* *p is the bitmap length in bytes; every bit that is set
                  marks its character as a possible start.  */
2625           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2626             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2627               fastmap[j] = 1;
2628           break;
2629 
2630 
2631         case charset_not:
2632           /* Chars beyond end of map must be allowed.  */
2633           for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2634             fastmap[j] = 1;
2635 
               /* Within the map, every bit that is clear marks a possible
                  start.  */
2636           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2637             if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2638               fastmap[j] = 1;
2639           break;
2640 
2641 
2642         case wordchar:
2643           for (j = 0; j < (1 << BYTEWIDTH); j++)
2644             if (SYNTAX (j) == Sword)
2645               fastmap[j] = 1;
2646           break;
2647 
2648 
2649         case notwordchar:
2650           for (j = 0; j < (1 << BYTEWIDTH); j++)
2651             if (SYNTAX (j) != Sword)
2652               fastmap[j] = 1;
2653           break;
2654 
2655 
2656         case anychar:
2657           /* `.' matches anything ...  */
2658           for (j = 0; j < (1 << BYTEWIDTH); j++)
2659             fastmap[j] = 1;
2660 
2661           /* ... except perhaps newline.  */
2662           if (!(bufp->syntax & RE_DOT_NEWLINE))
2663             fastmap['\n'] = 0;
2664 
2665           /* Return if we have already set `can_be_null'; if we have,
2666              then the fastmap is irrelevant.  Something's wrong here.
                  (As written, this `else' binds to the RE_DOT_NEWLINE test
                  above, so the early return is only reachable when
                  RE_DOT_NEWLINE is set.)  */
2667           else if (bufp->can_be_null)
2668             return 0;
2669 
2670           /* Otherwise, have to check alternative paths.  */
2671           break;
2672 
2673 
2674 #ifdef emacs
2675         case syntaxspec:
2676           k = *p++;
2677           for (j = 0; j < (1 << BYTEWIDTH); j++)
2678             if (SYNTAX (j) == (enum syntaxcode) k)
2679               fastmap[j] = 1;
2680           break;
2681 
2682 
2683         case notsyntaxspec:
2684           k = *p++;
2685           for (j = 0; j < (1 << BYTEWIDTH); j++)
2686             if (SYNTAX (j) != (enum syntaxcode) k)
2687               fastmap[j] = 1;
2688           break;
2689 
2690 
2691       /* All cases after this match the empty string.  These end with
2692          `continue'.  */
2693 
2694 
2695         case before_dot:
2696         case at_dot:
2697         case after_dot:
2698           continue;
2699 #endif /* emacs */
2700 
2701 
2702         case no_op:
2703         case begline:
2704         case endline:
2705         case begbuf:
2706         case endbuf:
2707         case wordbound:
2708         case notwordbound:
2709         case wordbeg:
2710         case wordend:
2711         case push_dummy_failure:
2712           continue;
2713 
2714 
2715         case jump_n:
2716         case pop_failure_jump:
2717         case maybe_pop_jump:
2718         case jump:
2719         case jump_past_alt:
2720         case dummy_failure_jump:
2721           EXTRACT_NUMBER_AND_INCR (j, p);
2722           p += j;       
2723           if (j > 0)
2724             continue;
2725             
2726           /* Jump backward implies we just went through the body of a
2727              loop and matched nothing.  Opcode jumped to should be
2728              `on_failure_jump' or `succeed_n'.  Just treat it like an
2729              ordinary jump.  For a * loop, it has pushed its failure
2730              point already; if so, discard that as redundant.  */
2731           if ((re_opcode_t) *p != on_failure_jump
2732               && (re_opcode_t) *p != succeed_n)
2733             continue;
2734 
2735           p++;
2736           EXTRACT_NUMBER_AND_INCR (j, p);
2737           p += j;               
2738           
2739           /* If what's on the stack is where we are now, pop it.  */
2740           if (!FAIL_STACK_EMPTY () 
2741               && fail_stack.stack[fail_stack.avail - 1] == p)
2742             fail_stack.avail--;
2743 
2744           continue;
2745 
2746 
2747         case on_failure_jump:
2748         case on_failure_keep_string_jump:
2749         handle_on_failure_jump:
2750           EXTRACT_NUMBER_AND_INCR (j, p);
2751 
2752           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2753              end of the pattern.  We don't want to push such a point,
2754              since when we restore it above, entering the switch will
2755              increment `p' past the end of the pattern.  We don't need
2756              to push such a point since we obviously won't find any more
2757              fastmap entries beyond `pend'.  Such a pattern can match
2758              the null string, though.  */
2759           if (p + j < pend)
2760             {
2761               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2762                 return -2;
2763             }
2764           else
2765             bufp->can_be_null = 1;
2766 
               /* Set only when we arrived here from `succeed_n' below.  */
2767           if (succeed_n_p)
2768             {
2769               EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
2770               succeed_n_p = false;
2771             }
2772 
2773           continue;
2774 
2775 
2776         case succeed_n:
2777           /* Get to the number of times to succeed.  */
2778           p += 2;               
2779 
2780           /* Increment p past the n for when k != 0.  */
2781           EXTRACT_NUMBER_AND_INCR (k, p);
2782           if (k == 0)
2783             {
                   /* Back up so the on_failure_jump code above re-reads
                      the jump offset; presumably the 4 undoes the `p += 2'
                      and the two bytes consumed for k -- TODO confirm.  */
2784               p -= 4;
2785               succeed_n_p = true;  /* Spaghetti code alert.  */
2786               goto handle_on_failure_jump;
2787             }
2788           continue;
2789 
2790 
2791         case set_number_at:
2792           p += 4;
2793           continue;
2794 
2795 
2796         case start_memory:
2797         case stop_memory:
2798           p += 2;
2799           continue;
2800 
2801 
2802         default:
2803           abort (); /* We have listed all the cases.  */
2804         } /* switch *p++ */
2805 
2806       /* Getting here means we have found the possible starting
2807          characters for one path of the pattern -- and that the empty
2808          string does not match.  We need not follow this path further.
2809          Instead, look at the next alternative (remembered on the
2810          stack), or quit if no more.  The test at the top of the loop
2811          does these things.  */
2812       path_can_be_null = false;
2813       p = pend;
2814     } /* while p */
2815 
2816   /* Set `can_be_null' for the last path (also the first path, if the
2817      pattern is empty).  */
2818   bufp->can_be_null |= path_can_be_null;
2819   return 0;
2820 } /* re_compile_fastmap */
2821 
2822 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2823    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2824    this memory for recording register information.  STARTS and ENDS
2825    must be allocated using the malloc library routine, and must each
2826    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2827 
2828    If NUM_REGS == 0, then subsequent matches should allocate their own
2829    register data: BUFP is marked REGS_UNALLOCATED, and REGS is reset to
        zero registers with null `start' and `end' pointers (STARTS and
        ENDS are ignored in that case).
2830 
2831    Unless this function is called, the first search or match using
2832    PATTERN_BUFFER will allocate its own register data, without
2833    freeing the old data.  */
2834 
2835 void
2836 re_set_registers (bufp, regs, num_regs, starts, ends)
     /* [<][>][^][v][top][bottom][index][help] */
2837     struct re_pattern_buffer *bufp;
2838     struct re_registers *regs;
2839     unsigned num_regs;
2840     regoff_t *starts, *ends;
2841 {
2842   if (num_regs)
2843     {
           /* Caller-supplied storage.  */
2844       bufp->regs_allocated = REGS_REALLOCATE;
2845       regs->num_regs = num_regs;
2846       regs->start = starts;
2847       regs->end = ends;
2848     }
2849   else
2850     {
2851       bufp->regs_allocated = REGS_UNALLOCATED;
2852       regs->num_regs = 0;
           /* `start' and `end' are pointers, so spell the null value with
              the pointer type.  */
2853       regs->start = regs->end = (regoff_t *) 0;
2854     }
2855 }
2856 
2857 /* Searching routines.  */
2858 
2859 /* Like re_search_2, below, but only one string is specified, and
2860    doesn't let you say where to stop matching.  STRING is passed as the
        second string with an empty (NULL, 0) first string, and SIZE is
        passed as the stop index.  Returns whatever re_search_2 returns.  */
2861 
2862 int
2863 re_search (bufp, string, size, startpos, range, regs)
     /* [<][>][^][v][top][bottom][index][help] */
2864      struct re_pattern_buffer *bufp;
2865      const char *string;
2866      int size, startpos, range;
2867      struct re_registers *regs;
2868 {
2869   return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 
2870                       regs, size);
2871 }
2872 
2873 
2874 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2875    virtual concatenation of STRING1 and STRING2, starting first at index
2876    STARTPOS, then at STARTPOS + 1, and so on.
2877    
2878    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2879    
2880    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2881    only at STARTPOS; in general, the last start tried is STARTPOS +
2882    RANGE.  A negative RANGE searches backwards, trying STARTPOS - 1,
        STARTPOS - 2, and so on.
2883    
2884    In REGS, return the indices of the virtual concatenation of STRING1
2885    and STRING2 that matched the entire BUFP->buffer and its contained
2886    subexpressions.
2887    
2888    Do not consider matching one past the index STOP in the virtual
2889    concatenation of STRING1 and STRING2.
2890 
2891    We return either the position in the strings at which the match was
2892    found, -1 if no match, or -2 if error (such as failure
2893    stack overflow).  -1 is also returned when STARTPOS lies outside the
        virtual concatenation.  */
2894 
2895 int
2896 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     /* [<][>][^][v][top][bottom][index][help] */
2897      struct re_pattern_buffer *bufp;
2898      const char *string1, *string2;
2899      int size1, size2;
2900      int startpos;
2901      int range;
2902      struct re_registers *regs;
2903      int stop;
2904 {
2905   int val;
2906   register char *fastmap = bufp->fastmap;
2907   register char *translate = bufp->translate;
2908   int total_size = size1 + size2;
2909   int endpos = startpos + range;
2910 
2911   /* Check for out-of-range STARTPOS.  */
2912   if (startpos < 0 || startpos > total_size)
2913     return -1;
2914     
2915   /* Fix up RANGE if it might eventually take us outside
2916      the virtual concatenation of STRING1 and STRING2.  */
2917   if (endpos < -1)
2918     range = -1 - startpos;
2919   else if (endpos > total_size)
2920     range = total_size - startpos;
2921 
2922   /* If the search isn't to be a backwards one, don't waste time in a
2923      search for a pattern that must be anchored.  */
2924   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2925     {
2926       if (startpos > 0)
2927         return -1;
2928       else
2929         range = 1;
2930     }
2931 
2932   /* Update the fastmap now if not correct already.  */
2933   if (fastmap && !bufp->fastmap_accurate)
2934     if (re_compile_fastmap (bufp) == -2)
2935       return -2;
2936   
2937   /* Loop through the string, looking for a place to start matching.  */
2938   for (;;)
2939     { 
2940       /* If a fastmap is supplied, skip quickly over characters that
2941          cannot be the start of a match.  If the pattern can match the
2942          null string, however, we don't need to skip characters; we want
2943          the first null string.  */
2944       if (fastmap && startpos < total_size && !bufp->can_be_null)
2945         {
2946           if (range > 0)        /* Searching forwards.  */
2947             {
2948               register const char *d;
2949               register int lim = 0;
2950               int irange = range;
2951 
                   /* When the scan starts in STRING1 and could run past
                      its end, `lim' is the value RANGE will have once
                      the end of STRING1 is reached; the loops below stop
                      there instead of reading beyond STRING1.  */
2952               if (startpos < size1 && startpos + range >= size1)
2953                 lim = range - (size1 - startpos);
2954 
                   /* For a start inside STRING2 the base is biased by
                      -SIZE1 so that adding STARTPOS lands on the right
                      character.  */
2955               d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2956    
2957               /* Written out as an if-else to avoid testing `translate'
2958                  inside the loop.  */
2959               if (translate)
2960                 while (range > lim
2961                        && !fastmap[(unsigned char)
2962                                    translate[(unsigned char) *d++]])
2963                   range--;
2964               else
2965                 while (range > lim && !fastmap[(unsigned char) *d++])
2966                   range--;
2967 
                   /* Advance STARTPOS by the number of characters
                      skipped.  */
2968               startpos += irange - range;
2969             }
2970           else                          /* Searching backwards.  */
2971             {
2972               register char c = (size1 == 0 || startpos >= size1
2973                                  ? string2[startpos - size1] 
2974                                  : string1[startpos]);
2975 
2976               if (!fastmap[(unsigned char) TRANSLATE (c)])
2977                 goto advance;
2978             }
2979         }
2980 
2981       /* If can't match the null string, and that's all we have left, fail.  */
2982       if (range >= 0 && startpos == total_size && fastmap
2983           && !bufp->can_be_null)
2984         return -1;
2985 
2986       val = re_match_2 (bufp, string1, size1, string2, size2,
2987                         startpos, regs, stop);
           /* Any non-negative result counts as a match; report where it
              started.  */
2988       if (val >= 0)
2989         return startpos;
2990         
2991       if (val == -2)
2992         return -2;
2993 
2994     advance:
           /* Step one position in the search direction, or give up once
              RANGE is used up.  */
2995       if (!range) 
2996         break;
2997       else if (range > 0) 
2998         {
2999           range--; 
3000           startpos++;
3001         }
3002       else
3003         {
3004           range++; 
3005           startpos--;
3006         }
3007     }
3008   return -1;
3009 } /* re_search_2 */
3010 
3011 /* Declarations and macros for re_match_2.  */
3012 
3013 static int bcmp_translate ();
3014 static boolean alt_match_null_string_p (),
3015                common_op_match_null_string_p (),
3016                group_match_null_string_p ();
3017 
3018 /* Structure for per-register (a.k.a. per-group) information.
3019    This must not be longer than one word, because we push this value
3020    onto the failure stack.  Other register information, such as the
3021    starting and ending positions (which are addresses), and the list of
3022    inner groups (which is a bits list) are maintained in separate
3023    variables.  
3024    
3025    We are making a (strictly speaking) nonportable assumption here: that
3026    the compiler will pack our bit fields into something that fits into
3027    the type of `word', i.e., is something that fits into one item on the
3028    failure stack.  */
3029 typedef union
3030 {
3031   fail_stack_elt_t word;
3032   struct
3033   {
3034       /* This field is one if this group can match the empty string,
3035          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3036 #define MATCH_NULL_UNSET_VALUE 3
3037     unsigned match_null_string_p : 2;
3038     unsigned is_active : 1;
3039     unsigned matched_something : 1;
3040     unsigned ever_matched_something : 1;
3041   } bits;
3042 } register_info_type;
3043 
/* Accessors for the bit fields of a register_info_type R.  Each one
   expands to the field itself, so it can be read as well as assigned
   to.  */
#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
#define IS_ACTIVE(R)  ((R).bits.is_active)
#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3048 
3049 
/* Call this when we have matched a real character.  For every register
   from `lowest_active_reg' through `highest_active_reg' it sets both
   the `matched_something' and the `ever_matched_something' flag in
   `reg_info', recording that the subexpressions we are currently
   inside have matched.  When no register is active the lowest sentinel
   is larger than the highest one, so the loop body never runs.  */
#define SET_REGS_MATCHED()                                              \
  do                                                                    \
    {                                                                   \
      unsigned active_r;                                                \
      for (active_r = lowest_active_reg;                                \
           active_r <= highest_active_reg;                              \
           active_r++)                                                  \
        {                                                               \
          EVER_MATCHED_SOMETHING (reg_info[active_r]) = 1;              \
          MATCHED_SOMETHING (reg_info[active_r]) = 1;                   \
        }                                                               \
    }                                                                   \
  while (0)
3065 
3066 
/* Convert PTR, a pointer into one of the search strings `string1' and
   `string2', into an offset from the beginning of their virtual
   concatenation: a pointer into `string1' yields its distance from
   `string1'; any other pointer yields its distance from `string2'
   plus `size1'.  */
#define POINTER_TO_OFFSET(ptr)                                          \
  (FIRST_STRING_P (ptr)                                                 \
   ? (ptr) - string1                                                    \
   : (ptr) - string2 + size1)
3071 
/* Registers are set to a sentinel when they haven't yet matched.
   REG_UNSET (E) is true when the register pointer E still holds that
   sentinel.  */
#define REG_UNSET_VALUE ((char *) -1)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3075 
3076 
/* Macros for dealing with the split strings in re_match_2.  */

/* True while the current data segment ends at the stopping point in
   string1, i.e. while `dend' still equals `end_match_1'.  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Call before fetching a character with *d.  If `d' has reached
   `dend', this switches over to string2 (d = string2,
   dend = end_match_2); if we are already at the end of string2 there
   is nothing left to fetch and control jumps to the `fail' label.  The
   test is a loop, so a switch that lands directly on `end_match_2' is
   detected as well.  */
#define PREFETCH()                                                      \
  while (d == dend)                                                     \
    {                                                                   \
      /* End of string2 => fail.  */                                    \
      if (dend == end_match_2)                                          \
        goto fail;                                                      \
      /* End of string1 => advance to string2.  */                      \
      d = string2;                                                      \
      dend = end_match_2;                                               \
    }
3093 
3094 
/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.
   AT_STRINGS_BEG compares D with `string1' when `size1' is nonzero and
   with `string2' otherwise; it is also true whenever `size2' is zero.
   AT_STRINGS_END compares D with `end2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
3099 
3100 
/* Test if D points to a character which is word-constituent, i.e. one
   whose SYNTAX is `Sword'.  We have two special cases to check for: if
   D is just past the end of string1 (`end1'), look at the first
   character in string2; and if D is just before the beginning of
   string2, look at the last character in string1.  */
#define WORDCHAR_P(d)                                                   \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)
3109 
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  The very beginning and the very end of
   the strings always count as a boundary; those tests come first, so
   WORDCHAR_P is not evaluated there.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P ((d) - 1) != WORDCHAR_P (d))
3115 
3116 
3117 /* Free everything we malloc.  */
3118 #ifdef REGEX_MALLOC
/* Free VAR if it is non-null, then reset it to NULL.  The null test is
   kept rather than relying on free (NULL) being harmless.  Wrapped in
   do ... while (0) so that the macro is a single statement: as two
   bare statements, the `var = NULL' part escaped any enclosing `if'
   and ran unconditionally.  */
#define FREE_VAR(var)                                                   \
  do                                                                    \
    {                                                                   \
      if (var)                                                          \
        free (var);                                                     \
      (var) = NULL;                                                     \
    }                                                                   \
  while (0)
/* Release, via FREE_VAR, the failure stack and every register vector
   that re_match_2 allocates, leaving each pointer NULL.  */
#define FREE_VARIABLES()                                                \
  do {                                                                  \
    FREE_VAR (fail_stack.stack);                                        \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
3133 #else /* not REGEX_MALLOC */
/* Without REGEX_MALLOC there is nothing to free explicitly, so this
   expands to a single alloca (0) call.  Some MIPS systems (at least)
   want this to free alloca'd storage.  */
#define FREE_VARIABLES() alloca (0)
3136 #endif /* not REGEX_MALLOC */
3137 
3138 
/* Sentinels stored in `highest_active_reg' and `lowest_active_reg'
   when no register is active.

   These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3148 
3149 /* Matching routines.  */
3150 
3151 #ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except it takes only a single string.

   It calls re_match_2 with a null, zero-length first string, with
   STRING (of length SIZE) as the second string, with POS as the
   starting position and with SIZE as the stopping position, and
   returns whatever re_match_2 returns.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
}
3163 #endif /* not emacs */
3164 
3165 
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.
   
   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */
3178 
3179 int
3180 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     /* [<][>][^][v][top][bottom][index][help] */
3181      struct re_pattern_buffer *bufp;
3182      const char *string1, *string2;
3183      int size1, size2;
3184      int pos;
3185      struct re_registers *regs;
3186      int stop;
3187 {
3188   /* General temporaries.  */
3189   int mcnt;
3190   unsigned char *p1;
3191 
3192   /* Just past the end of the corresponding string.  */
3193   const char *end1, *end2;
3194 
3195   /* Pointers into string1 and string2, just past the last characters in
3196      each to consider matching.  */
3197   const char *end_match_1, *end_match_2;
3198 
3199   /* Where we are in the data, and the end of the current string.  */
3200   const char *d, *dend;
3201   
3202   /* Where we are in the pattern, and the end of the pattern.  */
3203   unsigned char *p = bufp->buffer;
3204   register unsigned char *pend = p + bufp->used;
3205 
3206   /* We use this to map every character in the string.  */
3207   char *translate = bufp->translate;
3208 
3209   /* Failure point stack.  Each place that can handle a failure further
3210      down the line pushes a failure point on this stack.  It consists of
3211      restart, regend, and reg_info for all registers corresponding to
3212      the subexpressions we're currently inside, plus the number of such
3213      registers, and, finally, two char *'s.  The first char * is where
3214      to resume scanning the pattern; the second one is where to resume
3215      scanning the strings.  If the latter is zero, the failure point is
3216      a ``dummy''; if a failure happens and the failure point is a dummy,
3217      it gets discarded and the next next one is tried.  */
3218   fail_stack_type fail_stack;
3219 #ifdef DEBUG
3220   static unsigned failure_id = 0;
3221   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3222 #endif
3223 
3224   /* We fill all the registers internally, independent of what we
3225      return, for use in backreferences.  The number here includes
3226      an element for register zero.  */
3227   unsigned num_regs = bufp->re_nsub + 1;
3228   
3229   /* The currently active registers.  */
3230   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3231   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3232 
3233   /* Information on the contents of registers. These are pointers into
3234      the input strings; they record just what was matched (on this
3235      attempt) by a subexpression part of the pattern, that is, the
3236      regnum-th regstart pointer points to where in the pattern we began
3237      matching and the regnum-th regend points to right after where we
3238      stopped matching the regnum-th subexpression.  (The zeroth register
3239      keeps track of what the whole pattern matches.)  */
3240   const char **regstart, **regend;
3241 
3242   /* If a group that's operated upon by a repetition operator fails to
3243      match anything, then the register for its start will need to be
3244      restored because it will have been set to wherever in the string we
3245      are when we last see its open-group operator.  Similarly for a
3246      register's end.  */
3247   const char **old_regstart, **old_regend;
3248 
3249   /* The is_active field of reg_info helps us keep track of which (possibly
3250      nested) subexpressions we are currently in. The matched_something
3251      field of reg_info[reg_num] helps us tell whether or not we have
3252      matched any of the pattern so far this time through the reg_num-th
3253      subexpression.  These two fields get reset each time through any
3254      loop their register is in.  */
3255   register_info_type *reg_info; 
3256 
3257   /* The following record the register info as found in the above
3258      variables when we find a match better than any we've seen before. 
3259      This happens as we backtrack through the failure points, which in
3260      turn happens only if we have not yet matched the entire string. */
3261   unsigned best_regs_set = false;
3262   const char **best_regstart, **best_regend;
3263   
3264   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3265      allocate space for that if we're not allocating space for anything
3266      else (see below).  Also, we never need info about register 0 for
3267      any of the other register vectors, and it seems rather a kludge to
3268      treat `best_regend' differently than the rest.  So we keep track of
3269      the end of the best match so far in a separate variable.  We
3270      initialize this to NULL so that when we backtrack the first time
3271      and need to test it, it's not garbage.  */
3272   const char *match_end = NULL;
3273 
3274   /* Used when we pop values we don't care about.  */
3275   const char **reg_dummy;
3276   register_info_type *reg_info_dummy;
3277 
3278 #ifdef DEBUG
3279   /* Counts the total number of registers pushed.  */
3280   unsigned num_regs_pushed = 0;         
3281 #endif
3282 
3283   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3284   
3285   INIT_FAIL_STACK ();
3286   
3287   /* Do not bother to initialize all the register variables if there are
3288      no groups in the pattern, as it takes a fair amount of time.  If
3289      there are groups, we include space for register 0 (the whole
3290      pattern), even though we never use it, since it simplifies the
3291      array indexing.  We should fix this.  */
3292   if (bufp->re_nsub)
3293     {
3294       regstart = REGEX_TALLOC (num_regs, const char *);
3295       regend = REGEX_TALLOC (num_regs, const char *);
3296       old_regstart = REGEX_TALLOC (num_regs, const char *);
3297       old_regend = REGEX_TALLOC (num_regs, const char *);
3298       best_regstart = REGEX_TALLOC (num_regs, const char *);
3299       best_regend = REGEX_TALLOC (num_regs, const char *);
3300       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3301       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3302       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3303 
3304       if (!(regstart && regend && old_regstart && old_regend && reg_info 
3305             && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 
3306         {
3307           FREE_VARIABLES ();
3308           return -2;
3309         }
3310     }
3311 #ifdef REGEX_MALLOC
3312   else
3313     {
3314       /* We must initialize all our variables to NULL, so that
3315          `FREE_VARIABLES' doesn't try to free them.  */
3316       regstart = regend = old_regstart = old_regend = best_regstart
3317         = best_regend = reg_dummy = NULL;
3318       reg_info = reg_info_dummy = (register_info_type *) NULL;
3319     }
3320 #endif /* REGEX_MALLOC */
3321 
3322   /* The starting position is bogus.  */
3323   if (pos < 0 || pos > size1 + size2)
3324     {
3325       FREE_VARIABLES ();
3326       return -1;
3327     }
3328     
3329   /* Initialize subexpression text positions to -1 to mark ones that no
3330      start_memory/stop_memory has been seen for. Also initialize the
3331      register information struct.  */
3332   for (mcnt = 1; mcnt < num_regs; mcnt++)
3333     {
3334       regstart[mcnt] = regend[mcnt] 
3335         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3336         
3337       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3338       IS_ACTIVE (reg_info[mcnt]) = 0;
3339       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3340       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3341     }
3342   
3343   /* We move `string1' into `string2' if the latter's empty -- but not if
3344      `string1' is null.  */
3345   if (size2 == 0 && string1 != NULL)
3346     {
3347       string2 = string1;
3348       size2 = size1;
3349       string1 = 0;
3350       size1 = 0;
3351     }
3352   end1 = string1 + size1;
3353   end2 = string2 + size2;
3354 
3355   /* Compute where to stop matching, within the two strings.  */
3356   if (stop <= size1)
3357     {
3358       end_match_1 = string1 + stop;
3359       end_match_2 = string2;
3360     }
3361   else
3362     {
3363       end_match_1 = end1;
3364       end_match_2 = string2 + stop - size1;
3365     }
3366 
3367   /* `p' scans through the pattern as `d' scans through the data. 
3368      `dend' is the end of the input string that `d' points within.  `d'
3369      is advanced into the following input string whenever necessary, but
3370      this happens before fetching; therefore, at the beginning of the
3371      loop, `d' can be pointing at the end of a string, but it cannot
3372      equal `string2'.  */
3373   if (size1 > 0 && pos <= size1)
3374     {
3375       d = string1 + pos;
3376       dend = end_match_1;
3377     }
3378   else
3379     {
3380       d = string2 + pos - size1;
3381       dend = end_match_2;
3382     }
3383 
3384   DEBUG_PRINT1 ("The compiled pattern is: ");
3385   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3386   DEBUG_PRINT1 ("The string to match is: `");
3387   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3388   DEBUG_PRINT1 ("'\n");
3389   
3390   /* This loops over pattern commands.  It exits by returning from the
3391      function if the match is complete, or it drops through if the match
3392      fails at this starting point in the input data.  */
3393   for (;;)
3394     {
3395       DEBUG_PRINT2 ("\n0x%x: ", p);
3396 
3397       if (p == pend)
3398         { /* End of pattern means we might have succeeded.  */
3399           DEBUG_PRINT1 ("end of pattern ... ");
3400           
3401           /* If we haven't matched the entire string, and we want the
3402              longest match, try backtracking.  */
3403           if (d != end_match_2)
3404             {
3405               DEBUG_PRINT1 ("backtracking.\n");
3406               
3407               if (!FAIL_STACK_EMPTY ())
3408                 { /* More failure points to try.  */
3409                   boolean same_str_p = (FIRST_STRING_P (match_end) 
3410                                         == MATCHING_IN_FIRST_STRING);
3411 
3412                   /* If exceeds best match so far, save it.  */
3413                   if (!best_regs_set
3414                       || (same_str_p && d > match_end)
3415                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3416                     {
3417                       best_regs_set = true;
3418                       match_end = d;
3419                       
3420                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3421                       
3422                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3423                         {
3424                           best_regstart[mcnt] = regstart[mcnt];
3425                           best_regend[mcnt] = regend[mcnt];
3426                         }
3427                     }
3428                   goto fail;           
3429                 }
3430 
3431               /* If no failure points, don't restore garbage.  */
3432               else if (best_regs_set)   
3433                 {
3434                 restore_best_regs:
3435                   /* Restore best match.  It may happen that `dend ==
3436                      end_match_1' while the restored d is in string2.
3437                      For example, the pattern `x.*y.*z' against the
3438                      strings `x-' and `y-z-', if the two strings are
3439                      not consecutive in memory.  */
3440                   DEBUG_PRINT1 ("Restoring best registers.\n");
3441                   
3442                   d = match_end;
3443                   dend = ((d >= string1 && d <= end1)
3444                            ? end_match_1 : end_match_2);
3445 
3446                   for (mcnt = 1; mcnt < num_regs; mcnt++)
3447                     {
3448                       regstart[mcnt] = best_regstart[mcnt];
3449                       regend[mcnt] = best_regend[mcnt];
3450                     }
3451                 }
3452             } /* d != end_match_2 */
3453 
3454           DEBUG_PRINT1 ("Accepting match.\n");
3455 
3456           /* If caller wants register contents data back, do it.  */
3457           if (regs && !bufp->no_sub)
3458             {
3459               /* Have the register data arrays been allocated?  */
3460               if (bufp->regs_allocated == REGS_UNALLOCATED)
3461                 { /* No.  So allocate them with malloc.  We need one
3462                      extra element beyond `num_regs' for the `-1' marker
3463                      GNU code uses.  */
3464                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3465                   regs->start = TALLOC (regs->num_regs, regoff_t);
3466                   regs->end = TALLOC (regs->num_regs, regoff_t);
3467                   if (regs->start == NULL || regs->end == NULL)
3468                     return -2;
3469                   bufp->regs_allocated = REGS_REALLOCATE;
3470                 }
3471               else if (bufp->regs_allocated == REGS_REALLOCATE)
3472                 { /* Yes.  If we need more elements than were already
3473                      allocated, reallocate them.  If we need fewer, just
3474                      leave it alone.  */
3475                   if (regs->num_regs < num_regs + 1)
3476                     {
3477                       regs->num_regs = num_regs + 1;
3478                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3479                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3480                       if (regs->start == NULL || regs->end == NULL)
3481                         return -2;
3482                     }
3483                 }
3484               else
3485                 assert (bufp->regs_allocated == REGS_FIXED);
3486 
3487               /* Convert the pointer data in `regstart' and `regend' to
3488                  indices.  Register zero has to be set differently,
3489                  since we haven't kept track of any info for it.  */
3490               if (regs->num_regs > 0)
3491                 {
3492                   regs->start[0] = pos;
3493                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3494                                   : d - string2 + size1);
3495                 }
3496               
3497               /* Go through the first `min (num_regs, regs->num_regs)'
3498                  registers, since that is all we initialized.  */
3499               for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3500                 {
3501                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3502                     regs->start[mcnt] = regs->end[mcnt] = -1;
3503                   else
3504                     {
3505                       regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3506                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3507                     }
3508                 }
3509               
3510               /* If the regs structure we return has more elements than
3511                  were in the pattern, set the extra elements to -1.  If
3512                  we (re)allocated the registers, this is the case,
3513                  because we always allocate enough to have at least one
3514                  -1 at the end.  */
3515               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3516                 regs->start[mcnt] = regs->end[mcnt] = -1;
3517             } /* regs && !bufp->no_sub */
3518 
3519           FREE_VARIABLES ();
3520           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3521                         nfailure_points_pushed, nfailure_points_popped,
3522                         nfailure_points_pushed - nfailure_points_popped);
3523           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3524 
3525           mcnt = d - pos - (MATCHING_IN_FIRST_STRING 
3526                             ? string1 
3527                             : string2 - size1);
3528 
3529           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3530 
3531           return mcnt;
3532         }
3533 
3534       /* Otherwise match next pattern command.  */
3535 #ifdef SWITCH_ENUM_BUG
3536       switch ((int) ((re_opcode_t) *p++))
3537 #else
3538       switch ((re_opcode_t) *p++)
3539 #endif
3540         {
3541         /* Ignore these.  Used to ignore the n of succeed_n's which
3542            currently have n == 0.  */
3543         case no_op:
3544           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3545           break;
3546 
3547 
3548         /* Match the next n pattern characters exactly.  The following
3549            byte in the pattern defines n, and the n bytes after that
3550            are the characters to match.  */
3551         case exactn:
3552           mcnt = *p++;
3553           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3554 
3555           /* This is written out as an if-else so we don't waste time
3556              testing `translate' inside the loop.  */
3557           if (translate)
3558             {
3559               do
3560                 {
3561                   PREFETCH ();
3562                   if (translate[(unsigned char) *d++] != (char) *p++)
3563                     goto fail;
3564                 }
3565               while (--mcnt);
3566             }
3567           else
3568             {
3569               do
3570                 {
3571                   PREFETCH ();
3572                   if (*d++ != (char) *p++) goto fail;
3573                 }
3574               while (--mcnt);
3575             }
3576           SET_REGS_MATCHED ();
3577           break;
3578 
3579 
3580         /* Match any character except possibly a newline or a null.  */
3581         case anychar:
3582           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3583 
3584           PREFETCH ();
3585 
3586           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3587               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3588             goto fail;
3589 
3590           SET_REGS_MATCHED ();
3591           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3592           d++;
3593           break;
3594 
3595 
3596         case charset:
3597         case charset_not:
3598           {
3599             register unsigned char c;
3600             boolean not = (re_opcode_t) *(p - 1) == charset_not;
3601 
3602             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3603 
3604             PREFETCH ();
3605             c = TRANSLATE (*d); /* The character to match.  */
3606 
3607             /* Cast to `unsigned' instead of `unsigned char' in case the
3608                bit list is a full 32 bytes long.  */
3609             if (c < (unsigned) (*p * BYTEWIDTH)
3610                 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3611               not = !not;
3612 
3613             p += 1 + *p;
3614 
3615             if (!not) goto fail;
3616             
3617             SET_REGS_MATCHED ();
3618             d++;
3619             break;
3620           }
3621 
3622 
3623         /* The beginning of a group is represented by start_memory.
3624            The arguments are the register number in the next byte, and the
3625            number of groups inner to this one in the next.  The text
3626            matched within the group is recorded (in the internal
3627            registers data structure) under the register number.  */
3628         case start_memory:
3629           DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3630 
3631           /* Find out if this group can match the empty string.  */
3632           p1 = p;               /* To send to group_match_null_string_p.  */
3633           
3634           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3635             REG_MATCH_NULL_STRING_P (reg_info[*p]) 
3636               = group_match_null_string_p (&p1, pend, reg_info);
3637 
3638           /* Save the position in the string where we were the last time
3639              we were at this open-group operator in case the group is
3640              operated upon by a repetition operator, e.g., with `(a*)*b'
3641              against `ab'; then we want to ignore where we are now in
3642              the string in case this attempt to match fails.  */
3643           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3644                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3645                              : regstart[*p];
3646           DEBUG_PRINT2 ("  old_regstart: %d\n", 
3647                          POINTER_TO_OFFSET (old_regstart[*p]));
3648 
3649           regstart[*p] = d;
3650           DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3651 
3652           IS_ACTIVE (reg_info[*p]) = 1;
3653           MATCHED_SOMETHING (reg_info[*p]) = 0;
3654           
3655           /* This is the new highest active register.  */
3656           highest_active_reg = *p;
3657           
3658           /* If nothing was active before, this is the new lowest active
3659              register.  */
3660           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3661             lowest_active_reg = *p;
3662 
3663           /* Move past the register number and inner group count.  */
3664           p += 2;
3665           break;
3666 
3667 
3668         /* The stop_memory opcode represents the end of a group.  Its
3669            arguments are the same as start_memory's: the register
3670            number, and the number of inner groups.  */
3671         case stop_memory:
3672           DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3673              
3674           /* We need to save the string position the last time we were at
3675              this close-group operator in case the group is operated
3676              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3677              against `aba'; then we want to ignore where we are now in
3678              the string in case this attempt to match fails.  */
3679           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3680                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3681                            : regend[*p];
3682           DEBUG_PRINT2 ("      old_regend: %d\n", 
3683                          POINTER_TO_OFFSET (old_regend[*p]));
3684 
3685           regend[*p] = d;
3686           DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3687 
3688           /* This register isn't active anymore.  */
3689           IS_ACTIVE (reg_info[*p]) = 0;
3690           
3691           /* If this was the only register active, nothing is active
3692              anymore.  */
3693           if (lowest_active_reg == highest_active_reg)
3694             {
3695               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3696               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3697             }
3698           else
3699             { /* We must scan for the new highest active register, since
3700                  it isn't necessarily one less than now: consider
3701                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3702                  new highest active register is 1.  */
3703               unsigned char r = *p - 1;
3704               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3705                 r--;
3706               
3707               /* If we end up at register zero, that means that we saved
3708                  the registers as the result of an `on_failure_jump', not
3709                  a `start_memory', and we jumped past the innermost
3710                  `stop_memory'.  For example, in ((.)*) we save
3711                  registers 1 and 2 as a result of the *, but when we pop
3712                  back to the second ), we are at the stop_memory 1.
3713                  Thus, nothing is active.  */
3714               if (r == 0)
3715                 {
3716                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3717                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3718                 }
3719               else
3720                 highest_active_reg = r;
3721             }
3722           
3723           /* If we just failed to match something this time around with a
3724              group that's operated on by a repetition operator, try to
3725              force exit from the ``loop'', and restore the register
3726              information for this group that we had before trying this
3727              last match.  */
3728           if ((!MATCHED_SOMETHING (reg_info[*p])
3729                || (re_opcode_t) p[-3] == start_memory)
3730               && (p + 2) < pend)              
3731             {
3732               boolean is_a_jump_n = false;
3733               
3734               p1 = p + 2;
3735               mcnt = 0;
3736               switch ((re_opcode_t) *p1++)
3737                 {
3738                   case jump_n:
3739                     is_a_jump_n = true;
3740                   case pop_failure_jump:
3741                   case maybe_pop_jump:
3742                   case jump:
3743                   case dummy_failure_jump:
3744                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3745                     if (is_a_jump_n)
3746                       p1 += 2;
3747                     break;
3748                   
3749                   default:
3750                     /* do nothing */ ;
3751                 }
3752               p1 += mcnt;
3753         
3754               /* If the next operation is a jump backwards in the pattern
3755                  to an on_failure_jump right before the start_memory
3756                  corresponding to this stop_memory, exit from the loop
3757                  by forcing a failure after pushing on the stack the
3758                  on_failure_jump's jump in the pattern, and d.  */
3759               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3760                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3761                 {
3762                   /* If this group ever matched anything, then restore
3763                      what its registers were before trying this last
3764                      failed match, e.g., with `(a*)*b' against `ab' for
3765                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3766                      against `aba' for regend[3].
3767                      
3768                      Also restore the registers for inner groups for,
3769                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3770                      otherwise get trashed).  */
3771                      
3772                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3773                     {
3774                       unsigned r; 
3775         
3776                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3777                       
3778                       /* Restore this and inner groups' (if any) registers.  */
3779                       for (r = *p; r < *p + *(p + 1); r++)
3780                         {
3781                           regstart[r] = old_regstart[r];
3782 
3783                           /* xx why this test?  */
3784                           if ((int) old_regend[r] >= (int) regstart[r])
3785                             regend[r] = old_regend[r];
3786                         }     
3787                     }
3788                   p1++;
3789                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3790                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3791 
3792                   goto fail;
3793                 }
3794             }
3795           
3796           /* Move past the register number and the inner group count.  */
3797           p += 2;
3798           break;
3799 
3800 
3801         /* \<digit> has been turned into a `duplicate' command which is
3802            followed by the numeric value of <digit> as the register number.  */
3803         case duplicate:
3804           {
3805             register const char *d2, *dend2;
3806             int regno = *p++;   /* Get which register to match against.  */
3807             DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3808 
3809             /* Can't back reference a group which we've never matched.  */
3810             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3811               goto fail;
3812               
3813             /* Where in input to try to start matching.  */
3814             d2 = regstart[regno];
3815             
3816             /* Where to stop matching; if both the place to start and
3817                the place to stop matching are in the same string, then
3818                set to the place to stop, otherwise, for now have to use
3819                the end of the first string.  */
3820 
3821             dend2 = ((FIRST_STRING_P (regstart[regno]) 
3822                       == FIRST_STRING_P (regend[regno]))
3823                      ? regend[regno] : end_match_1);
3824             for (;;)
3825               {
3826                 /* If necessary, advance to next segment in register
3827                    contents.  */
3828                 while (d2 == dend2)
3829                   {
3830                     if (dend2 == end_match_2) break;
3831                     if (dend2 == regend[regno]) break;
3832 
3833                     /* End of string1 => advance to string2. */
3834                     d2 = string2;
3835                     dend2 = regend[regno];
3836                   }
3837                 /* At end of register contents => success */
3838                 if (d2 == dend2) break;
3839 
3840                 /* If necessary, advance to next segment in data.  */
3841                 PREFETCH ();
3842 
3843                 /* How many characters left in this segment to match.  */
3844                 mcnt = dend - d;
3845                 
3846                 /* Want how many consecutive characters we can match in
3847                    one shot, so, if necessary, adjust the count.  */
3848                 if (mcnt > dend2 - d2)
3849                   mcnt = dend2 - d2;
3850                   
3851                 /* Compare that many; failure if mismatch, else move
3852                    past them.  */
3853                 if (translate 
3854                     ? bcmp_translate (d, d2, mcnt, translate) 
3855                     : bcmp (d, d2, mcnt))
3856                   goto fail;
3857                 d += mcnt, d2 += mcnt;
3858               }
3859           }
3860           break;
3861 
3862 
3863         /* begline matches the empty string at the beginning of the string
3864            (unless `not_bol' is set in `bufp'), and, if
3865            `newline_anchor' is set, after newlines.  */
3866         case begline:
3867           DEBUG_PRINT1 ("EXECUTING begline.\n");
3868           
3869           if (AT_STRINGS_BEG (d))
3870             {
3871               if (!bufp->not_bol) break;
3872             }
3873           else if (d[-1] == '\n' && bufp->newline_anchor)
3874             {
3875               break;
3876             }
3877           /* In all other cases, we fail.  */
3878           goto fail;
3879 
3880 
3881         /* endline is the dual of begline.  */
3882         case endline:
3883           DEBUG_PRINT1 ("EXECUTING endline.\n");
3884 
3885           if (AT_STRINGS_END (d))
3886             {
3887               if (!bufp->not_eol) break;
3888             }
3889           
3890           /* We have to ``prefetch'' the next character.  */
3891           else if ((d == end1 ? *string2 : *d) == '\n'
3892                    && bufp->newline_anchor)
3893             {
3894               break;
3895             }
3896           goto fail;
3897 
3898 
3899         /* Match at the very beginning of the data.  */
3900         case begbuf:
3901           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3902           if (AT_STRINGS_BEG (d))
3903             break;
3904           goto fail;
3905 
3906 
3907         /* Match at the very end of the data.  */
3908         case endbuf:
3909           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3910           if (AT_STRINGS_END (d))
3911             break;
3912           goto fail;
3913 
3914 
3915         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3916            pushes NULL as the value for the string on the stack.  Then
3917            `pop_failure_point' will keep the current value for the
3918            string, instead of restoring it.  To see why, consider
3919            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3920            then the . fails against the \n.  But the next thing we want
3921            to do is match the \n against the \n; if we restored the
3922            string value, we would be back at the foo.
3923            
3924            Because this is used only in specific cases, we don't need to
3925            check all the things that `on_failure_jump' does, to make
3926            sure the right things get saved on the stack.  Hence we don't
3927            share its code.  The only reason to push anything on the
3928            stack at all is that otherwise we would have to change
3929            `anychar's code to do something besides goto fail in this
3930            case; that seems worse than this.  */
3931         case on_failure_keep_string_jump:
3932           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3933           
3934           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3935           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3936 
3937           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3938           break;
3939 
3940 
3941         /* Uses of on_failure_jump:
3942         
3943            Each alternative starts with an on_failure_jump that points
3944            to the beginning of the next alternative.  Each alternative
3945            except the last ends with a jump that in effect jumps past
3946            the rest of the alternatives.  (They really jump to the
3947            ending jump of the following alternative, because tensioning
3948            these jumps is a hassle.)
3949 
3950            Repeats start with an on_failure_jump that points past both
3951            the repetition text and either the following jump or
3952            pop_failure_jump back to this on_failure_jump.  */
3953         case on_failure_jump:
3954         on_failure:
3955           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3956 
3957           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3958           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3959 
3960           /* If this on_failure_jump comes right before a group (i.e.,
3961              the original * applied to a group), save the information
3962              for that group and all inner ones, so that if we fail back
3963              to this point, the group's information will be correct.
3964              For example, in \(a*\)*\1, we need the preceding group,
3965              and in \(\(a*\)b*\)\2, we need the inner group.  */
3966 
3967           /* We can't use `p' to check ahead because we push
3968              a failure point to `p + mcnt' after we do this.  */
3969           p1 = p;
3970 
3971           /* We need to skip no_op's before we look for the
3972              start_memory in case this on_failure_jump is happening as
3973              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3974              against aba.  */
3975           while (p1 < pend && (re_opcode_t) *p1 == no_op)
3976             p1++;
3977 
3978           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3979             {
3980               /* We have a new highest active register now.  This will
3981                  get reset at the start_memory we are about to get to,
3982                  but we will have saved all the registers relevant to
3983                  this repetition op, as described above.  */
3984               highest_active_reg = *(p1 + 1) + *(p1 + 2);
3985               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3986                 lowest_active_reg = *(p1 + 1);
3987             }
3988 
3989           DEBUG_PRINT1 (":\n");
3990           PUSH_FAILURE_POINT (p + mcnt, d, -2);
3991           break;
3992 
3993 
3994         /* A smart repeat ends with `maybe_pop_jump'.
3995            We change it to either `pop_failure_jump' or `jump'.  */
3996         case maybe_pop_jump:
3997           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3998           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3999           {
4000             register unsigned char *p2 = p;
4001 
4002             /* Compare the beginning of the repeat with what in the
4003                pattern follows its end.  If we can establish that there
4004                is nothing that they would both match, i.e., nothing
4005                that would force us to backtrack (as `a*a' would),
4006                then we can change to pop_failure_jump, because we'll
4007                never have to backtrack.
4008                
4009                This is not true in the case of alternatives: in
4010                `(a|ab)*' we do need to backtrack to the `ab' alternative
4011                (e.g., if the string was `ab').  But instead of trying to
4012                detect that here, the alternative has put on a dummy
4013                failure point which is what we will end up popping.  */
4014 
4015             /* Skip over open/close-group commands.  */
4016             while (p2 + 2 < pend
4017                    && ((re_opcode_t) *p2 == stop_memory
4018                        || (re_opcode_t) *p2 == start_memory))
4019               p2 += 3;                  /* Skip over args, too.  */
4020 
4021             /* If we're at the end of the pattern, we can change.  */
4022             if (p2 == pend)
4023               {
4024                 /* Consider what happens when matching ":\(.*\)"
4025                    against ":/".  I don't really understand this code
4026                    yet.  */
4027                 p[-3] = (unsigned char) pop_failure_jump;
4028                 DEBUG_PRINT1
4029                   ("  End of pattern: change to `pop_failure_jump'.\n");
4030               }
4031 
4032             else if ((re_opcode_t) *p2 == exactn
4033                      || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4034               {
4035                 register unsigned char c
4036                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4037                 p1 = p + mcnt;
4038 
4039                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4040                    to the `maybe_pop_jump' of this case.  Examine what
4041                    follows.  */
4042                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4043                   {
4044                     p[-3] = (unsigned char) pop_failure_jump;
4045                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4046                                   c, p1[5]);
4047                   }
4048                   
4049                 else if ((re_opcode_t) p1[3] == charset
4050                          || (re_opcode_t) p1[3] == charset_not)
4051                   {
4052                     int not = (re_opcode_t) p1[3] == charset_not;
4053                     
4054                     if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4055                         && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4056                       not = !not;
4057 
4058                     /* `not' is equal to 1 if c would match, which means
4059                         that we can't change to pop_failure_jump.  */
4060                     if (!not)
4061                       {
4062                         p[-3] = (unsigned char) pop_failure_jump;
4063                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4064                       }
4065                   }
4066               }
4067           }
4068           p -= 2;               /* Point at relative address again.  */
4069           if ((re_opcode_t) p[-1] != pop_failure_jump)
4070             {
4071               p[-1] = (unsigned char) jump;
4072               DEBUG_PRINT1 ("  Match => jump.\n");
4073               goto unconditional_jump;
4074             }
4075         /* Note fall through.  */
4076 
4077 
4078         /* The end of a simple repeat has a pop_failure_jump back to
4079            its matching on_failure_jump, where the latter will push a
4080            failure point.  The pop_failure_jump takes off failure
4081            points put on by this pop_failure_jump's matching
4082            on_failure_jump; we got through the pattern to here from the
4083            matching on_failure_jump, so didn't fail.  */
4084         case pop_failure_jump:
4085           {
4086             /* We need to pass separate storage for the lowest and
4087                highest registers, even though we don't care about the
4088                actual values.  Otherwise, we will restore only one
4089                register from the stack, since lowest will == highest in
4090                `pop_failure_point'.  */
4091             unsigned dummy_low_reg, dummy_high_reg;
4092             unsigned char *pdummy;
4093             const char *sdummy;
4094 
4095             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4096             POP_FAILURE_POINT (sdummy, pdummy,
4097                                dummy_low_reg, dummy_high_reg,
4098                                reg_dummy, reg_dummy, reg_info_dummy);
4099           }
4100           /* Note fall through.  */
4101 
4102           
4103         /* Unconditionally jump (without popping any failure points).  */
4104         case jump:
4105         unconditional_jump:
4106           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
4107           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4108           p += mcnt;                            /* Do the jump.  */
4109           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4110           break;
4111 
4112         
4113         /* We need this opcode so we can detect where alternatives end
4114            in `group_match_null_string_p' et al.  */
4115         case jump_past_alt:
4116           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4117           goto unconditional_jump;
4118 
4119 
4120         /* Normally, the on_failure_jump pushes a failure point, which
4121            then gets popped at pop_failure_jump.  We will end up at
4122            pop_failure_jump, also, and with a pattern of, say, `a+', we
4123            are skipping over the on_failure_jump, so we have to push
4124            something meaningless for pop_failure_jump to pop.  */
4125         case dummy_failure_jump:
4126           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4127           /* It doesn't matter what we push for the string here.  What
4128              the code at `fail' tests is the value for the pattern.  */
4129           PUSH_FAILURE_POINT (0, 0, -2);
4130           goto unconditional_jump;
4131 
4132 
4133         /* At the end of an alternative, we need to push a dummy failure
4134            point in case we are followed by a `pop_failure_jump', because
4135            we don't want the failure point for the alternative to be
4136            popped.  For example, matching `(a|ab)*' against `aab'
4137            requires that we match the `ab' alternative.  */
4138         case push_dummy_failure:
4139           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4140           /* See comments just above at `dummy_failure_jump' about the
4141              two zeroes.  */
4142           PUSH_FAILURE_POINT (0, 0, -2);
4143           break;
4144 
4145         /* Have to succeed matching what follows at least n times.
4146            After that, handle like `on_failure_jump'.  */
4147         case succeed_n: 
4148           EXTRACT_NUMBER (mcnt, p + 2);
4149           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4150 
4151           assert (mcnt >= 0);
4152           /* Originally, this is how many times we HAVE to succeed.  */
4153           if (mcnt > 0)
4154             {
4155                mcnt--;
4156                p += 2;
4157                STORE_NUMBER_AND_INCR (p, mcnt);
4158                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4159             }
4160           else if (mcnt == 0)
4161             {
4162               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4163               p[2] = (unsigned char) no_op;
4164               p[3] = (unsigned char) no_op;
4165               goto on_failure;
4166             }
4167           break;
4168         
4169         case jump_n: 
4170           EXTRACT_NUMBER (mcnt, p + 2);
4171           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4172 
4173           /* Originally, this is how many times we CAN jump.  */
4174           if (mcnt)
4175             {
4176                mcnt--;
4177                STORE_NUMBER (p + 2, mcnt);
4178                goto unconditional_jump;      
4179             }
4180           /* If we don't have to jump any more, skip over the rest of the command.  */
4181           else      
4182             p += 4;                  
4183           break;
4184         
4185         case set_number_at:
4186           {
4187             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4188 
4189             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4190             p1 = p + mcnt;
4191             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4192             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4193             STORE_NUMBER (p1, mcnt);
4194             break;
4195           }
4196 
4197         case wordbound:
4198           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4199           if (AT_WORD_BOUNDARY (d))
4200             break;
4201           goto fail;
4202 
4203         case notwordbound:
4204           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4205           if (AT_WORD_BOUNDARY (d))
4206             goto fail;
4207           break;
4208 
4209         case wordbeg:
4210           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4211           if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4212             break;
4213           goto fail;
4214 
4215         case wordend:
4216           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4217           if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4218               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4219             break;
4220           goto fail;
4221 
4222 #ifdef emacs
4223 #ifdef emacs19
4224         case before_dot:
4225           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4226           if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4227             goto fail;
4228           break;
4229   
4230         case at_dot:
4231           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4232           if (PTR_CHAR_POS ((unsigned char *) d) != point)
4233             goto fail;
4234           break;
4235   
4236         case after_dot:
4237           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4238           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4239             goto fail;
4240           break;
4241 #else /* not emacs19 */
4242         case at_dot:
4243           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4244           if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4245             goto fail;
4246           break;
4247 #endif /* not emacs19 */
4248 
4249         case syntaxspec:
4250           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4251           mcnt = *p++;
4252           goto matchsyntax;
4253 
4254         case wordchar:
4255           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4256           mcnt = (int) Sword;
4257         matchsyntax:
4258           PREFETCH ();
4259           if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4260             goto fail;
4261           SET_REGS_MATCHED ();
4262           break;
4263 
4264         case notsyntaxspec:
4265           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4266           mcnt = *p++;
4267           goto matchnotsyntax;
4268 
4269         case notwordchar:
4270           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4271           mcnt = (int) Sword;
4272         matchnotsyntax:
4273           PREFETCH ();
4274           if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4275             goto fail;
4276           SET_REGS_MATCHED ();
4277           break;
4278 
4279 #else /* not emacs */
4280         case wordchar:
4281           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4282           PREFETCH ();
4283           if (!WORDCHAR_P (d))
4284             goto fail;
4285           SET_REGS_MATCHED ();
4286           d++;
4287           break;
4288           
4289         case notwordchar:
4290           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4291           PREFETCH ();
4292           if (WORDCHAR_P (d))
4293             goto fail;
4294           SET_REGS_MATCHED ();
4295           d++;
4296           break;
4297 #endif /* not emacs */
4298           
4299         default:
4300           abort ();
4301         }
4302       continue;  /* Successfully executed one pattern command; keep going.  */
4303 
4304 
4305     /* We goto here if a matching operation fails. */
4306     fail:
4307       if (!FAIL_STACK_EMPTY ())
4308         { /* A restart point is known.  Restore to that state.  */
4309           DEBUG_PRINT1 ("\nFAIL:\n");
4310           POP_FAILURE_POINT (d, p,
4311                              lowest_active_reg, highest_active_reg,
4312                              regstart, regend, reg_info);
4313 
4314           /* If this failure point is a dummy, try the next one.  */
4315           if (!p)
4316             goto fail;
4317 
4318           /* If we failed to the end of the pattern, don't examine *p.  */
4319           assert (p <= pend);
4320           if (p < pend)
4321             {
4322               boolean is_a_jump_n = false;
4323               
4324               /* If failed to a backwards jump that's part of a repetition
4325                  loop, need to pop this failure point and use the next one.  */
4326               switch ((re_opcode_t) *p)
4327                 {
4328                 case jump_n:
4329                   is_a_jump_n = true;
4330                 case maybe_pop_jump:
4331                 case pop_failure_jump:
4332                 case jump:
4333                   p1 = p + 1;
4334                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4335                   p1 += mcnt;   
4336 
4337                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4338                       || (!is_a_jump_n
4339                           && (re_opcode_t) *p1 == on_failure_jump))
4340                     goto fail;
4341                   break;
4342                 default:
4343                   /* do nothing */ ;
4344                 }
4345             }
4346 
4347           if (d >= string1 && d <= end1)
4348             dend = end_match_1;
4349         }
4350       else
4351         break;   /* Matching at this starting point really fails.  */
4352     } /* for (;;) */
4353 
4354   if (best_regs_set)
4355     goto restore_best_regs;
4356 
4357   FREE_VARIABLES ();
4358 
4359   return -1;                            /* Failure to match.  */
4360 } /* re_match_2 */
4361 
4362 /* Subroutine definitions for re_match_2.  */
4363 
4364 
4365 /* We are passed P pointing to a register number after a start_memory.
4366    
4367    Return true if the pattern up to the corresponding stop_memory can
4368    match the empty string, and false otherwise.
4369    
4370    If we find the matching stop_memory, sets P to point to one past its number.
4371    Otherwise, sets P to an undefined byte less than or equal to END.
4372 
4373    We don't handle duplicates properly (yet).  */
4374 
4375 static boolean
4376 group_match_null_string_p (p, end, reg_info)
     /* [<][>][^][v][top][bottom][index][help] */
4377     unsigned char **p, *end;
4378     register_info_type *reg_info;
4379 {
4380   int mcnt;
4381   /* Point to after the args to the start_memory.  */
4382   unsigned char *p1 = *p + 2;
4383   
4384   while (p1 < end)
4385     {
4386       /* Skip over opcodes that can match nothing, and return true or
4387          false, as appropriate, when we get to one that can't, or to the
4388          matching stop_memory.  */
4389       
4390       switch ((re_opcode_t) *p1)
4391         {
4392         /* Could be either a loop or a series of alternatives.  */
4393         case on_failure_jump:
4394           p1++;
4395           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4396           
4397           /* If the next operation is not a jump backwards in the
4398              pattern.  */
4399 
4400           if (mcnt >= 0)
4401             {
4402               /* Go through the on_failure_jumps of the alternatives,
4403                  seeing if any of the alternatives cannot match nothing.
4404                  The last alternative starts with only a jump,
4405                  whereas the rest start with on_failure_jump and end
4406                  with a jump, e.g., here is the pattern for `a|b|c':
4407 
4408                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4409                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4410                  /exactn/1/c                                            
4411 
4412                  So, we have to first go through the first (n-1)
4413                  alternatives and then deal with the last one separately.  */
4414 
4415 
4416               /* Deal with the first (n-1) alternatives, which start
4417                  with an on_failure_jump (see above) that jumps to right
4418                  past a jump_past_alt.  */
4419 
4420               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4421                 {
4422                   /* `mcnt' holds how many bytes long the alternative
4423                      is, including the ending `jump_past_alt' and
4424                      its number.  */
4425 
4426                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 
4427                                                       reg_info))
4428                     return false;
4429 
4430                   /* Move to right after this alternative, including the
4431                      jump_past_alt.  */
4432                   p1 += mcnt;   
4433 
4434                   /* Break if it's the beginning of an n-th alternative
4435                      that doesn't begin with an on_failure_jump.  */
4436                   if ((re_opcode_t) *p1 != on_failure_jump)
4437                     break;
4438                 
4439                   /* Still have to check that it's not an n-th
4440                      alternative that starts with an on_failure_jump.  */
4441                   p1++;
4442                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4443                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4444                     {
4445                       /* Get to the beginning of the n-th alternative.  */
4446                       p1 -= 3;
4447                       break;
4448                     }
4449                 }
4450 
4451               /* Deal with the last alternative: go back and get number
4452                  of the `jump_past_alt' just before it.  `mcnt' contains
4453                  the length of the alternative.  */
4454               EXTRACT_NUMBER (mcnt, p1 - 2);
4455 
4456               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4457                 return false;
4458 
4459               p1 += mcnt;       /* Get past the n-th alternative.  */
4460             } /* if mcnt > 0 */
4461           break;
4462 
4463           
4464         case stop_memory:
4465           assert (p1[1] == **p);
4466           *p = p1 + 2;
4467           return true;
4468 
4469         
4470         default: 
4471           if (!common_op_match_null_string_p (&p1, end, reg_info))
4472             return false;
4473         }
4474     } /* while p1 < end */
4475 
4476   return false;
4477 } /* group_match_null_string_p */
4478 
4479 
4480 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4481    It expects P to be the first byte of a single alternative and END one
4482    byte past the last. The alternative can contain groups.  */
4483    
4484 static boolean
4485 alt_match_null_string_p (p, end, reg_info)
     /* [<][>][^][v][top][bottom][index][help] */
4486     unsigned char *p, *end;
4487     register_info_type *reg_info;
4488 {
4489   int mcnt;
4490   unsigned char *p1 = p;
4491   
4492   while (p1 < end)
4493     {
4494       /* Skip over opcodes that can match nothing, and break when we get 
4495          to one that can't.  */
4496       
4497       switch ((re_opcode_t) *p1)
4498         {
4499         /* It's a loop.  */
4500         case on_failure_jump:
4501           p1++;
4502           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4503           p1 += mcnt;
4504           break;
4505           
4506         default: 
4507           if (!common_op_match_null_string_p (&p1, end, reg_info))
4508             return false;
4509         }
4510     }  /* while p1 < end */
4511 
4512   return true;
4513 } /* alt_match_null_string_p */
4514 
4515 
4516 /* Deals with the ops common to group_match_null_string_p and
4517    alt_match_null_string_p.  
4518    
4519    Sets P to one after the op and its arguments, if any.  */
4520 
4521 static boolean
4522 common_op_match_null_string_p (p, end, reg_info)
     /* [<][>][^][v][top][bottom][index][help] */
4523     unsigned char **p, *end;
4524     register_info_type *reg_info;
4525 {
4526   int mcnt;
4527   boolean ret;
4528   int reg_no;
4529   unsigned char *p1 = *p;
4530 
4531   switch ((re_opcode_t) *p1++)
4532     {
4533     case no_op:
4534     case begline:
4535     case endline:
4536     case begbuf:
4537     case endbuf:
4538     case wordbeg:
4539     case wordend:
4540     case wordbound:
4541     case notwordbound:
4542 #ifdef emacs
4543     case before_dot:
4544     case at_dot:
4545     case after_dot:
4546 #endif
4547       break;
4548 
4549     case start_memory:
4550       reg_no = *p1;
4551       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4552       ret = group_match_null_string_p (&p1, end, reg_info);
4553       
4554       /* Have to set this here in case we're checking a group which
4555          contains a group and a back reference to it.  */
4556 
4557       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4558         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4559 
4560       if (!ret)
4561         return false;
4562       break;
4563           
4564     /* If this is an optimized succeed_n for zero times, make the jump.  */
4565     case jump:
4566       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4567       if (mcnt >= 0)
4568         p1 += mcnt;
4569       else
4570         return false;
4571       break;
4572 
4573     case succeed_n:
4574       /* Get to the number of times to succeed.  */
4575       p1 += 2;          
4576       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4577 
4578       if (mcnt == 0)
4579         {
4580           p1 -= 4;
4581           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4582           p1 += mcnt;
4583         }
4584       else
4585         return false;
4586       break;
4587 
4588     case duplicate: 
4589       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4590         return false;
4591       break;
4592 
4593     case set_number_at:
4594       p1 += 4;
4595 
4596     default:
4597       /* All other opcodes mean we cannot match the empty string.  */
4598       return false;
4599   }
4600 
4601   *p = p1;
4602   return true;
4603 } /* common_op_match_null_string_p */
4604 
4605 
/* Compare the first LEN bytes of S1 and S2 after mapping each byte
   through the TRANSLATE table.

   Returns 0 if every pair of mapped bytes is equal, and 1 at the first
   pair that differs.  */

static int
bcmp_translate (s1, s2, len, translate)
     unsigned char *s1, *s2;
     register int len;
     char *translate;
{
  register unsigned char *lhs = s1;
  register unsigned char *rhs = s2;

  for (; len; len--)
    {
      if (translate[*lhs] != translate[*rhs])
        return 1;
      lhs++;
      rhs++;
    }

  return 0;
}
4623 
4624 /* Entry points for GNU code.  */
4625 
4626 /* re_compile_pattern is the GNU regular expression compiler: it
4627    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4628    Returns 0 if the pattern was valid, otherwise an error string.
4629    
4630    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4631    are set in BUFP on entry.
4632    
4633    We call regex_compile to do the actual compilation.  */
4634 
4635 const char *
4636 re_compile_pattern (pattern, length, bufp)
     /* [<][>][^][v][top][bottom][index][help] */
4637      const char *pattern;
4638      int length;
4639      struct re_pattern_buffer *bufp;
4640 {
4641   reg_errcode_t ret;
4642   
4643   /* GNU code is written to assume at least RE_NREGS registers will be set
4644      (and at least one extra will be -1).  */
4645   bufp->regs_allocated = REGS_UNALLOCATED;
4646   
4647   /* And GNU code determines whether or not to get register information
4648      by passing null for the REGS argument to re_match, etc., not by
4649      setting no_sub.  */
4650   bufp->no_sub = 0;
4651   
4652   /* Match anchors at newline.  */
4653   bufp->newline_anchor = 1;
4654   
4655   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4656 
4657   return re_error_msg[(int) ret];
4658 }     
4659 
4660 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4661    them if this is an Emacs or POSIX compilation.  */
4662 
4663 #if !defined (emacs) && !defined (_POSIX_SOURCE)
4664 
4665 /* BSD has one and only one pattern buffer.  */
4666 static struct re_pattern_buffer re_comp_buf;
4667 
4668 char *
4669 re_comp (s)
     /* [<][>][^][v][top][bottom][index][help] */
4670     const char *s;
4671 {
4672   reg_errcode_t ret;
4673   
4674   if (!s)
4675     {
4676       if (!re_comp_buf.buffer)
4677         return "No previous regular expression";
4678       return 0;
4679     }
4680 
4681   if (!re_comp_buf.buffer)
4682     {
4683       re_comp_buf.buffer = (unsigned char *) malloc (200);
4684       if (re_comp_buf.buffer == NULL)
4685         return "Memory exhausted";
4686       re_comp_buf.allocated = 200;
4687 
4688       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4689       if (re_comp_buf.fastmap == NULL)
4690         return "Memory exhausted";
4691     }
4692 
4693   /* Since `re_exec' always passes NULL for the `regs' argument, we
4694      don't need to initialize the pattern buffer fields which affect it.  */
4695 
4696   /* Match anchors at newlines.  */
4697   re_comp_buf.newline_anchor = 1;
4698 
4699   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4700   
4701   /* Yes, we're discarding `const' here.  */
4702   return (char *) re_error_msg[(int) ret];
4703 }
4704 
4705 
4706 int
4707 re_exec (s)
     /* [<][>][^][v][top][bottom][index][help] */
4708     const char *s;
4709 {
4710   const int len = strlen (s);
4711   return
4712     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4713 }
4714 #endif /* not emacs and not _POSIX_SOURCE */
4715 
4716 /* POSIX.2 functions.  Don't define these for Emacs.  */
4717 
4718 #ifndef emacs
4719 
4720 /* regcomp takes a regular expression as a string and compiles it.
4721 
4722    PREG is a regex_t *.  We do not expect any fields to be initialized,
4723    since POSIX says we shouldn't.  Thus, we set
4724 
4725      `buffer' to the compiled pattern;
4726      `used' to the length of the compiled pattern;
4727      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4728        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4729        RE_SYNTAX_POSIX_BASIC;
4730      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4731      `fastmap' and `fastmap_accurate' to zero;
4732      `re_nsub' to the number of subexpressions in PATTERN.
4733 
4734    PATTERN is the address of the pattern string.
4735 
4736    CFLAGS is a series of bits which affect compilation.
4737 
4738      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4739      use POSIX basic syntax.
4740 
4741      If REG_NEWLINE is set, then . and [^...] don't match newline.
4742      Also, regexec will try a match beginning after every newline.
4743 
4744      If REG_ICASE is set, then we considers upper- and lowercase
4745      versions of letters to be equivalent when matching.
4746 
4747      If REG_NOSUB is set, then when PREG is passed to regexec, that
4748      routine will report only success or failure, and nothing about the
4749      registers.
4750 
4751    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4752    the return codes and their meanings.)  */
4753 
4754 int
4755 regcomp (preg, pattern, cflags)
     /* [<][>][^][v][top][bottom][index][help] */
4756     regex_t *preg;
4757     const char *pattern; 
4758     int cflags;
4759 {
4760   reg_errcode_t ret;
4761   unsigned syntax
4762     = (cflags & REG_EXTENDED) ?
4763       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4764 
4765   /* regex_compile will allocate the space for the compiled pattern.  */
4766   preg->buffer = 0;
4767   preg->allocated = 0;
4768   
4769   /* Don't bother to use a fastmap when searching.  This simplifies the
4770      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4771      characters after newlines into the fastmap.  This way, we just try
4772      every character.  */
4773   preg->fastmap = 0;
4774   
4775   if (cflags & REG_ICASE)
4776     {
4777       unsigned i;
4778       
4779       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4780       if (preg->translate == NULL)
4781         return (int) REG_ESPACE;
4782 
4783       /* Map uppercase characters to corresponding lowercase ones.  */
4784       for (i = 0; i < CHAR_SET_SIZE; i++)
4785         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4786     }
4787   else
4788     preg->translate = NULL;
4789 
4790   /* If REG_NEWLINE is set, newlines are treated differently.  */
4791   if (cflags & REG_NEWLINE)
4792     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4793       syntax &= ~RE_DOT_NEWLINE;
4794       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4795       /* It also changes the matching behavior.  */
4796       preg->newline_anchor = 1;
4797     }
4798   else
4799     preg->newline_anchor = 0;
4800 
4801   preg->no_sub = !!(cflags & REG_NOSUB);
4802 
4803   /* POSIX says a null character in the pattern terminates it, so we 
4804      can use strlen here in compiling the pattern.  */
4805   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4806   
4807   /* POSIX doesn't distinguish between an unmatched open-group and an
4808      unmatched close-group: both are REG_EPAREN.  */
4809   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4810   
4811   return (int) ret;
4812 }
4813 
4814 
4815 /* regexec searches for a given pattern, specified by PREG, in the
4816    string STRING.
4817    
4818    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4819    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4820    least NMATCH elements, and we set them to the offsets of the
4821    corresponding matched substrings.
4822    
4823    EFLAGS specifies `execution flags' which affect matching: if
4824    REG_NOTBOL is set, then ^ does not match at the beginning of the
4825    string; if REG_NOTEOL is set, then $ does not match at the end.
4826    
4827    We return 0 if we find a match and REG_NOMATCH if not.  */
4828 
4829 int
4830 regexec (preg, string, nmatch, pmatch, eflags)
     /* [<][>][^][v][top][bottom][index][help] */
4831     const regex_t *preg;
4832     const char *string; 
4833     size_t nmatch; 
4834     regmatch_t pmatch[]; 
4835     int eflags;
4836 {
4837   int ret;
4838   struct re_registers regs;
4839   regex_t private_preg;
4840   int len = strlen (string);
4841   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4842 
4843   private_preg = *preg;
4844   
4845   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4846   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4847   
4848   /* The user has told us exactly how many registers to return
4849      information about, via `nmatch'.  We have to pass that on to the
4850      matching routines.  */
4851   private_preg.regs_allocated = REGS_FIXED;
4852   
4853   if (want_reg_info)
4854     {
4855       regs.num_regs = nmatch;
4856       regs.start = TALLOC (nmatch, regoff_t);
4857       regs.end = TALLOC (nmatch, regoff_t);
4858       if (regs.start == NULL || regs.end == NULL)
4859         return (int) REG_NOMATCH;
4860     }
4861 
4862   /* Perform the searching operation.  */
4863   ret = re_search (&private_preg, string, len,
4864                    /* start: */ 0, /* range: */ len,
4865                    want_reg_info ? &regs : (struct re_registers *) 0);
4866   
4867   /* Copy the register information to the POSIX structure.  */
4868   if (want_reg_info)
4869     {
4870       if (ret >= 0)
4871         {
4872           unsigned r;
4873 
4874           for (r = 0; r < nmatch; r++)
4875             {
4876               pmatch[r].rm_so = regs.start[r];
4877               pmatch[r].rm_eo = regs.end[r];
4878             }
4879         }
4880 
4881       /* If we needed the temporary register info, free the space now.  */
4882       free (regs.start);
4883       free (regs.end);
4884     }
4885 
4886   /* We want zero return to mean success, unlike `re_search'.  */
4887   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4888 }
4889 
4890 
4891 /* Returns a message corresponding to an error code, ERRCODE, returned
4892    from either regcomp or regexec.   We don't use PREG here.  */
4893 
4894 size_t
4895 regerror (errcode, preg, errbuf, errbuf_size)
     /* [<][>][^][v][top][bottom][index][help] */
4896     int errcode;
4897     const regex_t *preg;
4898     char *errbuf;
4899     size_t errbuf_size;
4900 {
4901   const char *msg;
4902   size_t msg_size;
4903 
4904   if (errcode < 0
4905       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4906     /* Only error codes returned by the rest of the code should be passed 
4907        to this routine.  If we are given anything else, or if other regex
4908        code generates an invalid error code, then the program has a bug.
4909        Dump core so we can fix it.  */
4910     abort ();
4911 
4912   msg = re_error_msg[errcode];
4913 
4914   /* POSIX doesn't require that we do anything in this case, but why
4915      not be nice.  */
4916   if (! msg)
4917     msg = "Success";
4918 
4919   msg_size = strlen (msg) + 1; /* Includes the null.  */
4920   
4921   if (errbuf_size != 0)
4922     {
4923       if (msg_size > errbuf_size)
4924         {
4925           strncpy (errbuf, msg, errbuf_size - 1);
4926           errbuf[errbuf_size - 1] = 0;
4927         }
4928       else
4929         strcpy (errbuf, msg);
4930     }
4931 
4932   return msg_size;
4933 }
4934 
4935 
4936 /* Free dynamically allocated space used by PREG.  */
4937 
4938 void
4939 regfree (preg)
     /* [<][>][^][v][top][bottom][index][help] */
4940     regex_t *preg;
4941 {
4942   if (preg->buffer != NULL)
4943     free (preg->buffer);
4944   preg->buffer = NULL;
4945   
4946   preg->allocated = 0;
4947   preg->used = 0;
4948 
4949   if (preg->fastmap != NULL)
4950     free (preg->fastmap);
4951   preg->fastmap = NULL;
4952   preg->fastmap_accurate = 0;
4953 
4954   if (preg->translate != NULL)
4955     free (preg->translate);
4956   preg->translate = NULL;
4957 }
4958 
4959 #endif /* not emacs  */
4960 
4961 /*
4962 Local variables:
4963 make-backup-files: t
4964 version-control: t
4965 trim-versions-without-asking: nil
4966 End:
4967 */

/* [<][>][^][v][top][bottom][index][help] */