coreboot/payloads/libpayload/util/kconfig/regex.c
<<
>>
Prefs
   1/* Extended regular expression matching and search library,
   2   version 0.12.
   3   (Implements POSIX draft P10003.2/D11.2, except for
   4   internationalization features.)
   5
   6   Copyright (C) 1993 Free Software Foundation, Inc.
   7
   8   This program is free software; you can redistribute it and/or modify
   9   it under the terms of the GNU General Public License as published by
  10   the Free Software Foundation; either version 2, or (at your option)
  11   any later version.
  12
  13   This program is distributed in the hope that it will be useful,
  14   but WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16   GNU General Public License for more details.
  17
  18   You should have received a copy of the GNU General Public License
  19   along with this program; if not, write to the Free Software
  20   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  21
  22/* AIX requires this to be the first thing in the file. */
  23#if defined (_AIX) && !defined (REGEX_MALLOC)
  24  #pragma alloca
  25#endif
  26
  27#define _GNU_SOURCE
  28
  29/* We need this for `regex.h', and perhaps for the Emacs include files.  */
  30#include <sys/types.h>
  31
  32#ifdef HAVE_CONFIG_H
  33#include "config.h"
  34#endif
  35
  36/* The `emacs' switch turns on certain matching commands
  37   that make sense only in Emacs. */
  38#ifdef emacs
  39
  40#include "lisp.h"
  41#include "buffer.h"
  42#include "syntax.h"
  43
  44/* Emacs uses `NULL' as a predicate.  */
  45#undef NULL
  46
  47#else  /* not emacs */
  48
  49/* We used to test for `BSTRING' here, but only GCC and Emacs define
  50   `BSTRING', as far as I know, and neither of them use this code.  */
  51#if HAVE_STRING_H || STDC_HEADERS
  52#include <string.h>
  53#ifndef bcmp
  54#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  55#endif
  56#ifndef bcopy
  57#define bcopy(s, d, n)  memcpy ((d), (s), (n))
  58#endif
  59#ifndef bzero
  60#define bzero(s, n)     memset ((s), 0, (n))
  61#endif
  62#else
  63#include <strings.h>
  64#endif
  65
  66#ifdef STDC_HEADERS
  67#include <stdlib.h>
  68#else
  69char *malloc ();
  70char *realloc ();
  71#endif
  72
  73
  74/* Define the syntax stuff for \<, \>, etc.  */
  75
  76/* This must be nonzero for the wordchar and notwordchar pattern
  77   commands in re_match_2.  */
  78#ifndef Sword
  79#define Sword 1
  80#endif
  81
  82#ifdef SYNTAX_TABLE
  83
  84extern char *re_syntax_table;
  85
  86#else /* not SYNTAX_TABLE */
  87
  88/* How many characters in the character set.  */
  89#define CHAR_SET_SIZE 256
  90
  91static char re_syntax_table[CHAR_SET_SIZE];
  92
  93static void
  94init_syntax_once ()
  95{
  96   register int c;
  97   static int done = 0;
  98
  99   if (done)
 100     return;
 101
 102   bzero (re_syntax_table, sizeof re_syntax_table);
 103
 104   for (c = 'a'; c <= 'z'; c++)
 105     re_syntax_table[c] = Sword;
 106
 107   for (c = 'A'; c <= 'Z'; c++)
 108     re_syntax_table[c] = Sword;
 109
 110   for (c = '0'; c <= '9'; c++)
 111     re_syntax_table[c] = Sword;
 112
 113   re_syntax_table['_'] = Sword;
 114
 115   done = 1;
 116}
 117
 118#endif /* not SYNTAX_TABLE */
 119
 120#define SYNTAX(c) re_syntax_table[c]
 121
 122#endif /* not emacs */
 123
 124/* Get the interface, including the syntax bits.  */
 125#include "regex.h"
 126
 127/* isalpha etc. are used for the character classes.  */
 128#include <ctype.h>
 129
 130#ifndef isascii
 131#define isascii(c) 1
 132#endif
 133
 134#ifdef isblank
 135#define ISBLANK(c) (isascii (c) && isblank (c))
 136#else
 137#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 138#endif
 139#ifdef isgraph
 140#define ISGRAPH(c) (isascii (c) && isgraph (c))
 141#else
 142#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
 143#endif
 144
 145#define ISPRINT(c) (isascii (c) && isprint (c))
 146#define ISDIGIT(c) (isascii (c) && isdigit (c))
 147#define ISALNUM(c) (isascii (c) && isalnum (c))
 148#define ISALPHA(c) (isascii (c) && isalpha (c))
 149#define ISCNTRL(c) (isascii (c) && iscntrl (c))
 150#define ISLOWER(c) (isascii (c) && islower (c))
 151#define ISPUNCT(c) (isascii (c) && ispunct (c))
 152#define ISSPACE(c) (isascii (c) && isspace (c))
 153#define ISUPPER(c) (isascii (c) && isupper (c))
 154#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
 155
 156#ifndef NULL
 157#define NULL 0
 158#endif
 159
 160/* We remove any previous definition of `SIGN_EXTEND_CHAR',
 161   since ours (we hope) works properly with all combinations of
 162   machines, compilers, `char' and `unsigned char' argument types.
 163   (Per Bothner suggested the basic approach.)  */
 164#undef SIGN_EXTEND_CHAR
 165#if __STDC__
 166#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
 167#else  /* not __STDC__ */
 168/* As in Harbison and Steele.  */
 169#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
 170#endif
 171
 172/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
 173   use `alloca' instead of `malloc'.  This is because using malloc in
 174   re_search* or re_match* could cause memory leaks when C-g is used in
 175   Emacs; also, malloc is slower and causes storage fragmentation.  On
 176   the other hand, malloc is more portable, and easier to debug.
 177
 178   Because we sometimes use alloca, some routines have to be macros,
 179   not functions -- `alloca'-allocated space disappears at the end of the
 180   function it is called in.  */
 181
 182#ifdef REGEX_MALLOC
 183
 184#define REGEX_ALLOCATE malloc
 185#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
 186
 187#else /* not REGEX_MALLOC  */
 188
 189/* Emacs already defines alloca, sometimes.  */
 190#ifndef alloca
 191
 192/* Make alloca work the best possible way.  */
 193#ifdef __GNUC__
 194#define alloca __builtin_alloca
 195#else /* not __GNUC__ */
 196#if HAVE_ALLOCA_H
 197#include <alloca.h>
 198#else /* not __GNUC__ or HAVE_ALLOCA_H */
 199#ifndef _AIX /* Already did AIX, up at the top.  */
 200char *alloca ();
 201#endif /* not _AIX */
 202#endif /* not HAVE_ALLOCA_H */
 203#endif /* not __GNUC__ */
 204
 205#endif /* not alloca */
 206
 207#define REGEX_ALLOCATE alloca
 208
 209/* Assumes a `char *destination' variable.  */
 210#define REGEX_REALLOCATE(source, osize, nsize)                          \
 211  (destination = (char *) alloca (nsize),                               \
 212   bcopy (source, destination, osize),                                  \
 213   destination)
 214
 215#endif /* not REGEX_MALLOC */
 216
 217
 218/* True if `size1' is non-NULL and PTR is pointing anywhere inside
 219   `string1' or just past its end.  This works if PTR is NULL, which is
 220   a good thing.  */
 221#define FIRST_STRING_P(ptr)                                     \
 222  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 223
 224/* (Re)Allocate N items of type T using malloc, or fail.  */
 225#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
 226#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
 227#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
 228
 229#define BYTEWIDTH 8 /* In bits.  */
 230
 231#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
 232
 233#define MAX(a, b) ((a) > (b) ? (a) : (b))
 234#define MIN(a, b) ((a) < (b) ? (a) : (b))
 235
 236typedef char boolean;
 237#define false 0
 238#define true 1
 239
 240/* These are the command codes that appear in compiled regular
 241   expressions.  Some opcodes are followed by argument bytes.  A
 242   command code can specify any interpretation whatsoever for its
 243   arguments.  Zero bytes may appear in the compiled regular expression.
 244
 245   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
 246   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
 247   `exactn' we use here must also be 1.  */
 248
 249typedef enum
 250{
 251  no_op = 0,
 252
 253        /* Followed by one byte giving n, then by n literal bytes.  */
 254  exactn = 1,
 255
 256        /* Matches any (more or less) character.  */
 257  anychar,
 258
 259        /* Matches any one char belonging to specified set.  First
 260           following byte is number of bitmap bytes.  Then come bytes
 261           for a bitmap saying which chars are in.  Bits in each byte
 262           are ordered low-bit-first.  A character is in the set if its
 263           bit is 1.  A character too large to have a bit in the map is
 264           automatically not in the set.  */
 265  charset,
 266
 267        /* Same parameters as charset, but match any character that is
 268           not one of those specified.  */
 269  charset_not,
 270
 271        /* Start remembering the text that is matched, for storing in a
 272           register.  Followed by one byte with the register number, in
 273           the range 0 to one less than the pattern buffer's re_nsub
 274           field.  Then followed by one byte with the number of groups
 275           inner to this one.  (This last has to be part of the
 276           start_memory only because we need it in the on_failure_jump
 277           of re_match_2.)  */
 278  start_memory,
 279
 280        /* Stop remembering the text that is matched and store it in a
 281           memory register.  Followed by one byte with the register
 282           number, in the range 0 to one less than `re_nsub' in the
 283           pattern buffer, and one byte with the number of inner groups,
 284           just like `start_memory'.  (We need the number of inner
 285           groups here because we don't have any easy way of finding the
 286           corresponding start_memory when we're at a stop_memory.)  */
 287  stop_memory,
 288
 289        /* Match a duplicate of something remembered. Followed by one
 290           byte containing the register number.  */
 291  duplicate,
 292
 293        /* Fail unless at beginning of line.  */
 294  begline,
 295
 296        /* Fail unless at end of line.  */
 297  endline,
 298
 299        /* Succeeds if at beginning of buffer (if emacs) or at beginning
 300           of string to be matched (if not).  */
 301  begbuf,
 302
 303        /* Analogously, for end of buffer/string.  */
 304  endbuf,
 305
 306        /* Followed by two byte relative address to which to jump.  */
 307  jump,
 308
 309        /* Same as jump, but marks the end of an alternative.  */
 310  jump_past_alt,
 311
 312        /* Followed by two-byte relative address of place to resume at
 313           in case of failure.  */
 314  on_failure_jump,
 315
 316        /* Like on_failure_jump, but pushes a placeholder instead of the
 317           current string position when executed.  */
 318  on_failure_keep_string_jump,
 319
 320        /* Throw away latest failure point and then jump to following
 321           two-byte relative address.  */
 322  pop_failure_jump,
 323
 324        /* Change to pop_failure_jump if know won't have to backtrack to
 325           match; otherwise change to jump.  This is used to jump
 326           back to the beginning of a repeat.  If what follows this jump
 327           clearly won't match what the repeat does, such that we can be
 328           sure that there is no use backtracking out of repetitions
 329           already matched, then we change it to a pop_failure_jump.
 330           Followed by two-byte address.  */
 331  maybe_pop_jump,
 332
 333        /* Jump to following two-byte address, and push a dummy failure
 334           point. This failure point will be thrown away if an attempt
 335           is made to use it for a failure.  A `+' construct makes this
 336           before the first repeat.  Also used as an intermediary kind
 337           of jump when compiling an alternative.  */
 338  dummy_failure_jump,
 339
 340        /* Push a dummy failure point and continue.  Used at the end of
 341           alternatives.  */
 342  push_dummy_failure,
 343
 344        /* Followed by two-byte relative address and two-byte number n.
 345           After matching N times, jump to the address upon failure.  */
 346  succeed_n,
 347
 348        /* Followed by two-byte relative address, and two-byte number n.
 349           Jump to the address N times, then fail.  */
 350  jump_n,
 351
 352        /* Set the following two-byte relative address to the
 353           subsequent two-byte number.  The address *includes* the two
 354           bytes of number.  */
 355  set_number_at,
 356
 357  wordchar,     /* Matches any word-constituent character.  */
 358  notwordchar,  /* Matches any char that is not a word-constituent.  */
 359
 360  wordbeg,      /* Succeeds if at word beginning.  */
 361  wordend,      /* Succeeds if at word end.  */
 362
 363  wordbound,    /* Succeeds if at a word boundary.  */
 364  notwordbound  /* Succeeds if not at a word boundary.  */
 365
 366#ifdef emacs
 367  ,before_dot,  /* Succeeds if before point.  */
 368  at_dot,       /* Succeeds if at point.  */
 369  after_dot,    /* Succeeds if after point.  */
 370
 371        /* Matches any character whose syntax is specified.  Followed by
 372           a byte which contains a syntax code, e.g., Sword.  */
 373  syntaxspec,
 374
 375        /* Matches any character whose syntax is not that specified.  */
 376  notsyntaxspec
 377#endif /* emacs */
 378} re_opcode_t;
 379
 380/* Common operations on the compiled pattern.  */
 381
 382/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
 383
 384#define STORE_NUMBER(destination, number)                               \
 385  do {                                                                  \
 386    (destination)[0] = (number) & 0377;                                 \
 387    (destination)[1] = (number) >> 8;                                   \
 388  } while (0)
 389
 390/* Same as STORE_NUMBER, except increment DESTINATION to
 391   the byte after where the number is stored.  Therefore, DESTINATION
 392   must be an lvalue.  */
 393
 394#define STORE_NUMBER_AND_INCR(destination, number)                      \
 395  do {                                                                  \
 396    STORE_NUMBER (destination, number);                                 \
 397    (destination) += 2;                                                 \
 398  } while (0)
 399
 400/* Put into DESTINATION a number stored in two contiguous bytes starting
 401   at SOURCE.  */
 402
 403#define EXTRACT_NUMBER(destination, source)                             \
 404  do {                                                                  \
 405    (destination) = *(source) & 0377;                                   \
 406    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
 407  } while (0)
 408
 409#ifdef DEBUG
 410static void
 411extract_number (dest, source)
 412    int *dest;
 413    unsigned char *source;
 414{
 415  int temp = SIGN_EXTEND_CHAR (*(source + 1));
 416  *dest = *source & 0377;
 417  *dest += temp << 8;
 418}
 419
 420#ifndef EXTRACT_MACROS /* To debug the macros.  */
 421#undef EXTRACT_NUMBER
 422#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
 423#endif /* not EXTRACT_MACROS */
 424
 425#endif /* DEBUG */
 426
 427/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 428   SOURCE must be an lvalue.  */
 429
 430#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
 431  do {                                                                  \
 432    EXTRACT_NUMBER (destination, source);                               \
 433    (source) += 2;                                                      \
 434  } while (0)
 435
 436#ifdef DEBUG
 437static void
 438extract_number_and_incr (destination, source)
 439    int *destination;
 440    unsigned char **source;
 441{
 442  extract_number (destination, *source);
 443  *source += 2;
 444}
 445
 446#ifndef EXTRACT_MACROS
 447#undef EXTRACT_NUMBER_AND_INCR
 448#define EXTRACT_NUMBER_AND_INCR(dest, src) \
 449  extract_number_and_incr (&dest, &src)
 450#endif /* not EXTRACT_MACROS */
 451
 452#endif /* DEBUG */
 453
 454/* If DEBUG is defined, Regex prints many voluminous messages about what
 455   it is doing (if the variable `debug' is nonzero).  If linked with the
 456   main program in `iregex.c', you can enter patterns and strings
 457   interactively.  And if linked with the main program in `main.c' and
 458   the other test files, you can run the already-written tests.  */
 459
 460#ifdef DEBUG
 461
 462/* We use standard I/O for debugging.  */
 463#include <stdio.h>
 464
 465/* It is useful to test things that ``must'' be true when debugging.  */
 466#include <assert.h>
 467
 468static int debug = 0;
 469
 470#define DEBUG_STATEMENT(e) e
 471#define DEBUG_PRINT1(x) if (debug) printf (x)
 472#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
 473#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
 474#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
 475#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
 476  if (debug) print_partial_compiled_pattern (s, e)
 477#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
 478  if (debug) print_double_string (w, s1, sz1, s2, sz2)
 479
 480
 481extern void printchar ();
 482
 483/* Print the fastmap in human-readable form.  */
 484
 485void
 486print_fastmap (fastmap)
 487    char *fastmap;
 488{
 489  unsigned was_a_range = 0;
 490  unsigned i = 0;
 491
 492  while (i < (1 << BYTEWIDTH))
 493    {
 494      if (fastmap[i++])
 495        {
 496          was_a_range = 0;
 497          printchar (i - 1);
 498          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
 499            {
 500              was_a_range = 1;
 501              i++;
 502            }
 503          if (was_a_range)
 504            {
 505              printf ("-");
 506              printchar (i - 1);
 507            }
 508        }
 509    }
 510  putchar ('\n');
 511}
 512
 513
 514/* Print a compiled pattern string in human-readable form, starting at
 515   the START pointer into it and ending just before the pointer END.  */
 516
 517void
 518print_partial_compiled_pattern (start, end)
 519    unsigned char *start;
 520    unsigned char *end;
 521{
 522  int mcnt, mcnt2;
 523  unsigned char *p = start;
 524  unsigned char *pend = end;
 525
 526  if (start == NULL)
 527    {
 528      printf ("(null)\n");
 529      return;
 530    }
 531
 532  /* Loop over pattern commands.  */
 533  while (p < pend)
 534    {
 535      switch ((re_opcode_t) *p++)
 536        {
 537        case no_op:
 538          printf ("/no_op");
 539          break;
 540
 541        case exactn:
 542          mcnt = *p++;
 543          printf ("/exactn/%d", mcnt);
 544          do
 545            {
 546              putchar ('/');
 547              printchar (*p++);
 548            }
 549          while (--mcnt);
 550          break;
 551
 552        case start_memory:
 553          mcnt = *p++;
 554          printf ("/start_memory/%d/%d", mcnt, *p++);
 555          break;
 556
 557        case stop_memory:
 558          mcnt = *p++;
 559          printf ("/stop_memory/%d/%d", mcnt, *p++);
 560          break;
 561
 562        case duplicate:
 563          printf ("/duplicate/%d", *p++);
 564          break;
 565
 566        case anychar:
 567          printf ("/anychar");
 568          break;
 569
 570        case charset:
 571        case charset_not:
 572          {
 573            register int c;
 574
 575            printf ("/charset%s",
 576                    (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
 577
 578            assert (p + *p < pend);
 579
 580            for (c = 0; c < *p; c++)
 581              {
 582                unsigned bit;
 583                unsigned char map_byte = p[1 + c];
 584
 585                putchar ('/');
 586
 587                for (bit = 0; bit < BYTEWIDTH; bit++)
 588                  if (map_byte & (1 << bit))
 589                    printchar (c * BYTEWIDTH + bit);
 590              }
 591            p += 1 + *p;
 592            break;
 593          }
 594
 595        case begline:
 596          printf ("/begline");
 597          break;
 598
 599        case endline:
 600          printf ("/endline");
 601          break;
 602
 603        case on_failure_jump:
 604          extract_number_and_incr (&mcnt, &p);
 605          printf ("/on_failure_jump/0/%d", mcnt);
 606          break;
 607
 608        case on_failure_keep_string_jump:
 609          extract_number_and_incr (&mcnt, &p);
 610          printf ("/on_failure_keep_string_jump/0/%d", mcnt);
 611          break;
 612
 613        case dummy_failure_jump:
 614          extract_number_and_incr (&mcnt, &p);
 615          printf ("/dummy_failure_jump/0/%d", mcnt);
 616          break;
 617
 618        case push_dummy_failure:
 619          printf ("/push_dummy_failure");
 620          break;
 621
 622        case maybe_pop_jump:
 623          extract_number_and_incr (&mcnt, &p);
 624          printf ("/maybe_pop_jump/0/%d", mcnt);
 625          break;
 626
 627        case pop_failure_jump:
 628          extract_number_and_incr (&mcnt, &p);
 629          printf ("/pop_failure_jump/0/%d", mcnt);
 630          break;
 631
 632        case jump_past_alt:
 633          extract_number_and_incr (&mcnt, &p);
 634          printf ("/jump_past_alt/0/%d", mcnt);
 635          break;
 636
 637        case jump:
 638          extract_number_and_incr (&mcnt, &p);
 639          printf ("/jump/0/%d", mcnt);
 640          break;
 641
 642        case succeed_n:
 643          extract_number_and_incr (&mcnt, &p);
 644          extract_number_and_incr (&mcnt2, &p);
 645          printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
 646          break;
 647
 648        case jump_n:
 649          extract_number_and_incr (&mcnt, &p);
 650          extract_number_and_incr (&mcnt2, &p);
 651          printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
 652          break;
 653
 654        case set_number_at:
 655          extract_number_and_incr (&mcnt, &p);
 656          extract_number_and_incr (&mcnt2, &p);
 657          printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
 658          break;
 659
 660        case wordbound:
 661          printf ("/wordbound");
 662          break;
 663
 664        case notwordbound:
 665          printf ("/notwordbound");
 666          break;
 667
 668        case wordbeg:
 669          printf ("/wordbeg");
 670          break;
 671
 672        case wordend:
 673          printf ("/wordend");
 674
 675#ifdef emacs
 676        case before_dot:
 677          printf ("/before_dot");
 678          break;
 679
 680        case at_dot:
 681          printf ("/at_dot");
 682          break;
 683
 684        case after_dot:
 685          printf ("/after_dot");
 686          break;
 687
 688        case syntaxspec:
 689          printf ("/syntaxspec");
 690          mcnt = *p++;
 691          printf ("/%d", mcnt);
 692          break;
 693
 694        case notsyntaxspec:
 695          printf ("/notsyntaxspec");
 696          mcnt = *p++;
 697          printf ("/%d", mcnt);
 698          break;
 699#endif /* emacs */
 700
 701        case wordchar:
 702          printf ("/wordchar");
 703          break;
 704
 705        case notwordchar:
 706          printf ("/notwordchar");
 707          break;
 708
 709        case begbuf:
 710          printf ("/begbuf");
 711          break;
 712
 713        case endbuf:
 714          printf ("/endbuf");
 715          break;
 716
 717        default:
 718          printf ("?%d", *(p-1));
 719        }
 720    }
 721  printf ("/\n");
 722}
 723
 724
 725void
 726print_compiled_pattern (bufp)
 727    struct re_pattern_buffer *bufp;
 728{
 729  unsigned char *buffer = bufp->buffer;
 730
 731  print_partial_compiled_pattern (buffer, buffer + bufp->used);
 732  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
 733
 734  if (bufp->fastmap_accurate && bufp->fastmap)
 735    {
 736      printf ("fastmap: ");
 737      print_fastmap (bufp->fastmap);
 738    }
 739
 740  printf ("re_nsub: %d\t", bufp->re_nsub);
 741  printf ("regs_alloc: %d\t", bufp->regs_allocated);
 742  printf ("can_be_null: %d\t", bufp->can_be_null);
 743  printf ("newline_anchor: %d\n", bufp->newline_anchor);
 744  printf ("no_sub: %d\t", bufp->no_sub);
 745  printf ("not_bol: %d\t", bufp->not_bol);
 746  printf ("not_eol: %d\t", bufp->not_eol);
 747  printf ("syntax: %d\n", bufp->syntax);
 748  /* Perhaps we should print the translate table?  */
 749}
 750
 751
 752void
 753print_double_string (where, string1, size1, string2, size2)
 754    const char *where;
 755    const char *string1;
 756    const char *string2;
 757    int size1;
 758    int size2;
 759{
 760  unsigned this_char;
 761
 762  if (where == NULL)
 763    printf ("(null)");
 764  else
 765    {
 766      if (FIRST_STRING_P (where))
 767        {
 768          for (this_char = where - string1; this_char < size1; this_char++)
 769            printchar (string1[this_char]);
 770
 771          where = string2;
 772        }
 773
 774      for (this_char = where - string2; this_char < size2; this_char++)
 775        printchar (string2[this_char]);
 776    }
 777}
 778
 779#else /* not DEBUG */
 780
 781#undef assert
 782#define assert(e)
 783
 784#define DEBUG_STATEMENT(e)
 785#define DEBUG_PRINT1(x)
 786#define DEBUG_PRINT2(x1, x2)
 787#define DEBUG_PRINT3(x1, x2, x3)
 788#define DEBUG_PRINT4(x1, x2, x3, x4)
 789#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
 790#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
 791
 792#endif /* not DEBUG */
 793
 794/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
 795   also be assigned to arbitrarily: each pattern buffer stores its own
 796   syntax, so it can be changed between regex compilations.  */
 797reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
 798
 799
 800/* Specify the precise syntax of regexps for compilation.  This provides
 801   for compatibility for various utilities which historically have
 802   different, incompatible syntaxes.
 803
 804   The argument SYNTAX is a bit mask comprised of the various bits
 805   defined in regex.h.  We return the old syntax.  */
 806
 807reg_syntax_t
 808re_set_syntax (syntax)
 809    reg_syntax_t syntax;
 810{
 811  reg_syntax_t ret = re_syntax_options;
 812
 813  re_syntax_options = syntax;
 814  return ret;
 815}
 816
 817/* This table gives an error message for each of the error codes listed
 818   in regex.h.  Obviously the order here has to be same as there.  */
 819
 820static const char *re_error_msg[] =
 821  { NULL,                                       /* REG_NOERROR */
 822    "No match",                                 /* REG_NOMATCH */
 823    "Invalid regular expression",               /* REG_BADPAT */
 824    "Invalid collation character",              /* REG_ECOLLATE */
 825    "Invalid character class name",             /* REG_ECTYPE */
 826    "Trailing backslash",                       /* REG_EESCAPE */
 827    "Invalid back reference",                   /* REG_ESUBREG */
 828    "Unmatched [ or [^",                        /* REG_EBRACK */
 829    "Unmatched ( or \\(",                       /* REG_EPAREN */
 830    "Unmatched \\{",                            /* REG_EBRACE */
 831    "Invalid content of \\{\\}",                /* REG_BADBR */
 832    "Invalid range end",                        /* REG_ERANGE */
 833    "Memory exhausted",                         /* REG_ESPACE */
 834    "Invalid preceding regular expression",     /* REG_BADRPT */
 835    "Premature end of regular expression",      /* REG_EEND */
 836    "Regular expression too big",               /* REG_ESIZE */
 837    "Unmatched ) or \\)",                       /* REG_ERPAREN */
 838  };
 839
 840/* Subroutine declarations and macros for regex_compile.  */
 841
 842static void store_op1 (), store_op2 ();
 843static void insert_op1 (), insert_op2 ();
 844static boolean at_begline_loc_p (), at_endline_loc_p ();
 845static boolean group_in_compile_stack ();
 846static reg_errcode_t compile_range ();
 847
 848/* Fetch the next character in the uncompiled pattern---translating it
 849   if necessary.  Also cast from a signed character in the constant
 850   string passed to us by the user to an unsigned char that we can use
 851   as an array index (in, e.g., `translate').  */
 852#define PATFETCH(c)                                                     \
 853  do {if (p == pend) return REG_EEND;                                   \
 854    c = (unsigned char) *p++;                                           \
 855    if (translate) c = translate[c];                                    \
 856  } while (0)
 857
 858/* Fetch the next character in the uncompiled pattern, with no
 859   translation.  */
 860#define PATFETCH_RAW(c)                                                 \
 861  do {if (p == pend) return REG_EEND;                                   \
 862    c = (unsigned char) *p++;                                           \
 863  } while (0)
 864
 865/* Go backwards one character in the pattern.  */
 866#define PATUNFETCH p--
 867
 868
 869/* If `translate' is non-null, return translate[D], else just D.  We
 870   cast the subscript to translate because some data is declared as
 871   `char *', to avoid warnings when a string constant is passed.  But
 872   when we use a character as a subscript we must make it unsigned.  */
 873#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
 874
 875
 876/* Macros for outputting the compiled pattern into `buffer'.  */
 877
 878/* If the buffer isn't allocated when it comes in, use this.  */
 879#define INIT_BUF_SIZE  32
 880
 881/* Make sure we have at least N more bytes of space in buffer.  */
 882#define GET_BUFFER_SPACE(n)                                             \
 883    while (b - bufp->buffer + (n) > bufp->allocated)                    \
 884      EXTEND_BUFFER ()
 885
 886/* Make sure we have one more byte of buffer space and then add C to it.  */
 887#define BUF_PUSH(c)                                                     \
 888  do {                                                                  \
 889    GET_BUFFER_SPACE (1);                                               \
 890    *b++ = (unsigned char) (c);                                         \
 891  } while (0)
 892
 893
 894/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
 895#define BUF_PUSH_2(c1, c2)                                              \
 896  do {                                                                  \
 897    GET_BUFFER_SPACE (2);                                               \
 898    *b++ = (unsigned char) (c1);                                        \
 899    *b++ = (unsigned char) (c2);                                        \
 900  } while (0)
 901
 902
 903/* As with BUF_PUSH_2, except for three bytes.  */
 904#define BUF_PUSH_3(c1, c2, c3)                                          \
 905  do {                                                                  \
 906    GET_BUFFER_SPACE (3);                                               \
 907    *b++ = (unsigned char) (c1);                                        \
 908    *b++ = (unsigned char) (c2);                                        \
 909    *b++ = (unsigned char) (c3);                                        \
 910  } while (0)
 911
 912
 913/* Store a jump with opcode OP at LOC to location TO.  We store a
 914   relative address offset by the three bytes the jump itself occupies.  */
 915#define STORE_JUMP(op, loc, to) \
 916  store_op1 (op, loc, (to) - (loc) - 3)
 917
 918/* Likewise, for a two-argument jump.  */
 919#define STORE_JUMP2(op, loc, to, arg) \
 920  store_op2 (op, loc, (to) - (loc) - 3, arg)
 921
 922/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
 923#define INSERT_JUMP(op, loc, to) \
 924  insert_op1 (op, loc, (to) - (loc) - 3, b)
 925
 926/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
 927#define INSERT_JUMP2(op, loc, to, arg) \
 928  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
 929
 930
 931/* This is not an arbitrary limit: the arguments which represent offsets
 932   into the pattern are two bytes long.  So if 2^16 bytes turns out to
 933   be too small, many things would have to change.  */
 934#define MAX_BUF_SIZE (1L << 16)
 935
 936
 937/* Extend the buffer by twice its current size via realloc and
 938   reset the pointers that pointed into the old block to point to the
 939   correct places in the new one.  If extending the buffer results in it
 940   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
 941#define EXTEND_BUFFER()                                                 \
 942  do {                                                                  \
 943    unsigned char *old_buffer = bufp->buffer;                           \
 944    if (bufp->allocated == MAX_BUF_SIZE)                                \
 945      return REG_ESIZE;                                                 \
 946    bufp->allocated <<= 1;                                              \
 947    if (bufp->allocated > MAX_BUF_SIZE)                                 \
 948      bufp->allocated = MAX_BUF_SIZE;                                   \
 949    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
 950    if (bufp->buffer == NULL)                                           \
 951      return REG_ESPACE;                                                \
 952    /* If the buffer moved, move all the pointers into it.  */          \
 953    if (old_buffer != bufp->buffer)                                     \
 954      {                                                                 \
 955        b = (b - old_buffer) + bufp->buffer;                            \
 956        begalt = (begalt - old_buffer) + bufp->buffer;                  \
 957        if (fixup_alt_jump)                                             \
 958          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
 959        if (laststart)                                                  \
 960          laststart = (laststart - old_buffer) + bufp->buffer;          \
 961        if (pending_exact)                                              \
 962          pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
 963      }                                                                 \
 964  } while (0)
 965
 966
 967/* Since we have one byte reserved for the register number argument to
 968   {start,stop}_memory, the maximum number of groups we can report
 969   things about is what fits in that byte.  */
 970#define MAX_REGNUM 255
 971
 972/* But patterns can have more than `MAX_REGNUM' registers.  We just
 973   ignore the excess.  */
 974typedef unsigned regnum_t;
 975
 976
 977/* Macros for the compile stack.  */
 978
 979/* Since offsets can go either forwards or backwards, this type needs to
 980   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
 981typedef int pattern_offset_t;
 982
 983typedef struct
 984{
 985  pattern_offset_t begalt_offset;
 986  pattern_offset_t fixup_alt_jump;
 987  pattern_offset_t inner_group_offset;
 988  pattern_offset_t laststart_offset;
 989  regnum_t regnum;
 990} compile_stack_elt_t;
 991
 992
 993typedef struct
 994{
 995  compile_stack_elt_t *stack;
 996  unsigned size;
 997  unsigned avail;                       /* Offset of next open position.  */
 998} compile_stack_type;
 999
1000
1001#define INIT_COMPILE_STACK_SIZE 32
1002
1003#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1004#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1005
1006/* The next available element.  */
1007#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1008
1009
1010/* Set the bit for character C in a list.  */
1011#define SET_LIST_BIT(c)                               \
1012  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1013   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1014
1015
1016/* Get the next unsigned number in the uncompiled pattern.  */
1017#define GET_UNSIGNED_NUMBER(num)                                        \
1018  { if (p != pend)                                                      \
1019     {                                                                  \
1020       PATFETCH (c);                                                    \
1021       while (ISDIGIT (c))                                              \
1022         {                                                              \
1023           if (num < 0)                                                 \
1024              num = 0;                                                  \
1025           num = num * 10 + c - '0';                                    \
1026           if (p == pend)                                               \
1027              break;                                                    \
1028           PATFETCH (c);                                                \
1029         }                                                              \
1030       }                                                                \
1031    }
1032
1033#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1034
1035#define IS_CHAR_CLASS(string)                                           \
1036   (STREQ (string, "alpha") || STREQ (string, "upper")                  \
1037    || STREQ (string, "lower") || STREQ (string, "digit")               \
1038    || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
1039    || STREQ (string, "space") || STREQ (string, "print")               \
1040    || STREQ (string, "punct") || STREQ (string, "graph")               \
1041    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1042
1043/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1044   Returns one of error codes defined in `regex.h', or zero for success.
1045
1046   Assumes the `allocated' (and perhaps `buffer') and `translate'
1047   fields are set in BUFP on entry.
1048
1049   If it succeeds, results are put in BUFP (if it returns an error, the
1050   contents of BUFP are undefined):
1051     `buffer' is the compiled pattern;
1052     `syntax' is set to SYNTAX;
1053     `used' is set to the length of the compiled pattern;
1054     `fastmap_accurate' is zero;
1055     `re_nsub' is the number of subexpressions in PATTERN;
1056     `not_bol' and `not_eol' are zero;
1057
1058   The `fastmap' and `newline_anchor' fields are neither
1059   examined nor set.  */
1060
1061static reg_errcode_t
1062regex_compile (pattern, size, syntax, bufp)
1063     const char *pattern;
1064     int size;
1065     reg_syntax_t syntax;
1066     struct re_pattern_buffer *bufp;
1067{
1068  /* We fetch characters from PATTERN here.  Even though PATTERN is
1069     `char *' (i.e., signed), we declare these variables as unsigned, so
1070     they can be reliably used as array indices.  */
1071  register unsigned char c, c1;
1072
1073  /* A random tempory spot in PATTERN.  */
1074  const char *p1;
1075
1076  /* Points to the end of the buffer, where we should append.  */
1077  register unsigned char *b;
1078
1079  /* Keeps track of unclosed groups.  */
1080  compile_stack_type compile_stack;
1081
1082  /* Points to the current (ending) position in the pattern.  */
1083  const char *p = pattern;
1084  const char *pend = pattern + size;
1085
1086  /* How to translate the characters in the pattern.  */
1087  char *translate = bufp->translate;
1088
1089  /* Address of the count-byte of the most recently inserted `exactn'
1090     command.  This makes it possible to tell if a new exact-match
1091     character can be added to that command or if the character requires
1092     a new `exactn' command.  */
1093  unsigned char *pending_exact = 0;
1094
1095  /* Address of start of the most recently finished expression.
1096     This tells, e.g., postfix * where to find the start of its
1097     operand.  Reset at the beginning of groups and alternatives.  */
1098  unsigned char *laststart = 0;
1099
1100  /* Address of beginning of regexp, or inside of last group.  */
1101  unsigned char *begalt;
1102
1103  /* Place in the uncompiled pattern (i.e., the {) to
1104     which to go back if the interval is invalid.  */
1105  const char *beg_interval;
1106
1107  /* Address of the place where a forward jump should go to the end of
1108     the containing expression.  Each alternative of an `or' -- except the
1109     last -- ends with a forward jump of this sort.  */
1110  unsigned char *fixup_alt_jump = 0;
1111
1112  /* Counts open-groups as they are encountered.  Remembered for the
1113     matching close-group on the compile stack, so the same register
1114     number is put in the stop_memory as the start_memory.  */
1115  regnum_t regnum = 0;
1116
1117#ifdef DEBUG
1118  DEBUG_PRINT1 ("\nCompiling pattern: ");
1119  if (debug)
1120    {
1121      unsigned debug_count;
1122
1123      for (debug_count = 0; debug_count < size; debug_count++)
1124        printchar (pattern[debug_count]);
1125      putchar ('\n');
1126    }
1127#endif /* DEBUG */
1128
1129  /* Initialize the compile stack.  */
1130  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1131  if (compile_stack.stack == NULL)
1132    return REG_ESPACE;
1133
1134  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1135  compile_stack.avail = 0;
1136
1137  /* Initialize the pattern buffer.  */
1138  bufp->syntax = syntax;
1139  bufp->fastmap_accurate = 0;
1140  bufp->not_bol = bufp->not_eol = 0;
1141
1142  /* Set `used' to zero, so that if we return an error, the pattern
1143     printer (for debugging) will think there's no pattern.  We reset it
1144     at the end.  */
1145  bufp->used = 0;
1146
1147  /* Always count groups, whether or not bufp->no_sub is set.  */
1148  bufp->re_nsub = 0;
1149
1150#if !defined (emacs) && !defined (SYNTAX_TABLE)
1151  /* Initialize the syntax table.  */
1152   init_syntax_once ();
1153#endif
1154
1155  if (bufp->allocated == 0)
1156    {
1157      if (bufp->buffer)
1158        { /* If zero allocated, but buffer is non-null, try to realloc
1159             enough space.  This loses if buffer's address is bogus, but
1160             that is the user's responsibility.  */
1161          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1162        }
1163      else
1164        { /* Caller did not allocate a buffer.  Do it for them.  */
1165          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1166        }
1167      if (!bufp->buffer) return REG_ESPACE;
1168
1169      bufp->allocated = INIT_BUF_SIZE;
1170    }
1171
1172  begalt = b = bufp->buffer;
1173
1174  /* Loop through the uncompiled pattern until we're at the end.  */
1175  while (p != pend)
1176    {
1177      PATFETCH (c);
1178
1179      switch (c)
1180        {
1181        case '^':
1182          {
1183            if (   /* If at start of pattern, it's an operator.  */
1184                   p == pattern + 1
1185                   /* If context independent, it's an operator.  */
1186                || syntax & RE_CONTEXT_INDEP_ANCHORS
1187                   /* Otherwise, depends on what's come before.  */
1188                || at_begline_loc_p (pattern, p, syntax))
1189              BUF_PUSH (begline);
1190            else
1191              goto normal_char;
1192          }
1193          break;
1194
1195
1196        case '$':
1197          {
1198            if (   /* If at end of pattern, it's an operator.  */
1199                   p == pend
1200                   /* If context independent, it's an operator.  */
1201                || syntax & RE_CONTEXT_INDEP_ANCHORS
1202                   /* Otherwise, depends on what's next.  */
1203                || at_endline_loc_p (p, pend, syntax))
1204               BUF_PUSH (endline);
1205             else
1206               goto normal_char;
1207           }
1208           break;
1209
1210
1211        case '+':
1212        case '?':
1213          if ((syntax & RE_BK_PLUS_QM)
1214              || (syntax & RE_LIMITED_OPS))
1215            goto normal_char;
1216        handle_plus:
1217        case '*':
1218          /* If there is no previous pattern... */
1219          if (!laststart)
1220            {
1221              if (syntax & RE_CONTEXT_INVALID_OPS)
1222                return REG_BADRPT;
1223              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1224                goto normal_char;
1225            }
1226
1227          {
1228            /* Are we optimizing this jump?  */
1229            boolean keep_string_p = false;
1230
1231            /* 1 means zero (many) matches is allowed.  */
1232            char zero_times_ok = 0, many_times_ok = 0;
1233
1234            /* If there is a sequence of repetition chars, collapse it
1235               down to just one (the right one).  We can't combine
1236               interval operators with these because of, e.g., `a{2}*',
1237               which should only match an even number of `a's.  */
1238
1239            for (;;)
1240              {
1241                zero_times_ok |= c != '+';
1242                many_times_ok |= c != '?';
1243
1244                if (p == pend)
1245                  break;
1246
1247                PATFETCH (c);
1248
1249                if (c == '*'
1250                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1251                  ;
1252
1253                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1254                  {
1255                    if (p == pend) return REG_EESCAPE;
1256
1257                    PATFETCH (c1);
1258                    if (!(c1 == '+' || c1 == '?'))
1259                      {
1260                        PATUNFETCH;
1261                        PATUNFETCH;
1262                        break;
1263                      }
1264
1265                    c = c1;
1266                  }
1267                else
1268                  {
1269                    PATUNFETCH;
1270                    break;
1271                  }
1272
1273                /* If we get here, we found another repeat character.  */
1274               }
1275
1276            /* Star, etc. applied to an empty pattern is equivalent
1277               to an empty pattern.  */
1278            if (!laststart)
1279              break;
1280
1281            /* Now we know whether or not zero matches is allowed
1282               and also whether or not two or more matches is allowed.  */
1283            if (many_times_ok)
1284              { /* More than one repetition is allowed, so put in at the
1285                   end a backward relative jump from `b' to before the next
1286                   jump we're going to put in below (which jumps from
1287                   laststart to after this jump).
1288
1289                   But if we are at the `*' in the exact sequence `.*\n',
1290                   insert an unconditional jump backwards to the .,
1291                   instead of the beginning of the loop.  This way we only
1292                   push a failure point once, instead of every time
1293                   through the loop.  */
1294                assert (p - 1 > pattern);
1295
1296                /* Allocate the space for the jump.  */
1297                GET_BUFFER_SPACE (3);
1298
1299                /* We know we are not at the first character of the pattern,
1300                   because laststart was nonzero.  And we've already
1301                   incremented `p', by the way, to be the character after
1302                   the `*'.  Do we have to do something analogous here
1303                   for null bytes, because of RE_DOT_NOT_NULL?  */
1304                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1305                    && zero_times_ok
1306                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1307                    && !(syntax & RE_DOT_NEWLINE))
1308                  { /* We have .*\n.  */
1309                    STORE_JUMP (jump, b, laststart);
1310                    keep_string_p = true;
1311                  }
1312                else
1313                  /* Anything else.  */
1314                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1315
1316                /* We've added more stuff to the buffer.  */
1317                b += 3;
1318              }
1319
1320            /* On failure, jump from laststart to b + 3, which will be the
1321               end of the buffer after this jump is inserted.  */
1322            GET_BUFFER_SPACE (3);
1323            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1324                                       : on_failure_jump,
1325                         laststart, b + 3);
1326            pending_exact = 0;
1327            b += 3;
1328
1329            if (!zero_times_ok)
1330              {
1331                /* At least one repetition is required, so insert a
1332                   `dummy_failure_jump' before the initial
1333                   `on_failure_jump' instruction of the loop. This
1334                   effects a skip over that instruction the first time
1335                   we hit that loop.  */
1336                GET_BUFFER_SPACE (3);
1337                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1338                b += 3;
1339              }
1340            }
1341          break;
1342
1343
1344        case '.':
1345          laststart = b;
1346          BUF_PUSH (anychar);
1347          break;
1348
1349
1350        case '[':
1351          {
1352            boolean had_char_class = false;
1353
1354            if (p == pend) return REG_EBRACK;
1355
1356            /* Ensure that we have enough space to push a charset: the
1357               opcode, the length count, and the bitset; 34 bytes in all.  */
1358            GET_BUFFER_SPACE (34);
1359
1360            laststart = b;
1361
1362            /* We test `*p == '^' twice, instead of using an if
1363               statement, so we only need one BUF_PUSH.  */
1364            BUF_PUSH (*p == '^' ? charset_not : charset);
1365            if (*p == '^')
1366              p++;
1367
1368            /* Remember the first position in the bracket expression.  */
1369            p1 = p;
1370
1371            /* Push the number of bytes in the bitmap.  */
1372            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1373
1374            /* Clear the whole map.  */
1375            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1376
1377            /* charset_not matches newline according to a syntax bit.  */
1378            if ((re_opcode_t) b[-2] == charset_not
1379                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1380              SET_LIST_BIT ('\n');
1381
1382            /* Read in characters and ranges, setting map bits.  */
1383            for (;;)
1384              {
1385                if (p == pend) return REG_EBRACK;
1386
1387                PATFETCH (c);
1388
1389                /* \ might escape characters inside [...] and [^...].  */
1390                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1391                  {
1392                    if (p == pend) return REG_EESCAPE;
1393
1394                    PATFETCH (c1);
1395                    SET_LIST_BIT (c1);
1396                    continue;
1397                  }
1398
1399                /* Could be the end of the bracket expression.  If it's
1400                   not (i.e., when the bracket expression is `[]' so
1401                   far), the ']' character bit gets set way below.  */
1402                if (c == ']' && p != p1 + 1)
1403                  break;
1404
1405                /* Look ahead to see if it's a range when the last thing
1406                   was a character class.  */
1407                if (had_char_class && c == '-' && *p != ']')
1408                  return REG_ERANGE;
1409
1410                /* Look ahead to see if it's a range when the last thing
1411                   was a character: if this is a hyphen not at the
1412                   beginning or the end of a list, then it's the range
1413                   operator.  */
1414                if (c == '-'
1415                    && !(p - 2 >= pattern && p[-2] == '[')
1416                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1417                    && *p != ']')
1418                  {
1419                    reg_errcode_t ret
1420                      = compile_range (&p, pend, translate, syntax, b);
1421                    if (ret != REG_NOERROR) return ret;
1422                  }
1423
1424                else if (p[0] == '-' && p[1] != ']')
1425                  { /* This handles ranges made up of characters only.  */
1426                    reg_errcode_t ret;
1427
1428                    /* Move past the `-'.  */
1429                    PATFETCH (c1);
1430
1431                    ret = compile_range (&p, pend, translate, syntax, b);
1432                    if (ret != REG_NOERROR) return ret;
1433                  }
1434
1435                /* See if we're at the beginning of a possible character
1436                   class.  */
1437
1438                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1439                  { /* Leave room for the null.  */
1440                    char str[CHAR_CLASS_MAX_LENGTH + 1];
1441
1442                    PATFETCH (c);
1443                    c1 = 0;
1444
1445                    /* If pattern is `[[:'.  */
1446                    if (p == pend) return REG_EBRACK;
1447
1448                    for (;;)
1449                      {
1450                        PATFETCH (c);
1451                        if (c == ':' || c == ']' || p == pend
1452                            || c1 == CHAR_CLASS_MAX_LENGTH)
1453                          break;
1454                        str[c1++] = c;
1455                      }
1456                    str[c1] = '\0';
1457
1458                    /* If isn't a word bracketed by `[:' and:`]':
1459                       undo the ending character, the letters, and leave
1460                       the leading `:' and `[' (but set bits for them).  */
1461                    if (c == ':' && *p == ']')
1462                      {
1463                        int ch;
1464                        boolean is_alnum = STREQ (str, "alnum");
1465                        boolean is_alpha = STREQ (str, "alpha");
1466                        boolean is_blank = STREQ (str, "blank");
1467                        boolean is_cntrl = STREQ (str, "cntrl");
1468                        boolean is_digit = STREQ (str, "digit");
1469                        boolean is_graph = STREQ (str, "graph");
1470                        boolean is_lower = STREQ (str, "lower");
1471                        boolean is_print = STREQ (str, "print");
1472                        boolean is_punct = STREQ (str, "punct");
1473                        boolean is_space = STREQ (str, "space");
1474                        boolean is_upper = STREQ (str, "upper");
1475                        boolean is_xdigit = STREQ (str, "xdigit");
1476
1477                        if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1478
1479                        /* Throw away the ] at the end of the character
1480                           class.  */
1481                        PATFETCH (c);
1482
1483                        if (p == pend) return REG_EBRACK;
1484
1485                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1486                          {
1487                            if (   (is_alnum  && ISALNUM (ch))
1488                                || (is_alpha  && ISALPHA (ch))
1489                                || (is_blank  && ISBLANK (ch))
1490                                || (is_cntrl  && ISCNTRL (ch))
1491                                || (is_digit  && ISDIGIT (ch))
1492                                || (is_graph  && ISGRAPH (ch))
1493                                || (is_lower  && ISLOWER (ch))
1494                                || (is_print  && ISPRINT (ch))
1495                                || (is_punct  && ISPUNCT (ch))
1496                                || (is_space  && ISSPACE (ch))
1497                                || (is_upper  && ISUPPER (ch))
1498                                || (is_xdigit && ISXDIGIT (ch)))
1499                            SET_LIST_BIT (ch);
1500                          }
1501                        had_char_class = true;
1502                      }
1503                    else
1504                      {
1505                        c1++;
1506                        while (c1--)
1507                          PATUNFETCH;
1508                        SET_LIST_BIT ('[');
1509                        SET_LIST_BIT (':');
1510                        had_char_class = false;
1511                      }
1512                  }
1513                else
1514                  {
1515                    had_char_class = false;
1516                    SET_LIST_BIT (c);
1517                  }
1518              }
1519
1520            /* Discard any (non)matching list bytes that are all 0 at the
1521               end of the map.  Decrease the map-length byte too.  */
1522            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1523              b[-1]--;
1524            b += b[-1];
1525          }
1526          break;
1527
1528
1529        case '(':
1530          if (syntax & RE_NO_BK_PARENS)
1531            goto handle_open;
1532          else
1533            goto normal_char;
1534
1535
1536        case ')':
1537          if (syntax & RE_NO_BK_PARENS)
1538            goto handle_close;
1539          else
1540            goto normal_char;
1541
1542
1543        case '\n':
1544          if (syntax & RE_NEWLINE_ALT)
1545            goto handle_alt;
1546          else
1547            goto normal_char;
1548
1549
1550        case '|':
1551          if (syntax & RE_NO_BK_VBAR)
1552            goto handle_alt;
1553          else
1554            goto normal_char;
1555
1556
1557        case '{':
1558           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1559             goto handle_interval;
1560           else
1561             goto normal_char;
1562
1563
1564        case '\\':
1565          if (p == pend) return REG_EESCAPE;
1566
1567          /* Do not translate the character after the \, so that we can
1568             distinguish, e.g., \B from \b, even if we normally would
1569             translate, e.g., B to b.  */
1570          PATFETCH_RAW (c);
1571
1572          switch (c)
1573            {
1574            case '(':
1575              if (syntax & RE_NO_BK_PARENS)
1576                goto normal_backslash;
1577
1578            handle_open:
1579              bufp->re_nsub++;
1580              regnum++;
1581
1582              if (COMPILE_STACK_FULL)
1583                {
1584                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
1585                            compile_stack_elt_t);
1586                  if (compile_stack.stack == NULL) return REG_ESPACE;
1587
1588                  compile_stack.size <<= 1;
1589                }
1590
1591              /* These are the values to restore when we hit end of this
1592                 group.  They are all relative offsets, so that if the
1593                 whole pattern moves because of realloc, they will still
1594                 be valid.  */
1595              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1596              COMPILE_STACK_TOP.fixup_alt_jump
1597                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1598              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1599              COMPILE_STACK_TOP.regnum = regnum;
1600
1601              /* We will eventually replace the 0 with the number of
1602                 groups inner to this one.  But do not push a
1603                 start_memory for groups beyond the last one we can
1604                 represent in the compiled pattern.  */
1605              if (regnum <= MAX_REGNUM)
1606                {
1607                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1608                  BUF_PUSH_3 (start_memory, regnum, 0);
1609                }
1610
1611              compile_stack.avail++;
1612
1613              fixup_alt_jump = 0;
1614              laststart = 0;
1615              begalt = b;
1616              /* If we've reached MAX_REGNUM groups, then this open
1617                 won't actually generate any code, so we'll have to
1618                 clear pending_exact explicitly.  */
1619              pending_exact = 0;
1620              break;
1621
1622
1623            case ')':
1624              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1625
1626              if (COMPILE_STACK_EMPTY)
1627                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1628                  goto normal_backslash;
1629                else
1630                  return REG_ERPAREN;
1631
1632            handle_close:
1633              if (fixup_alt_jump)
1634                { /* Push a dummy failure point at the end of the
1635                     alternative for a possible future
1636                     `pop_failure_jump' to pop.  See comments at
1637                     `push_dummy_failure' in `re_match_2'.  */
1638                  BUF_PUSH (push_dummy_failure);
1639
1640                  /* We allocated space for this jump when we assigned
1641                     to `fixup_alt_jump', in the `handle_alt' case below.  */
1642                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1643                }
1644
1645              /* See similar code for backslashed left paren above.  */
1646              if (COMPILE_STACK_EMPTY)
1647                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1648                  goto normal_char;
1649                else
1650                  return REG_ERPAREN;
1651
1652              /* Since we just checked for an empty stack above, this
1653                 ``can't happen''.  */
1654              assert (compile_stack.avail != 0);
1655              {
1656                /* We don't just want to restore into `regnum', because
1657                   later groups should continue to be numbered higher,
1658                   as in `(ab)c(de)' -- the second group is #2.  */
1659                regnum_t this_group_regnum;
1660
1661                compile_stack.avail--;
1662                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1663                fixup_alt_jump
1664                  = COMPILE_STACK_TOP.fixup_alt_jump
1665                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1666                    : 0;
1667                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1668                this_group_regnum = COMPILE_STACK_TOP.regnum;
1669                /* If we've reached MAX_REGNUM groups, then this open
1670                   won't actually generate any code, so we'll have to
1671                   clear pending_exact explicitly.  */
1672                pending_exact = 0;
1673
1674                /* We're at the end of the group, so now we know how many
1675                   groups were inside this one.  */
1676                if (this_group_regnum <= MAX_REGNUM)
1677                  {
1678                    unsigned char *inner_group_loc
1679                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1680
1681                    *inner_group_loc = regnum - this_group_regnum;
1682                    BUF_PUSH_3 (stop_memory, this_group_regnum,
1683                                regnum - this_group_regnum);
1684                  }
1685              }
1686              break;
1687
1688
1689            case '|':                                   /* `\|'.  */
1690              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1691                goto normal_backslash;
1692            handle_alt:
1693              if (syntax & RE_LIMITED_OPS)
1694                goto normal_char;
1695
1696              /* Insert before the previous alternative a jump which
1697                 jumps to this alternative if the former fails.  */
1698              GET_BUFFER_SPACE (3);
1699              INSERT_JUMP (on_failure_jump, begalt, b + 6);
1700              pending_exact = 0;
1701              b += 3;
1702
1703              /* The alternative before this one has a jump after it
1704                 which gets executed if it gets matched.  Adjust that
1705                 jump so it will jump to this alternative's analogous
1706                 jump (put in below, which in turn will jump to the next
1707                 (if any) alternative's such jump, etc.).  The last such
1708                 jump jumps to the correct final destination.  A picture:
1709                          _____ _____
1710                          |   | |   |
1711                          |   v |   v
1712                         a | b   | c
1713
1714                 If we are at `b', then fixup_alt_jump right now points to a
1715                 three-byte space after `a'.  We'll put in the jump, set
1716                 fixup_alt_jump to right after `b', and leave behind three
1717                 bytes which we'll fill in when we get to after `c'.  */
1718
1719              if (fixup_alt_jump)
1720                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1721
1722              /* Mark and leave space for a jump after this alternative,
1723                 to be filled in later either by next alternative or
1724                 when know we're at the end of a series of alternatives.  */
1725              fixup_alt_jump = b;
1726              GET_BUFFER_SPACE (3);
1727              b += 3;
1728
1729              laststart = 0;
1730              begalt = b;
1731              break;
1732
1733
1734            case '{':
1735              /* If \{ is a literal.  */
1736              if (!(syntax & RE_INTERVALS)
1737                     /* If we're at `\{' and it's not the open-interval
1738                        operator.  */
1739                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1740                  || (p - 2 == pattern  &&  p == pend))
1741                goto normal_backslash;
1742
1743            handle_interval:
1744              {
1745                /* If got here, then the syntax allows intervals.  */
1746
1747                /* At least (most) this many matches must be made.  */
1748                int lower_bound = -1, upper_bound = -1;
1749
1750                beg_interval = p - 1;
1751
1752                if (p == pend)
1753                  {
1754                    if (syntax & RE_NO_BK_BRACES)
1755                      goto unfetch_interval;
1756                    else
1757                      return REG_EBRACE;
1758                  }
1759
1760                GET_UNSIGNED_NUMBER (lower_bound);
1761
1762                if (c == ',')
1763                  {
1764                    GET_UNSIGNED_NUMBER (upper_bound);
1765                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1766                  }
1767                else
1768                  /* Interval such as `{1}' => match exactly once. */
1769                  upper_bound = lower_bound;
1770
1771                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1772                    || lower_bound > upper_bound)
1773                  {
1774                    if (syntax & RE_NO_BK_BRACES)
1775                      goto unfetch_interval;
1776                    else
1777                      return REG_BADBR;
1778                  }
1779
1780                if (!(syntax & RE_NO_BK_BRACES))
1781                  {
1782                    if (c != '\\') return REG_EBRACE;
1783
1784                    PATFETCH (c);
1785                  }
1786
1787                if (c != '}')
1788                  {
1789                    if (syntax & RE_NO_BK_BRACES)
1790                      goto unfetch_interval;
1791                    else
1792                      return REG_BADBR;
1793                  }
1794
1795                /* We just parsed a valid interval.  */
1796
1797                /* If it's invalid to have no preceding re.  */
1798                if (!laststart)
1799                  {
1800                    if (syntax & RE_CONTEXT_INVALID_OPS)
1801                      return REG_BADRPT;
1802                    else if (syntax & RE_CONTEXT_INDEP_OPS)
1803                      laststart = b;
1804                    else
1805                      goto unfetch_interval;
1806                  }
1807
1808                /* If the upper bound is zero, don't want to succeed at
1809                   all; jump from `laststart' to `b + 3', which will be
1810                   the end of the buffer after we insert the jump.  */
1811                 if (upper_bound == 0)
1812                   {
1813                     GET_BUFFER_SPACE (3);
1814                     INSERT_JUMP (jump, laststart, b + 3);
1815                     b += 3;
1816                   }
1817
1818                 /* Otherwise, we have a nontrivial interval.  When
1819                    we're all done, the pattern will look like:
1820                      set_number_at <jump count> <upper bound>
1821                      set_number_at <succeed_n count> <lower bound>
1822                      succeed_n <after jump addr> <succed_n count>
1823                      <body of loop>
1824                      jump_n <succeed_n addr> <jump count>
1825                    (The upper bound and `jump_n' are omitted if
1826                    `upper_bound' is 1, though.)  */
1827                 else
1828                   { /* If the upper bound is > 1, we need to insert
1829                        more at the end of the loop.  */
1830                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
1831
1832                     GET_BUFFER_SPACE (nbytes);
1833
1834                     /* Initialize lower bound of the `succeed_n', even
1835                        though it will be set during matching by its
1836                        attendant `set_number_at' (inserted next),
1837                        because `re_compile_fastmap' needs to know.
1838                        Jump to the `jump_n' we might insert below.  */
1839                     INSERT_JUMP2 (succeed_n, laststart,
1840                                   b + 5 + (upper_bound > 1) * 5,
1841                                   lower_bound);
1842                     b += 5;
1843
1844                     /* Code to initialize the lower bound.  Insert
1845                        before the `succeed_n'.  The `5' is the last two
1846                        bytes of this `set_number_at', plus 3 bytes of
1847                        the following `succeed_n'.  */
1848                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1849                     b += 5;
1850
1851                     if (upper_bound > 1)
1852                       { /* More than one repetition is allowed, so
1853                            append a backward jump to the `succeed_n'
1854                            that starts this interval.
1855
1856                            When we've reached this during matching,
1857                            we'll have matched the interval once, so
1858                            jump back only `upper_bound - 1' times.  */
1859                         STORE_JUMP2 (jump_n, b, laststart + 5,
1860                                      upper_bound - 1);
1861                         b += 5;
1862
1863                         /* The location we want to set is the second
1864                            parameter of the `jump_n'; that is `b-2' as
1865                            an absolute address.  `laststart' will be
1866                            the `set_number_at' we're about to insert;
1867                            `laststart+3' the number to set, the source
1868                            for the relative address.  But we are
1869                            inserting into the middle of the pattern --
1870                            so everything is getting moved up by 5.
1871                            Conclusion: (b - 2) - (laststart + 3) + 5,
1872                            i.e., b - laststart.
1873
1874                            We insert this at the beginning of the loop
1875                            so that if we fail during matching, we'll
1876                            reinitialize the bounds.  */
1877                         insert_op2 (set_number_at, laststart, b - laststart,
1878                                     upper_bound - 1, b);
1879                         b += 5;
1880                       }
1881                   }
1882                pending_exact = 0;
1883                beg_interval = NULL;
1884              }
1885              break;
1886
1887            unfetch_interval:
1888              /* If an invalid interval, match the characters as literals.  */
1889               assert (beg_interval);
1890               p = beg_interval;
1891               beg_interval = NULL;
1892
1893               /* normal_char and normal_backslash need `c'.  */
1894               PATFETCH (c);
1895
1896               if (!(syntax & RE_NO_BK_BRACES))
1897                 {
1898                   if (p > pattern  &&  p[-1] == '\\')
1899                     goto normal_backslash;
1900                 }
1901               goto normal_char;
1902
1903#ifdef emacs
1904            /* There is no way to specify the before_dot and after_dot
1905               operators.  rms says this is ok.  --karl  */
1906            case '=':
1907              BUF_PUSH (at_dot);
1908              break;
1909
1910            case 's':
1911              laststart = b;
1912              PATFETCH (c);
1913              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1914              break;
1915
1916            case 'S':
1917              laststart = b;
1918              PATFETCH (c);
1919              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1920              break;
1921#endif /* emacs */
1922
1923
1924            case 'w':
1925              laststart = b;
1926              BUF_PUSH (wordchar);
1927              break;
1928
1929
1930            case 'W':
1931              laststart = b;
1932              BUF_PUSH (notwordchar);
1933              break;
1934
1935
1936            case '<':
1937              BUF_PUSH (wordbeg);
1938              break;
1939
1940            case '>':
1941              BUF_PUSH (wordend);
1942              break;
1943
1944            case 'b':
1945              BUF_PUSH (wordbound);
1946              break;
1947
1948            case 'B':
1949              BUF_PUSH (notwordbound);
1950              break;
1951
1952            case '`':
1953              BUF_PUSH (begbuf);
1954              break;
1955
1956            case '\'':
1957              BUF_PUSH (endbuf);
1958              break;
1959
1960            case '1': case '2': case '3': case '4': case '5':
1961            case '6': case '7': case '8': case '9':
1962              if (syntax & RE_NO_BK_REFS)
1963                goto normal_char;
1964
1965              c1 = c - '0';
1966
1967              if (c1 > regnum)
1968                return REG_ESUBREG;
1969
1970              /* Can't back reference to a subexpression if inside of it.  */
1971              if (group_in_compile_stack (compile_stack, c1))
1972                goto normal_char;
1973
1974              laststart = b;
1975              BUF_PUSH_2 (duplicate, c1);
1976              break;
1977
1978
1979            case '+':
1980            case '?':
1981              if (syntax & RE_BK_PLUS_QM)
1982                goto handle_plus;
1983              else
1984                goto normal_backslash;
1985
1986            default:
1987            normal_backslash:
1988              /* You might think it would be useful for \ to mean
1989                 not to translate; but if we don't translate it
1990                 it will never match anything.  */
1991              c = TRANSLATE (c);
1992              goto normal_char;
1993            }
1994          break;
1995
1996
1997        default:
1998        /* Expects the character in `c'.  */
1999        normal_char:
2000              /* If no exactn currently being built.  */
2001          if (!pending_exact
2002
2003              /* If last exactn not at current position.  */
2004              || pending_exact + *pending_exact + 1 != b
2005
2006              /* We have only one byte following the exactn for the count.  */
2007              || *pending_exact == (1 << BYTEWIDTH) - 1
2008
2009              /* If followed by a repetition operator.  */
2010              || *p == '*' || *p == '^'
2011              || ((syntax & RE_BK_PLUS_QM)
2012                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2013                  : (*p == '+' || *p == '?'))
2014              || ((syntax & RE_INTERVALS)
2015                  && ((syntax & RE_NO_BK_BRACES)
2016                      ? *p == '{'
2017                      : (p[0] == '\\' && p[1] == '{'))))
2018            {
2019              /* Start building a new exactn.  */
2020
2021              laststart = b;
2022
2023              BUF_PUSH_2 (exactn, 0);
2024              pending_exact = b - 1;
2025            }
2026
2027          BUF_PUSH (c);
2028          (*pending_exact)++;
2029          break;
2030        } /* switch (c) */
2031    } /* while p != pend */
2032
2033
2034  /* Through the pattern now.  */
2035
2036  if (fixup_alt_jump)
2037    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2038
2039  if (!COMPILE_STACK_EMPTY)
2040    return REG_EPAREN;
2041
2042  free (compile_stack.stack);
2043
2044  /* We have succeeded; set the length of the buffer.  */
2045  bufp->used = b - bufp->buffer;
2046
2047#ifdef DEBUG
2048  if (debug)
2049    {
2050      DEBUG_PRINT1 ("\nCompiled pattern: ");
2051      print_compiled_pattern (bufp);
2052    }
2053#endif /* DEBUG */
2054
2055  return REG_NOERROR;
2056} /* regex_compile */
2057
2058/* Subroutines for `regex_compile'.  */
2059
2060/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2061
2062static void
2063store_op1 (op, loc, arg)
2064    re_opcode_t op;
2065    unsigned char *loc;
2066    int arg;
2067{
2068  *loc = (unsigned char) op;
2069  STORE_NUMBER (loc + 1, arg);
2070}
2071
2072
2073/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2074
2075static void
2076store_op2 (op, loc, arg1, arg2)
2077    re_opcode_t op;
2078    unsigned char *loc;
2079    int arg1, arg2;
2080{
2081  *loc = (unsigned char) op;
2082  STORE_NUMBER (loc + 1, arg1);
2083  STORE_NUMBER (loc + 3, arg2);
2084}
2085
2086
2087/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2088   for OP followed by two-byte integer parameter ARG.  */
2089
2090static void
2091insert_op1 (op, loc, arg, end)
2092    re_opcode_t op;
2093    unsigned char *loc;
2094    int arg;
2095    unsigned char *end;
2096{
2097  register unsigned char *pfrom = end;
2098  register unsigned char *pto = end + 3;
2099
2100  while (pfrom != loc)
2101    *--pto = *--pfrom;
2102
2103  store_op1 (op, loc, arg);
2104}
2105
2106
2107/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2108
2109static void
2110insert_op2 (op, loc, arg1, arg2, end)
2111    re_opcode_t op;
2112    unsigned char *loc;
2113    int arg1, arg2;
2114    unsigned char *end;
2115{
2116  register unsigned char *pfrom = end;
2117  register unsigned char *pto = end + 5;
2118
2119  while (pfrom != loc)
2120    *--pto = *--pfrom;
2121
2122  store_op2 (op, loc, arg1, arg2);
2123}
2124
2125
2126/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2127   after an alternative or a begin-subexpression.  We assume there is at
2128   least one character before the ^.  */
2129
2130static boolean
2131at_begline_loc_p (pattern, p, syntax)
2132    const char *pattern, *p;
2133    reg_syntax_t syntax;
2134{
2135  const char *prev = p - 2;
2136  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2137
2138  return
2139       /* After a subexpression?  */
2140       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2141       /* After an alternative?  */
2142    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2143}
2144
2145
2146/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2147   at least one character after the $, i.e., `P < PEND'.  */
2148