perl/regexec.c
<<
>>
Prefs
   1/*    regexec.c
   2 */
   3
   4/*
   5 *      One Ring to rule them all, One Ring to find them
   6 &
   7 *     [p.v of _The Lord of the Rings_, opening poem]
   8 *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
   9 *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
  10 */
  11
  12/* This file contains functions for executing a regular expression.  See
  13 * also regcomp.c which funnily enough, contains functions for compiling
  14 * a regular expression.
  15 *
  16 * This file is also copied at build time to ext/re/re_exec.c, where
  17 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  18 * This causes the main functions to be compiled under new names and with
  19 * debugging support added, which makes "use re 'debug'" work.
  20 */
  21
  22/* NOTE: this is derived from Henry Spencer's regexp code, and should not
  23 * confused with the original package (see point 3 below).  Thanks, Henry!
  24 */
  25
  26/* Additional note: this code is very heavily munged from Henry's version
  27 * in places.  In some spots I've traded clarity for efficiency, so don't
  28 * blame Henry for some of the lack of readability.
  29 */
  30
  31/* The names of the functions have been changed from regcomp and
  32 * regexec to  pregcomp and pregexec in order to avoid conflicts
  33 * with the POSIX routines of the same names.
  34*/
  35
  36#ifdef PERL_EXT_RE_BUILD
  37#include "re_top.h"
  38#endif
  39
  40/*
  41 * pregcomp and pregexec -- regsub and regerror are not used in perl
  42 *
  43 *      Copyright (c) 1986 by University of Toronto.
  44 *      Written by Henry Spencer.  Not derived from licensed software.
  45 *
  46 *      Permission is granted to anyone to use this software for any
  47 *      purpose on any computer system, and to redistribute it freely,
  48 *      subject to the following restrictions:
  49 *
  50 *      1. The author is not responsible for the consequences of use of
  51 *              this software, no matter how awful, even if they arise
  52 *              from defects in it.
  53 *
  54 *      2. The origin of this software must not be misrepresented, either
  55 *              by explicit claim or by omission.
  56 *
  57 *      3. Altered versions must be plainly marked as such, and must not
  58 *              be misrepresented as being the original software.
  59 *
  60 ****    Alterations to Henry's code are...
  61 ****
  62 ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  63 ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  64 ****    by Larry Wall and others
  65 ****
  66 ****    You may distribute under the terms of either the GNU General Public
  67 ****    License or the Artistic License, as specified in the README file.
  68 *
  69 * Beware that some of this code is subtly aware of the way operator
  70 * precedence is structured in regular expressions.  Serious changes in
  71 * regular-expression syntax might require a total rethink.
  72 */
  73#include "EXTERN.h"
  74#define PERL_IN_REGEXEC_C
  75#include "perl.h"
  76
  77#ifdef PERL_IN_XSUB_RE
  78#  include "re_comp.h"
  79#else
  80#  include "regcomp.h"
  81#endif
  82
  83#define RF_tainted      1               /* tainted information used? */
  84#define RF_warned       2               /* warned about big count? */
  85
  86#define RF_utf8         8               /* Pattern contains multibyte chars? */
  87
  88#define UTF ((PL_reg_flags & RF_utf8) != 0)
  89
  90#define RS_init         1               /* eval environment created */
  91#define RS_set          2               /* replsv value is set */
  92
  93#ifndef STATIC
  94#define STATIC  static
  95#endif
  96
  97#define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  98
  99/*
 100 * Forwards.
 101 */
 102
 103#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 105
 106#define HOPc(pos,off) \
 107        (char *)(PL_reg_match_utf8 \
 108            ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 109            : (U8*)(pos + off))
 110#define HOPBACKc(pos, off) \
 111        (char*)(PL_reg_match_utf8\
 112            ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 113            : (pos - off >= PL_bostr)           \
 114                ? (U8*)pos - off                \
 115                : NULL)
 116
 117#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 118#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 119
 120#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 121    if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 122#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 123#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 124#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 125#define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 126
 127/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 128
 129/* for use after a quantifier and before an EXACT-like node -- japhy */
 130/* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 131#define JUMPABLE(rn) (      \
 132    OP(rn) == OPEN ||       \
 133    (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 134    OP(rn) == EVAL ||   \
 135    OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 136    OP(rn) == PLUS || OP(rn) == MINMOD || \
 137    OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 138    (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 139)
 140#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 141
 142#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 143
 144#if 0 
 145/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 146   we don't need this definition. */
 147#define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 148#define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 149#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 150
 151#else
 152/* ... so we use this as its faster. */
 153#define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 154#define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 155#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 156
 157#endif
 158
 159/*
 160  Search for mandatory following text node; for lookahead, the text must
 161  follow but for lookbehind (rn->flags != 0) we skip to the next step.
 162*/
 163#define FIND_NEXT_IMPT(rn) STMT_START { \
 164    while (JUMPABLE(rn)) { \
 165        const OPCODE type = OP(rn); \
 166        if (type == SUSPEND || PL_regkind[type] == CURLY) \
 167            rn = NEXTOPER(NEXTOPER(rn)); \
 168        else if (type == PLUS) \
 169            rn = NEXTOPER(rn); \
 170        else if (type == IFMATCH) \
 171            rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 172        else rn += NEXT_OFF(rn); \
 173    } \
 174} STMT_END 
 175
 176
 177static void restore_pos(pTHX_ void *arg);
 178
 179STATIC CHECKPOINT
 180S_regcppush(pTHX_ I32 parenfloor)
 181{
 182    dVAR;
 183    const int retval = PL_savestack_ix;
 184#define REGCP_PAREN_ELEMS 4
 185    const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 186    int p;
 187    GET_RE_DEBUG_FLAGS_DECL;
 188
 189    if (paren_elems_to_push < 0)
 190        Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 191
 192#define REGCP_OTHER_ELEMS 7
 193    SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 194    
 195    for (p = PL_regsize; p > parenfloor; p--) {
 196/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 197        SSPUSHINT(PL_regoffs[p].end);
 198        SSPUSHINT(PL_regoffs[p].start);
 199        SSPUSHPTR(PL_reg_start_tmp[p]);
 200        SSPUSHINT(p);
 201        DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 202          "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 203                      (UV)p, (IV)PL_regoffs[p].start,
 204                      (IV)(PL_reg_start_tmp[p] - PL_bostr),
 205                      (IV)PL_regoffs[p].end
 206        ));
 207    }
 208/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 209    SSPUSHPTR(PL_regoffs);
 210    SSPUSHINT(PL_regsize);
 211    SSPUSHINT(*PL_reglastparen);
 212    SSPUSHINT(*PL_reglastcloseparen);
 213    SSPUSHPTR(PL_reginput);
 214#define REGCP_FRAME_ELEMS 2
 215/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 216 * are needed for the regexp context stack bookkeeping. */
 217    SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 218    SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 219
 220    return retval;
 221}
 222
 223/* These are needed since we do not localize EVAL nodes: */
 224#define REGCP_SET(cp)                                           \
 225    DEBUG_STATE_r(                                              \
 226            PerlIO_printf(Perl_debug_log,                       \
 227                "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 228                (IV)PL_savestack_ix));                          \
 229    cp = PL_savestack_ix
 230
 231#define REGCP_UNWIND(cp)                                        \
 232    DEBUG_STATE_r(                                              \
 233        if (cp != PL_savestack_ix)                              \
 234            PerlIO_printf(Perl_debug_log,                       \
 235                "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 236                (IV)(cp), (IV)PL_savestack_ix));                \
 237    regcpblow(cp)
 238
 239STATIC char *
 240S_regcppop(pTHX_ const regexp *rex)
 241{
 242    dVAR;
 243    U32 i;
 244    char *input;
 245    GET_RE_DEBUG_FLAGS_DECL;
 246
 247    PERL_ARGS_ASSERT_REGCPPOP;
 248
 249    /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 250    i = SSPOPINT;
 251    assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 252    i = SSPOPINT; /* Parentheses elements to pop. */
 253    input = (char *) SSPOPPTR;
 254    *PL_reglastcloseparen = SSPOPINT;
 255    *PL_reglastparen = SSPOPINT;
 256    PL_regsize = SSPOPINT;
 257    PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 258
 259    
 260    /* Now restore the parentheses context. */
 261    for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 262         i > 0; i -= REGCP_PAREN_ELEMS) {
 263        I32 tmps;
 264        U32 paren = (U32)SSPOPINT;
 265        PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 266        PL_regoffs[paren].start = SSPOPINT;
 267        tmps = SSPOPINT;
 268        if (paren <= *PL_reglastparen)
 269            PL_regoffs[paren].end = tmps;
 270        DEBUG_BUFFERS_r(
 271            PerlIO_printf(Perl_debug_log,
 272                          "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 273                          (UV)paren, (IV)PL_regoffs[paren].start,
 274                          (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 275                          (IV)PL_regoffs[paren].end,
 276                          (paren > *PL_reglastparen ? "(no)" : ""));
 277        );
 278    }
 279    DEBUG_BUFFERS_r(
 280        if (*PL_reglastparen + 1 <= rex->nparens) {
 281            PerlIO_printf(Perl_debug_log,
 282                          "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 283                          (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 284        }
 285    );
 286#if 1
 287    /* It would seem that the similar code in regtry()
 288     * already takes care of this, and in fact it is in
 289     * a better location to since this code can #if 0-ed out
 290     * but the code in regtry() is needed or otherwise tests
 291     * requiring null fields (pat.t#187 and split.t#{13,14}
 292     * (as of patchlevel 7877)  will fail.  Then again,
 293     * this code seems to be necessary or otherwise
 294     * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 295     * --jhi updated by dapm */
 296    for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 297        if (i > PL_regsize)
 298            PL_regoffs[i].start = -1;
 299        PL_regoffs[i].end = -1;
 300    }
 301#endif
 302    return input;
 303}
 304
 305#define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 306
 307/*
 308 * pregexec and friends
 309 */
 310
 311#ifndef PERL_IN_XSUB_RE
 312/*
 313 - pregexec - match a regexp against a string
 314 */
 315I32
 316Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 317         char *strbeg, I32 minend, SV *screamer, U32 nosave)
 318/* strend: pointer to null at end of string */
 319/* strbeg: real beginning of string */
 320/* minend: end of match must be >=minend after stringarg. */
 321/* nosave: For optimizations. */
 322{
 323    PERL_ARGS_ASSERT_PREGEXEC;
 324
 325    return
 326        regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 327                      nosave ? 0 : REXEC_COPY_STR);
 328}
 329#endif
 330
 331/*
 332 * Need to implement the following flags for reg_anch:
 333 *
 334 * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 335 * USE_INTUIT_ML
 336 * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 337 * INTUIT_AUTORITATIVE_ML
 338 * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 339 * INTUIT_ONCE_ML
 340 *
 341 * Another flag for this function: SECOND_TIME (so that float substrs
 342 * with giant delta may be not rechecked).
 343 */
 344
 345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 346
 347/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 348   Otherwise, only SvCUR(sv) is used to get strbeg. */
 349
 350/* XXXX We assume that strpos is strbeg unless sv. */
 351
 352/* XXXX Some places assume that there is a fixed substring.
 353        An update may be needed if optimizer marks as "INTUITable"
 354        RExen without fixed substrings.  Similarly, it is assumed that
 355        lengths of all the strings are no more than minlen, thus they
 356        cannot come from lookahead.
 357        (Or minlen should take into account lookahead.) 
 358  NOTE: Some of this comment is not correct. minlen does now take account
 359  of lookahead/behind. Further research is required. -- demerphq
 360
 361*/
 362
 363/* A failure to find a constant substring means that there is no need to make
 364   an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 365   finding a substring too deep into the string means that less calls to
 366   regtry() should be needed.
 367
 368   REx compiler's optimizer found 4 possible hints:
 369        a) Anchored substring;
 370        b) Fixed substring;
 371        c) Whether we are anchored (beginning-of-line or \G);
 372        d) First node (of those at offset 0) which may distingush positions;
 373   We use a)b)d) and multiline-part of c), and try to find a position in the
 374   string which does not contradict any of them.
 375 */
 376
 377/* Most of decisions we do here should have been done at compile time.
 378   The nodes of the REx which we used for the search should have been
 379   deleted from the finite automaton. */
 380
 381char *
 382Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
 383                     char *strend, const U32 flags, re_scream_pos_data *data)
 384{
 385    dVAR;
 386    register I32 start_shift = 0;
 387    /* Should be nonnegative! */
 388    register I32 end_shift   = 0;
 389    register char *s;
 390    register SV *check;
 391    char *strbeg;
 392    char *t;
 393    const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 394    I32 ml_anch;
 395    register char *other_last = NULL;   /* other substr checked before this */
 396    char *check_at = NULL;              /* check substr found at this pos */
 397    const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 398    RXi_GET_DECL(prog,progi);
 399#ifdef DEBUGGING
 400    const char * const i_strpos = strpos;
 401#endif
 402    GET_RE_DEBUG_FLAGS_DECL;
 403
 404    PERL_ARGS_ASSERT_RE_INTUIT_START;
 405
 406    RX_MATCH_UTF8_set(prog,do_utf8);
 407
 408    if (RX_UTF8(prog)) {
 409        PL_reg_flags |= RF_utf8;
 410    }
 411    DEBUG_EXECUTE_r( 
 412        debug_start_match(prog, do_utf8, strpos, strend, 
 413            sv ? "Guessing start of match in sv for"
 414               : "Guessing start of match in string for");
 415              );
 416
 417    /* CHR_DIST() would be more correct here but it makes things slow. */
 418    if (prog->minlen > strend - strpos) {
 419        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 420                              "String too short... [re_intuit_start]\n"));
 421        goto fail;
 422    }
 423                
 424    strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 425    PL_regeol = strend;
 426    if (do_utf8) {
 427        if (!prog->check_utf8 && prog->check_substr)
 428            to_utf8_substr(prog);
 429        check = prog->check_utf8;
 430    } else {
 431        if (!prog->check_substr && prog->check_utf8)
 432            to_byte_substr(prog);
 433        check = prog->check_substr;
 434    }
 435    if (check == &PL_sv_undef) {
 436        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 437                "Non-utf8 string cannot match utf8 check string\n"));
 438        goto fail;
 439    }
 440    if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 441        ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 442                     || ( (prog->extflags & RXf_ANCH_BOL)
 443                          && !multiline ) );    /* Check after \n? */
 444
 445        if (!ml_anch) {
 446          if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 447                && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 448               /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 449               && sv && !SvROK(sv)
 450               && (strpos != strbeg)) {
 451              DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 452              goto fail;
 453          }
 454          if (prog->check_offset_min == prog->check_offset_max &&
 455              !(prog->extflags & RXf_CANY_SEEN)) {
 456            /* Substring at constant offset from beg-of-str... */
 457            I32 slen;
 458
 459            s = HOP3c(strpos, prog->check_offset_min, strend);
 460            
 461            if (SvTAIL(check)) {
 462                slen = SvCUR(check);    /* >= 1 */
 463
 464                if ( strend - s > slen || strend - s < slen - 1
 465                     || (strend - s == slen && strend[-1] != '\n')) {
 466                    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 467                    goto fail_finish;
 468                }
 469                /* Now should match s[0..slen-2] */
 470                slen--;
 471                if (slen && (*SvPVX_const(check) != *s
 472                             || (slen > 1
 473                                 && memNE(SvPVX_const(check), s, slen)))) {
 474                  report_neq:
 475                    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 476                    goto fail_finish;
 477                }
 478            }
 479            else if (*SvPVX_const(check) != *s
 480                     || ((slen = SvCUR(check)) > 1
 481                         && memNE(SvPVX_const(check), s, slen)))
 482                goto report_neq;
 483            check_at = s;
 484            goto success_at_start;
 485          }
 486        }
 487        /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 488        s = strpos;
 489        start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 490        end_shift = prog->check_end_shift;
 491        
 492        if (!ml_anch) {
 493            const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 494                                         - (SvTAIL(check) != 0);
 495            const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 496
 497            if (end_shift < eshift)
 498                end_shift = eshift;
 499        }
 500    }
 501    else {                              /* Can match at random position */
 502        ml_anch = 0;
 503        s = strpos;
 504        start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 505        end_shift = prog->check_end_shift;
 506        
 507        /* end shift should be non negative here */
 508    }
 509
 510#ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 511    if (end_shift < 0)
 512        Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 513                   (IV)end_shift, RX_PRECOMP(prog));
 514#endif
 515
 516  restart:
 517    /* Find a possible match in the region s..strend by looking for
 518       the "check" substring in the region corrected by start/end_shift. */
 519    
 520    {
 521        I32 srch_start_shift = start_shift;
 522        I32 srch_end_shift = end_shift;
 523        if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 524            srch_end_shift -= ((strbeg - s) - srch_start_shift); 
 525            srch_start_shift = strbeg - s;
 526        }
 527    DEBUG_OPTIMISE_MORE_r({
 528        PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 529            (IV)prog->check_offset_min,
 530            (IV)srch_start_shift,
 531            (IV)srch_end_shift, 
 532            (IV)prog->check_end_shift);
 533    });       
 534        
 535    if (flags & REXEC_SCREAM) {
 536        I32 p = -1;                     /* Internal iterator of scream. */
 537        I32 * const pp = data ? data->scream_pos : &p;
 538
 539        if (PL_screamfirst[BmRARE(check)] >= 0
 540            || ( BmRARE(check) == '\n'
 541                 && (BmPREVIOUS(check) == SvCUR(check) - 1)
 542                 && SvTAIL(check) ))
 543            s = screaminstr(sv, check,
 544                            srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 545        else
 546            goto fail_finish;
 547        /* we may be pointing at the wrong string */
 548        if (s && RXp_MATCH_COPIED(prog))
 549            s = strbeg + (s - SvPVX_const(sv));
 550        if (data)
 551            *data->scream_olds = s;
 552    }
 553    else {
 554        U8* start_point;
 555        U8* end_point;
 556        if (prog->extflags & RXf_CANY_SEEN) {
 557            start_point= (U8*)(s + srch_start_shift);
 558            end_point= (U8*)(strend - srch_end_shift);
 559        } else {
 560            start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 561            end_point= HOP3(strend, -srch_end_shift, strbeg);
 562        }
 563        DEBUG_OPTIMISE_MORE_r({
 564            PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n", 
 565                (int)(end_point - start_point),
 566                (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point), 
 567                start_point);
 568        });
 569
 570        s = fbm_instr( start_point, end_point,
 571                      check, multiline ? FBMrf_MULTILINE : 0);
 572    }
 573    }
 574    /* Update the count-of-usability, remove useless subpatterns,
 575        unshift s.  */
 576
 577    DEBUG_EXECUTE_r({
 578        RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0), 
 579            SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 580        PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 581                          (s ? "Found" : "Did not find"),
 582            (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) 
 583                ? "anchored" : "floating"),
 584            quoted,
 585            RE_SV_TAIL(check),
 586            (s ? " at offset " : "...\n") ); 
 587    });
 588
 589    if (!s)
 590        goto fail_finish;
 591    /* Finish the diagnostic message */
 592    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 593
 594    /* XXX dmq: first branch is for positive lookbehind...
 595       Our check string is offset from the beginning of the pattern.
 596       So we need to do any stclass tests offset forward from that 
 597       point. I think. :-(
 598     */
 599    
 600        
 601    
 602    check_at=s;
 603     
 604
 605    /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 606       Start with the other substr.
 607       XXXX no SCREAM optimization yet - and a very coarse implementation
 608       XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 609                *always* match.  Probably should be marked during compile...
 610       Probably it is right to do no SCREAM here...
 611     */
 612
 613    if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) 
 614                : (prog->float_substr && prog->anchored_substr)) 
 615    {
 616        /* Take into account the "other" substring. */
 617        /* XXXX May be hopelessly wrong for UTF... */
 618        if (!other_last)
 619            other_last = strpos;
 620        if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 621          do_other_anchored:
 622            {
 623                char * const last = HOP3c(s, -start_shift, strbeg);
 624                char *last1, *last2;
 625                char * const saved_s = s;
 626                SV* must;
 627
 628                t = s - prog->check_offset_max;
 629                if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 630                    && (!do_utf8
 631                        || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 632                            && t > strpos)))
 633                    NOOP;
 634                else
 635                    t = strpos;
 636                t = HOP3c(t, prog->anchored_offset, strend);
 637                if (t < other_last)     /* These positions already checked */
 638                    t = other_last;
 639                last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 640                if (last < last1)
 641                    last1 = last;
 642                /* XXXX It is not documented what units *_offsets are in.  
 643                   We assume bytes, but this is clearly wrong. 
 644                   Meaning this code needs to be carefully reviewed for errors.
 645                   dmq.
 646                  */
 647 
 648                /* On end-of-str: see comment below. */
 649                must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 650                if (must == &PL_sv_undef) {
 651                    s = (char*)NULL;
 652                    DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 653                }
 654                else
 655                    s = fbm_instr(
 656                        (unsigned char*)t,
 657                        HOP3(HOP3(last1, prog->anchored_offset, strend)
 658                                + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 659                        must,
 660                        multiline ? FBMrf_MULTILINE : 0
 661                    );
 662                DEBUG_EXECUTE_r({
 663                    RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0), 
 664                        SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 665                    PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 666                        (s ? "Found" : "Contradicts"),
 667                        quoted, RE_SV_TAIL(must));
 668                });                 
 669                
 670                            
 671                if (!s) {
 672                    if (last1 >= last2) {
 673                        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 674                                                ", giving up...\n"));
 675                        goto fail_finish;
 676                    }
 677                    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 678                        ", trying floating at offset %ld...\n",
 679                        (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 680                    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 681                    s = HOP3c(last, 1, strend);
 682                    goto restart;
 683                }
 684                else {
 685                    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 686                          (long)(s - i_strpos)));
 687                    t = HOP3c(s, -prog->anchored_offset, strbeg);
 688                    other_last = HOP3c(s, 1, strend);
 689                    s = saved_s;
 690                    if (t == strpos)
 691                        goto try_at_start;
 692                    goto try_at_offset;
 693                }
 694            }
 695        }
 696        else {          /* Take into account the floating substring. */
 697            char *last, *last1;
 698            char * const saved_s = s;
 699            SV* must;
 700
 701            t = HOP3c(s, -start_shift, strbeg);
 702            last1 = last =
 703                HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 704            if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 705                last = HOP3c(t, prog->float_max_offset, strend);
 706            s = HOP3c(t, prog->float_min_offset, strend);
 707            if (s < other_last)
 708                s = other_last;
 709 /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 710            must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 711            /* fbm_instr() takes into account exact value of end-of-str
 712               if the check is SvTAIL(ed).  Since false positives are OK,
 713               and end-of-str is not later than strend we are OK. */
 714            if (must == &PL_sv_undef) {
 715                s = (char*)NULL;
 716                DEBUG_r(must = prog->float_utf8);       /* for debug message */
 717            }
 718            else
 719                s = fbm_instr((unsigned char*)s,
 720                              (unsigned char*)last + SvCUR(must)
 721                                  - (SvTAIL(must)!=0),
 722                              must, multiline ? FBMrf_MULTILINE : 0);
 723            DEBUG_EXECUTE_r({
 724                RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0), 
 725                    SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 726                PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 727                    (s ? "Found" : "Contradicts"),
 728                    quoted, RE_SV_TAIL(must));
 729            });
 730            if (!s) {
 731                if (last1 == last) {
 732                    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 733                                            ", giving up...\n"));
 734                    goto fail_finish;
 735                }
 736                DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 737                    ", trying anchored starting at offset %ld...\n",
 738                    (long)(saved_s + 1 - i_strpos)));
 739                other_last = last;
 740                s = HOP3c(t, 1, strend);
 741                goto restart;
 742            }
 743            else {
 744                DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 745                      (long)(s - i_strpos)));
 746                other_last = s; /* Fix this later. --Hugo */
 747                s = saved_s;
 748                if (t == strpos)
 749                    goto try_at_start;
 750                goto try_at_offset;
 751            }
 752        }
 753    }
 754
 755    
 756    t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 757        
 758    DEBUG_OPTIMISE_MORE_r(
 759        PerlIO_printf(Perl_debug_log, 
 760            "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 761            (IV)prog->check_offset_min,
 762            (IV)prog->check_offset_max,
 763            (IV)(s-strpos),
 764            (IV)(t-strpos),
 765            (IV)(t-s),
 766            (IV)(strend-strpos)
 767        )
 768    );
 769
 770    if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 771        && (!do_utf8
 772            || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 773                 && t > strpos))) 
 774    {
 775        /* Fixed substring is found far enough so that the match
 776           cannot start at strpos. */
 777      try_at_offset:
 778        if (ml_anch && t[-1] != '\n') {
 779            /* Eventually fbm_*() should handle this, but often
 780               anchored_offset is not 0, so this check will not be wasted. */
 781            /* XXXX In the code below we prefer to look for "^" even in
 782               presence of anchored substrings.  And we search even
 783               beyond the found float position.  These pessimizations
 784               are historical artefacts only.  */
 785          find_anchor:
 786            while (t < strend - prog->minlen) {
 787                if (*t == '\n') {
 788                    if (t < check_at - prog->check_offset_min) {
 789                        if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 790                            /* Since we moved from the found position,
 791                               we definitely contradict the found anchored
 792                               substr.  Due to the above check we do not
 793                               contradict "check" substr.
 794                               Thus we can arrive here only if check substr
 795                               is float.  Redo checking for "other"=="fixed".
 796                             */
 797                            strpos = t + 1;                     
 798                            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 799                                PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 800                            goto do_other_anchored;
 801                        }
 802                        /* We don't contradict the found floating substring. */
 803                        /* XXXX Why not check for STCLASS? */
 804                        s = t + 1;
 805                        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 806                            PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 807                        goto set_useful;
 808                    }
 809                    /* Position contradicts check-string */
 810                    /* XXXX probably better to look for check-string
 811                       than for "\n", so one should lower the limit for t? */
 812                    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 813                        PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 814                    other_last = strpos = s = t + 1;
 815                    goto restart;
 816                }
 817                t++;
 818            }
 819            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 820                        PL_colors[0], PL_colors[1]));
 821            goto fail_finish;
 822        }
 823        else {
 824            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 825                        PL_colors[0], PL_colors[1]));
 826        }
 827        s = t;
 828      set_useful:
 829        ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 830    }
 831    else {
 832        /* The found string does not prohibit matching at strpos,
 833           - no optimization of calling REx engine can be performed,
 834           unless it was an MBOL and we are not after MBOL,
 835           or a future STCLASS check will fail this. */
 836      try_at_start:
 837        /* Even in this situation we may use MBOL flag if strpos is offset
 838           wrt the start of the string. */
 839        if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 840            && (strpos != strbeg) && strpos[-1] != '\n'
 841            /* May be due to an implicit anchor of m{.*foo}  */
 842            && !(prog->intflags & PREGf_IMPLICIT))
 843        {
 844            t = strpos;
 845            goto find_anchor;
 846        }
 847        DEBUG_EXECUTE_r( if (ml_anch)
 848            PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 849                          (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 850        );
 851      success_at_start:
 852        if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 853            && (do_utf8 ? (
 854                prog->check_utf8                /* Could be deleted already */
 855                && --BmUSEFUL(prog->check_utf8) < 0
 856                && (prog->check_utf8 == prog->float_utf8)
 857            ) : (
 858                prog->check_substr              /* Could be deleted already */
 859                && --BmUSEFUL(prog->check_substr) < 0
 860                && (prog->check_substr == prog->float_substr)
 861            )))
 862        {
 863            /* If flags & SOMETHING - do not do it many times on the same match */
 864            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 865            SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 866            if (do_utf8 ? prog->check_substr : prog->check_utf8)
 867                SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 868            prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 869            prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 870            check = NULL;                       /* abort */
 871            s = strpos;
 872            /* XXXX This is a remnant of the old implementation.  It
 873                    looks wasteful, since now INTUIT can use many
 874                    other heuristics. */
 875            prog->extflags &= ~RXf_USE_INTUIT;
 876        }
 877        else
 878            s = strpos;
 879    }
 880
 881    /* Last resort... */
 882    /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 883    /* trie stclasses are too expensive to use here, we are better off to
 884       leave it to regmatch itself */
 885    if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 886        /* minlen == 0 is possible if regstclass is \b or \B,
 887           and the fixed substr is ''$.
 888           Since minlen is already taken into account, s+1 is before strend;
 889           accidentally, minlen >= 1 guaranties no false positives at s + 1
 890           even for \b or \B.  But (minlen? 1 : 0) below assumes that
 891           regstclass does not come from lookahead...  */
 892        /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 893           This leaves EXACTF only, which is dealt with in find_byclass().  */
 894        const U8* const str = (U8*)STRING(progi->regstclass);
 895        const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 896                    ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 897                    : 1);
 898        char * endpos;
 899        if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 900            endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 901        else if (prog->float_substr || prog->float_utf8)
 902            endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 903        else 
 904            endpos= strend;
 905                    
 906        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 907                                      (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 908        
 909        t = s;
 910        s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 911        if (!s) {
 912#ifdef DEBUGGING
 913            const char *what = NULL;
 914#endif
 915            if (endpos == strend) {
 916                DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 917                                "Could not match STCLASS...\n") );
 918                goto fail;
 919            }
 920            DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 921                                   "This position contradicts STCLASS...\n") );
 922            if ((prog->extflags & RXf_ANCH) && !ml_anch)
 923                goto fail;
 924            /* Contradict one of substrings */
 925            if (prog->anchored_substr || prog->anchored_utf8) {
 926                if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 927                    DEBUG_EXECUTE_r( what = "anchored" );
 928                  hop_and_restart:
 929                    s = HOP3c(t, 1, strend);
 930                    if (s + start_shift + end_shift > strend) {
 931                        /* XXXX Should be taken into account earlier? */
 932                        DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 933                                               "Could not match STCLASS...\n") );
 934                        goto fail;
 935                    }
 936                    if (!check)
 937                        goto giveup;
 938                    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 939                                "Looking for %s substr starting at offset %ld...\n",
 940                                 what, (long)(s + start_shift - i_strpos)) );
 941                    goto restart;
 942                }
 943                /* Have both, check_string is floating */
 944                if (t + start_shift >= check_at) /* Contradicts floating=check */
 945                    goto retry_floating_check;
 946                /* Recheck anchored substring, but not floating... */
 947                s = check_at;
 948                if (!check)
 949                    goto giveup;
 950                DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 951                          "Looking for anchored substr starting at offset %ld...\n",
 952                          (long)(other_last - i_strpos)) );
 953                goto do_other_anchored;
 954            }
 955            /* Another way we could have checked stclass at the
 956               current position only: */
 957            if (ml_anch) {
 958                s = t = t + 1;
 959                if (!check)
 960                    goto giveup;
 961                DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 962                          "Looking for /%s^%s/m starting at offset %ld...\n",
 963                          PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 964                goto try_at_offset;
 965            }
 966            if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 967                goto fail;
 968            /* Check is floating subtring. */
 969          retry_floating_check:
 970            t = check_at - start_shift;
 971            DEBUG_EXECUTE_r( what = "floating" );
 972            goto hop_and_restart;
 973        }
 974        if (t != s) {
 975            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 976                        "By STCLASS: moving %ld --> %ld\n",
 977                                  (long)(t - i_strpos), (long)(s - i_strpos))
 978                   );
 979        }
 980        else {
 981            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 982                                  "Does not contradict STCLASS...\n"); 
 983                   );
 984        }
 985    }
 986  giveup:
 987    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 988                          PL_colors[4], (check ? "Guessed" : "Giving up"),
 989                          PL_colors[5], (long)(s - i_strpos)) );
 990    return s;
 991
 992  fail_finish:                          /* Substring not found */
 993    if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 994        BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 995  fail:
 996    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 997                          PL_colors[4], PL_colors[5]));
 998    return NULL;
 999}
1000
1001#define DECL_TRIE_TYPE(scan) \
1002    const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1003                    trie_type = (scan->flags != EXACT) \
1004                              ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1005                              : (do_utf8 ? trie_utf8 : trie_plain)
1006
1007#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1008uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1009    UV uvc_unfolded = 0;                                                    \
1010    switch (trie_type) {                                                    \
1011    case trie_utf8_fold:                                                    \
1012        if ( foldlen>0 ) {                                                  \
1013            uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1014            foldlen -= len;                                                 \
1015            uscan += len;                                                   \
1016            len=0;                                                          \
1017        } else {                                                            \
1018            uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1019            uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1020            foldlen -= UNISKIP( uvc );                                      \
1021            uscan = foldbuf + UNISKIP( uvc );                               \
1022        }                                                                   \
1023        break;                                                              \
1024    case trie_latin_utf8_fold:                                              \
1025        if ( foldlen>0 ) {                                                  \
1026            uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1027            foldlen -= len;                                                 \
1028            uscan += len;                                                   \
1029            len=0;                                                          \
1030        } else {                                                            \
1031            len = 1;                                                        \
1032            uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1033            foldlen -= UNISKIP( uvc );                                      \
1034            uscan = foldbuf + UNISKIP( uvc );                               \
1035        }                                                                   \
1036        break;                                                              \
1037    case trie_utf8:                                                         \
1038        uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1039        break;                                                              \
1040    case trie_plain:                                                        \
1041        uvc = (UV)*uc;                                                      \
1042        len = 1;                                                            \
1043    }                                                                       \
1044                                                                            \
1045    if (uvc < 256) {                                                        \
1046        charid = trie->charmap[ uvc ];                                      \
1047    }                                                                       \
1048    else {                                                                  \
1049        charid = 0;                                                         \
1050        if (widecharmap) {                                                  \
1051            SV** const svpp = hv_fetch(widecharmap,                         \
1052                        (char*)&uvc, sizeof(UV), 0);                        \
1053            if (svpp)                                                       \
1054                charid = (U16)SvIV(*svpp);                                  \
1055        }                                                                   \
1056    }                                                                       \
1057    if (!charid && trie_type == trie_utf8_fold && !UTF) {                   \
1058        charid = trie->charmap[uvc_unfolded];                               \
1059    }                                                                       \
1060} STMT_END
1061
1062#define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1063{                                                      \
1064    char *my_strend= (char *)strend;                   \
1065    if ( (CoNd)                                        \
1066         && (ln == len ||                              \
1067             !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1068                        m, NULL, ln, (bool)UTF))       \
1069         && (!reginfo || regtry(reginfo, &s)) )        \
1070        goto got_it;                                   \
1071    else {                                             \
1072         U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1073         uvchr_to_utf8(tmpbuf, c);                     \
1074         f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1075         if ( f != c                                   \
1076              && (f == c1 || f == c2)                  \
1077              && (ln == len ||                         \
1078                !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1079                              m, NULL, ln, (bool)UTF)) \
1080              && (!reginfo || regtry(reginfo, &s)) )   \
1081              goto got_it;                             \
1082    }                                                  \
1083}                                                      \
1084s += len
1085
1086#define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1087STMT_START {                                              \
1088    while (s <= e) {                                      \
1089        if ( (CoNd)                                       \
1090             && (ln == 1 || !(OP(c) == EXACTF             \
1091                              ? ibcmp(s, m, ln)           \
1092                              : ibcmp_locale(s, m, ln)))  \
1093             && (!reginfo || regtry(reginfo, &s)) )        \
1094            goto got_it;                                  \
1095        s++;                                              \
1096    }                                                     \
1097} STMT_END
1098
1099#define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1100STMT_START {                                          \
1101    while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1102        CoDe                                          \
1103        s += uskip;                                   \
1104    }                                                 \
1105} STMT_END
1106
1107#define REXEC_FBC_SCAN(CoDe)                          \
1108STMT_START {                                          \
1109    while (s < strend) {                              \
1110        CoDe                                          \
1111        s++;                                          \
1112    }                                                 \
1113} STMT_END
1114
1115#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1116REXEC_FBC_UTF8_SCAN(                                  \
1117    if (CoNd) {                                       \
1118        if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1119            goto got_it;                              \
1120        else                                          \
1121            tmp = doevery;                            \
1122    }                                                 \
1123    else                                              \
1124        tmp = 1;                                      \
1125)
1126
1127#define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1128REXEC_FBC_SCAN(                                       \
1129    if (CoNd) {                                       \
1130        if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1131            goto got_it;                              \
1132        else                                          \
1133            tmp = doevery;                            \
1134    }                                                 \
1135    else                                              \
1136        tmp = 1;                                      \
1137)
1138
1139#define REXEC_FBC_TRYIT               \
1140if ((!reginfo || regtry(reginfo, &s))) \
1141    goto got_it
1142
1143#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1144    if (do_utf8) {                                             \
1145        REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1146    }                                                          \
1147    else {                                                     \
1148        REXEC_FBC_CLASS_SCAN(CoNd);                            \
1149    }                                                          \
1150    break
1151    
1152#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1153    if (do_utf8) {                                             \
1154        UtFpReLoAd;                                            \
1155        REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1156    }                                                          \
1157    else {                                                     \
1158        REXEC_FBC_CLASS_SCAN(CoNd);                            \
1159    }                                                          \
1160    break
1161
1162#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1163    PL_reg_flags |= RF_tainted;                                \
1164    if (do_utf8) {                                             \
1165        REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1166    }                                                          \
1167    else {                                                     \
1168        REXEC_FBC_CLASS_SCAN(CoNd);                            \
1169    }                                                          \
1170    break
1171
1172#define DUMP_EXEC_POS(li,s,doutf8) \
1173    dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1174
1175/* We know what class REx starts with.  Try to find this position... */
1176/* if reginfo is NULL, its a dryrun */
1177/* annoyingly all the vars in this routine have different names from their counterparts
1178   in regmatch. /grrr */
1179
1180STATIC char *
1181S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
1182    const char *strend, regmatch_info *reginfo)
1183{
1184        dVAR;
1185        const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1186        char *m;
1187        STRLEN ln;
1188        STRLEN lnc;
1189        register STRLEN uskip;
1190        unsigned int c1;
1191        unsigned int c2;
1192        char *e;
1193        register I32 tmp = 1;   /* Scratch variable? */
1194        register const bool do_utf8 = PL_reg_match_utf8;
1195        RXi_GET_DECL(prog,progi);
1196
1197        PERL_ARGS_ASSERT_FIND_BYCLASS;
1198        
1199        /* We know what class it must start with. */
1200        switch (OP(c)) {
1201        case ANYOF:
1202            if (do_utf8) {
1203                 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1204                          !UTF8_IS_INVARIANT((U8)s[0]) ?
1205                          reginclass(prog, c, (U8*)s, 0, do_utf8) :
1206                          REGINCLASS(prog, c, (U8*)s));
1207            }
1208            else {
1209                 while (s < strend) {
1210                      STRLEN skip = 1;
1211
1212                      if (REGINCLASS(prog, c, (U8*)s) ||
1213                          (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1214                           /* The assignment of 2 is intentional:
1215                            * for the folded sharp s, the skip is 2. */
1216                           (skip = SHARP_S_SKIP))) {
1217                           if (tmp && (!reginfo || regtry(reginfo, &s)))
1218                                goto got_it;
1219                           else
1220                                tmp = doevery;
1221                      }
1222                      else 
1223                           tmp = 1;
1224                      s += skip;
1225                 }
1226            }
1227            break;
1228        case CANY:
1229            REXEC_FBC_SCAN(
1230                if (tmp && (!reginfo || regtry(reginfo, &s)))
1231                    goto got_it;
1232                else
1233                    tmp = doevery;
1234            );
1235            break;
1236        case EXACTF:
1237            m   = STRING(c);
1238            ln  = STR_LEN(c);   /* length to match in octets/bytes */
1239            lnc = (I32) ln;     /* length to match in characters */
1240            if (UTF) {
1241                STRLEN ulen1, ulen2;
1242                U8 *sm = (U8 *) m;
1243                U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1244                U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1245                /* used by commented-out code below */
1246                /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1247                
1248                /* XXX: Since the node will be case folded at compile
1249                   time this logic is a little odd, although im not 
1250                   sure that its actually wrong. --dmq */
1251                   
1252                c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1253                c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1254
1255                /* XXX: This is kinda strange. to_utf8_XYZ returns the 
1256                   codepoint of the first character in the converted
1257                   form, yet originally we did the extra step. 
1258                   No tests fail by commenting this code out however
1259                   so Ive left it out. -- dmq.
1260                   
1261                c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE, 
1262                                    0, uniflags);
1263                c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1264                                    0, uniflags);
1265                */
1266                
1267                lnc = 0;
1268                while (sm < ((U8 *) m + ln)) {
1269                    lnc++;
1270                    sm += UTF8SKIP(sm);
1271                }
1272            }
1273            else {
1274                c1 = *(U8*)m;
1275                c2 = PL_fold[c1];
1276            }
1277            goto do_exactf;
1278        case EXACTFL:
1279            m   = STRING(c);
1280            ln  = STR_LEN(c);
1281            lnc = (I32) ln;
1282            c1 = *(U8*)m;
1283            c2 = PL_fold_locale[c1];
1284          do_exactf:
1285            e = HOP3c(strend, -((I32)lnc), s);
1286
1287            if (!reginfo && e < s)
1288                e = s;                  /* Due to minlen logic of intuit() */
1289
1290            /* The idea in the EXACTF* cases is to first find the
1291             * first character of the EXACTF* node and then, if
1292             * necessary, case-insensitively compare the full
1293             * text of the node.  The c1 and c2 are the first
1294             * characters (though in Unicode it gets a bit
1295             * more complicated because there are more cases
1296             * than just upper and lower: one needs to use
1297             * the so-called folding case for case-insensitive
1298             * matching (called "loose matching" in Unicode).
1299             * ibcmp_utf8() will do just that. */
1300
1301            if (do_utf8 || UTF) {
1302                UV c, f;
1303                U8 tmpbuf [UTF8_MAXBYTES+1];
1304                STRLEN len = 1;
1305                STRLEN foldlen;
1306                const U32 uniflags = UTF8_ALLOW_DEFAULT;
1307                if (c1 == c2) {
1308                    /* Upper and lower of 1st char are equal -
1309                     * probably not a "letter". */
1310                    while (s <= e) {
1311                        if (do_utf8) {
1312                            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1313                                           uniflags);
1314                        } else {
1315                            c = *((U8*)s);
1316                        }                                         
1317                        REXEC_FBC_EXACTISH_CHECK(c == c1);
1318                    }
1319                }
1320                else {
1321                    while (s <= e) {
1322                        if (do_utf8) {
1323                            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1324                                           uniflags);
1325                        } else {
1326                            c = *((U8*)s);
1327                        }
1328
1329                        /* Handle some of the three Greek sigmas cases.
1330                         * Note that not all the possible combinations
1331                         * are handled here: some of them are handled
1332                         * by the standard folding rules, and some of
1333                         * them (the character class or ANYOF cases)
1334                         * are handled during compiletime in
1335                         * regexec.c:S_regclass(). */
1336                        if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1337                            c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1338                            c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1339
1340                        REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1341                    }
1342                }
1343            }
1344            else {
1345                /* Neither pattern nor string are UTF8 */
1346                if (c1 == c2)
1347                    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1348                else
1349                    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1350            }
1351            break;
1352        case BOUNDL:
1353            PL_reg_flags |= RF_tainted;
1354            /* FALL THROUGH */
1355        case BOUND:
1356            if (do_utf8) {
1357                if (s == PL_bostr)
1358                    tmp = '\n';
1359                else {
1360                    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1361                    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1362                }
1363                tmp = ((OP(c) == BOUND ?
1364                        isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1365                LOAD_UTF8_CHARCLASS_ALNUM();
1366                REXEC_FBC_UTF8_SCAN(
1367                    if (tmp == !(OP(c) == BOUND ?
1368                                 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1369                                 isALNUM_LC_utf8((U8*)s)))
1370                    {
1371                        tmp = !tmp;
1372                        REXEC_FBC_TRYIT;
1373                }
1374                );
1375            }
1376            else {
1377                tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1378                tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1379                REXEC_FBC_SCAN(
1380                    if (tmp ==
1381                        !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1382                        tmp = !tmp;
1383                        REXEC_FBC_TRYIT;
1384                }
1385                );
1386            }
1387            if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1388                goto got_it;
1389            break;
1390        case NBOUNDL:
1391            PL_reg_flags |= RF_tainted;
1392            /* FALL THROUGH */
1393        case NBOUND:
1394            if (do_utf8) {
1395                if (s == PL_bostr)
1396                    tmp = '\n';
1397                else {
1398                    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1399                    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1400                }
1401                tmp = ((OP(c) == NBOUND ?
1402                        isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1403                LOAD_UTF8_CHARCLASS_ALNUM();
1404                REXEC_FBC_UTF8_SCAN(
1405                    if (tmp == !(OP(c) == NBOUND ?
1406                                 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1407                                 isALNUM_LC_utf8((U8*)s)))
1408                        tmp = !tmp;
1409                    else REXEC_FBC_TRYIT;
1410                );
1411            }
1412            else {
1413                tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1414                tmp = ((OP(c) == NBOUND ?
1415                        isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1416                REXEC_FBC_SCAN(
1417                    if (tmp ==
1418                        !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1419                        tmp = !tmp;
1420                    else REXEC_FBC_TRYIT;
1421                );
1422            }
1423            if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1424                goto got_it;
1425            break;
1426        case ALNUM:
1427            REXEC_FBC_CSCAN_PRELOAD(
1428                LOAD_UTF8_CHARCLASS_ALNUM(),
1429                swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1430                isALNUM(*s)
1431            );
1432        case ALNUML:
1433            REXEC_FBC_CSCAN_TAINT(
1434                isALNUM_LC_utf8((U8*)s),
1435                isALNUM_LC(*s)
1436            );
1437        case NALNUM:
1438            REXEC_FBC_CSCAN_PRELOAD(
1439                LOAD_UTF8_CHARCLASS_ALNUM(),
1440                !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1441                !isALNUM(*s)
1442            );
1443        case NALNUML:
1444            REXEC_FBC_CSCAN_TAINT(
1445                !isALNUM_LC_utf8((U8*)s),
1446                !isALNUM_LC(*s)
1447            );
1448        case SPACE:
1449            REXEC_FBC_CSCAN_PRELOAD(
1450                LOAD_UTF8_CHARCLASS_SPACE(),
1451                *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1452                isSPACE(*s)
1453            );
1454        case SPACEL:
1455            REXEC_FBC_CSCAN_TAINT(
1456                *s == ' ' || isSPACE_LC_utf8((U8*)s),
1457                isSPACE_LC(*s)
1458            );
1459        case NSPACE:
1460            REXEC_FBC_CSCAN_PRELOAD(
1461                LOAD_UTF8_CHARCLASS_SPACE(),
1462                !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1463                !isSPACE(*s)
1464            );
1465        case NSPACEL:
1466            REXEC_FBC_CSCAN_TAINT(
1467                !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1468                !isSPACE_LC(*s)
1469            );
1470        case DIGIT:
1471            REXEC_FBC_CSCAN_PRELOAD(
1472                LOAD_UTF8_CHARCLASS_DIGIT(),
1473                swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1474                isDIGIT(*s)
1475            );
1476        case DIGITL:
1477            REXEC_FBC_CSCAN_TAINT(
1478                isDIGIT_LC_utf8((U8*)s),
1479                isDIGIT_LC(*s)
1480            );
1481        case NDIGIT:
1482            REXEC_FBC_CSCAN_PRELOAD(
1483                LOAD_UTF8_CHARCLASS_DIGIT(),
1484                !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1485                !isDIGIT(*s)
1486            );
1487        case NDIGITL:
1488            REXEC_FBC_CSCAN_TAINT(
1489                !isDIGIT_LC_utf8((U8*)s),
1490                !isDIGIT_LC(*s)
1491            );
1492        case LNBREAK:
1493            REXEC_FBC_CSCAN(
1494                is_LNBREAK_utf8(s),
1495                is_LNBREAK_latin1(s)
1496            );
1497        case VERTWS:
1498            REXEC_FBC_CSCAN(
1499                is_VERTWS_utf8(s),
1500                is_VERTWS_latin1(s)
1501            );
1502        case NVERTWS:
1503            REXEC_FBC_CSCAN(
1504                !is_VERTWS_utf8(s),
1505                !is_VERTWS_latin1(s)
1506            );
1507        case HORIZWS:
1508            REXEC_FBC_CSCAN(
1509                is_HORIZWS_utf8(s),
1510                is_HORIZWS_latin1(s)
1511            );
1512        case NHORIZWS:
1513            REXEC_FBC_CSCAN(
1514                !is_HORIZWS_utf8(s),
1515                !is_HORIZWS_latin1(s)
1516            );      
1517        case AHOCORASICKC:
1518        case AHOCORASICK: 
1519            {
1520                DECL_TRIE_TYPE(c);
1521                /* what trie are we using right now */
1522                reg_ac_data *aho
1523                    = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1524                reg_trie_data *trie
1525                    = (reg_trie_data*)progi->data->data[ aho->trie ];
1526                HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1527
1528                const char *last_start = strend - trie->minlen;
1529#ifdef DEBUGGING
1530                const char *real_start = s;
1531#endif
1532                STRLEN maxlen = trie->maxlen;
1533                SV *sv_points;
1534                U8 **points; /* map of where we were in the input string
1535                                when reading a given char. For ASCII this
1536                                is unnecessary overhead as the relationship
1537                                is always 1:1, but for Unicode, especially
1538                                case folded Unicode this is not true. */
1539                U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1540                U8 *bitmap=NULL;
1541
1542
1543                GET_RE_DEBUG_FLAGS_DECL;
1544
1545                /* We can't just allocate points here. We need to wrap it in
1546                 * an SV so it gets freed properly if there is a croak while
1547                 * running the match */
1548                ENTER;
1549                SAVETMPS;
1550                sv_points=newSV(maxlen * sizeof(U8 *));
1551                SvCUR_set(sv_points,
1552                    maxlen * sizeof(U8 *));
1553                SvPOK_on(sv_points);
1554                sv_2mortal(sv_points);
1555                points=(U8**)SvPV_nolen(sv_points );
1556                if ( trie_type != trie_utf8_fold 
1557                     && (trie->bitmap || OP(c)==AHOCORASICKC) ) 
1558                {
1559                    if (trie->bitmap) 
1560                        bitmap=(U8*)trie->bitmap;
1561                    else
1562                        bitmap=(U8*)ANYOF_BITMAP(c);
1563                }
1564                /* this is the Aho-Corasick algorithm modified a touch
1565                   to include special handling for long "unknown char" 
1566                   sequences. The basic idea being that we use AC as long
1567                   as we are dealing with a possible matching char, when
1568                   we encounter an unknown char (and we have not encountered
1569                   an accepting state) we scan forward until we find a legal 
1570                   starting char. 
1571                   AC matching is basically that of trie matching, except
1572                   that when we encounter a failing transition, we fall back
1573                   to the current states "fail state", and try the current char 
1574                   again, a process we repeat until we reach the root state, 
1575                   state 1, or a legal transition. If we fail on the root state 
1576                   then we can either terminate if we have reached an accepting 
1577                   state previously, or restart the entire process from the beginning 
1578                   if we have not.
1579
1580                 */
1581                while (s <= last_start) {
1582                    const U32 uniflags = UTF8_ALLOW_DEFAULT;
1583                    U8 *uc = (U8*)s;
1584                    U16 charid = 0;
1585                    U32 base = 1;
1586                    U32 state = 1;
1587                    UV uvc = 0;
1588                    STRLEN len = 0;
1589                    STRLEN foldlen = 0;
1590                    U8 *uscan = (U8*)NULL;
1591                    U8 *leftmost = NULL;
1592#ifdef DEBUGGING                    
1593                    U32 accepted_word= 0;
1594#endif
1595                    U32 pointpos = 0;
1596
1597                    while ( state && uc <= (U8*)strend ) {
1598                        int failed=0;
1599                        U32 word = aho->states[ state ].wordnum;
1600
1601                        if( state==1 ) {
1602                            if ( bitmap ) {
1603                                DEBUG_TRIE_EXECUTE_r(
1604                                    if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1605                                        dump_exec_pos( (char *)uc, c, strend, real_start, 
1606                                            (char *)uc, do_utf8 );
1607                                        PerlIO_printf( Perl_debug_log,
1608                                            " Scanning for legal start char...\n");
1609                                    }
1610                                );            
1611                                while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1612                                    uc++;
1613                                }
1614                                s= (char *)uc;
1615                            }
1616                            if (uc >(U8*)last_start) break;
1617                        }
1618                                            
1619                        if ( word ) {
1620                            U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1621                            if (!leftmost || lpos < leftmost) {
1622                                DEBUG_r(accepted_word=word);
1623                                leftmost= lpos;
1624                            }
1625                            if (base==0) break;
1626                            
1627                        }
1628                        points[pointpos++ % maxlen]= uc;
1629                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1630                                             uscan, len, uvc, charid, foldlen,
1631                                             foldbuf, uniflags);
1632                        DEBUG_TRIE_EXECUTE_r({
1633                            dump_exec_pos( (char *)uc, c, strend, real_start, 
1634                                s,   do_utf8 );
1635                            PerlIO_printf(Perl_debug_log,
1636                                " Charid:%3u CP:%4"UVxf" ",
1637                                 charid, uvc);
1638                        });
1639
1640                        do {
1641#ifdef DEBUGGING
1642                            word = aho->states[ state ].wordnum;
1643#endif
1644                            base = aho->states[ state ].trans.base;
1645
1646                            DEBUG_TRIE_EXECUTE_r({
1647                                if (failed) 
1648                                    dump_exec_pos( (char *)uc, c, strend, real_start, 
1649                                        s,   do_utf8 );
1650                                PerlIO_printf( Perl_debug_log,
1651                                    "%sState: %4"UVxf", word=%"UVxf,
1652                                    failed ? " Fail transition to " : "",
1653                                    (UV)state, (UV)word);
1654                            });
1655                            if ( base ) {
1656                                U32 tmp;
1657                                if (charid &&
1658                                     (base + charid > trie->uniquecharcount )
1659                                     && (base + charid - 1 - trie->uniquecharcount
1660                                            < trie->lasttrans)
1661                                     && trie->trans[base + charid - 1 -
1662                                            trie->uniquecharcount].check == state
1663                                     && (tmp=trie->trans[base + charid - 1 -
1664                                        trie->uniquecharcount ].next))
1665                                {
1666                                    DEBUG_TRIE_EXECUTE_r(
1667                                        PerlIO_printf( Perl_debug_log," - legal\n"));
1668                                    state = tmp;
1669                                    break;
1670                                }
1671                                else {
1672                                    DEBUG_TRIE_EXECUTE_r(
1673                                        PerlIO_printf( Perl_debug_log," - fail\n"));
1674                                    failed = 1;
1675                                    state = aho->fail[state];
1676                                }
1677                            }
1678                            else {
1679                                /* we must be accepting here */
1680                                DEBUG_TRIE_EXECUTE_r(
1681                                        PerlIO_printf( Perl_debug_log," - accepting\n"));
1682                                failed = 1;
1683                                break;
1684                            }
1685                        } while(state);
1686                        uc += len;
1687                        if (failed) {
1688                            if (leftmost)
1689                                break;
1690                            if (!state) state = 1;
1691                        }
1692                    }
1693                    if ( aho->states[ state ].wordnum ) {
1694                        U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1695                        if (!leftmost || lpos < leftmost) {
1696                            DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1697                            leftmost = lpos;
1698                        }
1699                    }
1700                    if (leftmost) {
1701                        s = (char*)leftmost;
1702                        DEBUG_TRIE_EXECUTE_r({
1703                            PerlIO_printf( 
1704                                Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1705                                (UV)accepted_word, (IV)(s - real_start)
1706                            );
1707                        });
1708                        if (!reginfo || regtry(reginfo, &s)) {
1709                            FREETMPS;
1710                            LEAVE;
1711                            goto got_it;
1712                        }
1713                        s = HOPc(s,1);
1714                        DEBUG_TRIE_EXECUTE_r({
1715                            PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1716                        });
1717                    } else {
1718                        DEBUG_TRIE_EXECUTE_r(
1719                            PerlIO_printf( Perl_debug_log,"No match.\n"));
1720                        break;
1721                    }
1722                }
1723                FREETMPS;
1724                LEAVE;
1725            }
1726            break;
1727        default:
1728            Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1729            break;
1730        }
1731        return 0;
1732      got_it:
1733        return s;
1734}
1735
1736static void 
1737S_swap_match_buff (pTHX_ regexp *prog)
1738{
1739    regexp_paren_pair *t;
1740
1741    PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1742
1743    if (!prog->swap) {
1744    /* We have to be careful. If the previous successful match
1745       was from this regex we don't want a subsequent paritally
1746       successful match to clobber the old results. 
1747       So when we detect this possibility we add a swap buffer
1748       to the re, and switch the buffer each match. If we fail
1749       we switch it back, otherwise we leave it swapped.
1750    */
1751        Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1752    }
1753    t = prog->swap;
1754    prog->swap = prog->offs;
1755    prog->offs = t;
1756}    
1757
1758
1759/*
1760 - regexec_flags - match a regexp against a string
1761 */
1762I32
1763Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
1764              char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1765/* strend: pointer to null at end of string */
1766/* strbeg: real beginning of string */
1767/* minend: end of match must be >=minend after stringarg. */
1768/* data: May be used for some additional optimizations. 
1769         Currently its only used, with a U32 cast, for transmitting 
1770         the ganch offset when doing a /g match. This will change */
1771/* nosave: For optimizations. */
1772{
1773    dVAR;
1774    /*register*/ char *s;
1775    register regnode *c;
1776    /*register*/ char *startpos = stringarg;
1777    I32 minlen;         /* must match at least this many chars */
1778    I32 dontbother = 0; /* how many characters not to try at end */
1779    I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1780    I32 scream_pos = -1;                /* Internal iterator of scream. */
1781    char *scream_olds = NULL;
1782    const bool do_utf8 = (bool)DO_UTF8(sv);
1783    I32 multiline;
1784    RXi_GET_DECL(prog,progi);
1785    regmatch_info reginfo;  /* create some info to pass to regtry etc */
1786    bool swap_on_fail = 0;
1787    GET_RE_DEBUG_FLAGS_DECL;
1788
1789    PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1790    PERL_UNUSED_ARG(data);
1791
1792    /* Be paranoid... */
1793    if (prog == NULL || startpos == NULL) {
1794        Perl_croak(aTHX_ "NULL regexp parameter");
1795        return 0;
1796    }
1797
1798    multiline = prog->extflags & RXf_PMf_MULTILINE;
1799    reginfo.prog = prog;
1800
1801    RX_MATCH_UTF8_set(prog, do_utf8);
1802    DEBUG_EXECUTE_r( 
1803        debug_start_match(prog, do_utf8, startpos, strend, 
1804        "Matching");
1805    );
1806
1807    minlen = prog->minlen;
1808    
1809    if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1810        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1811                              "String too short [regexec_flags]...\n"));
1812        goto phooey;
1813    }
1814
1815    
1816    /* Check validity of program. */
1817    if (UCHARAT(progi->program) != REG_MAGIC) {
1818        Perl_croak(aTHX_ "corrupted regexp program");
1819    }
1820
1821    PL_reg_flags = 0;
1822    PL_reg_eval_set = 0;
1823    PL_reg_maxiter = 0;
1824
1825    if (RX_UTF8(prog))
1826        PL_reg_flags |= RF_utf8;
1827
1828    /* Mark beginning of line for ^ and lookbehind. */
1829    reginfo.bol = startpos; /* XXX not used ??? */
1830    PL_bostr  = strbeg;
1831    reginfo.sv = sv;
1832
1833    /* Mark end of line for $ (and such) */
1834    PL_regeol = strend;
1835
1836    /* see how far we have to get to not match where we matched before */
1837    reginfo.till = startpos+minend;
1838
1839    /* If there is a "must appear" string, look for it. */
1840    s = startpos;
1841
1842    if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1843        MAGIC *mg;
1844
1845        if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1846            reginfo.ganch = startpos + prog->gofs;
1847        else if (sv && SvTYPE(sv) >= SVt_PVMG
1848                  && SvMAGIC(sv)
1849                  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1850                  && mg->mg_len >= 0) {
1851            reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1852            if (prog->extflags & RXf_ANCH_GPOS) {
1853                if (s > reginfo.ganch)
1854                    goto phooey;
1855                s = reginfo.ganch - prog->gofs;
1856            }
1857        }
1858        else if (data) {
1859            reginfo.ganch = strbeg + PTR2UV(data);
1860        } else                          /* pos() not defined */
1861            reginfo.ganch = strbeg;
1862    }
1863    if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1864        swap_on_fail = 1;
1865        swap_match_buff(prog); /* do we need a save destructor here for
1866                                  eval dies? */
1867    }
1868    if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1869        re_scream_pos_data d;
1870
1871        d.scream_olds = &scream_olds;
1872        d.scream_pos = &scream_pos;
1873        s = re_intuit_start(prog, sv, s, strend, flags, &d);
1874        if (!s) {
1875            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1876            goto phooey;        /* not present */
1877        }
1878    }
1879
1880
1881
1882    /* Simplest case:  anchored match need be tried only once. */
1883    /*  [unless only anchor is BOL and multiline is set] */
1884    if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1885        if (s == startpos && regtry(&reginfo, &startpos))
1886            goto got_it;
1887        else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1888                 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1889        {
1890            char *end;
1891
1892            if (minlen)
1893                dontbother = minlen - 1;
1894            end = HOP3c(strend, -dontbother, strbeg) - 1;
1895            /* for multiline we only have to try after newlines */
1896            if (prog->check_substr || prog->check_utf8) {
1897                if (s == startpos)
1898                    goto after_try;
1899                while (1) {
1900                    if (regtry(&reginfo, &s))
1901                        goto got_it;
1902                  after_try:
1903                    if (s > end)
1904                        goto phooey;
1905                    if (prog->extflags & RXf_USE_INTUIT) {
1906                        s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1907                        if (!s)
1908                            goto phooey;
1909                    }
1910                    else
1911                        s++;
1912                }               
1913            } else {
1914                if (s > startpos)
1915                    s--;
1916                while (s < end) {
1917                    if (*s++ == '\n') { /* don't need PL_utf8skip here */
1918                        if (regtry(&reginfo, &s))
1919                            goto got_it;
1920                    }
1921                }               
1922            }
1923        }
1924        goto phooey;
1925    } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK)) 
1926    {
1927        /* the warning about reginfo.ganch being used without intialization
1928           is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN 
1929           and we only enter this block when the same bit is set. */
1930        char *tmp_s = reginfo.ganch - prog->gofs;
1931        if (regtry(&reginfo, &tmp_s))
1932            goto got_it;
1933        goto phooey;
1934    }
1935
1936    /* Messy cases:  unanchored match. */
1937    if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1938        /* we have /x+whatever/ */
1939        /* it must be a one character string (XXXX Except UTF?) */
1940        char ch;
1941#ifdef DEBUGGING
1942        int did_match = 0;
1943#endif
1944        if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1945            do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1946        ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1947
1948        if (do_utf8) {
1949            REXEC_FBC_SCAN(
1950                if (*s == ch) {
1951                    DEBUG_EXECUTE_r( did_match = 1 );
1952                    if (regtry(&reginfo, &s)) goto got_it;
1953                    s += UTF8SKIP(s);
1954                    while (s < strend && *s == ch)
1955                        s += UTF8SKIP(s);
1956                }
1957            );
1958        }
1959        else {
1960            REXEC_FBC_SCAN(
1961                if (*s == ch) {
1962                    DEBUG_EXECUTE_r( did_match = 1 );
1963                    if (regtry(&reginfo, &s)) goto got_it;
1964                    s++;
1965                    while (s < strend && *s == ch)
1966                        s++;
1967                }
1968            );
1969        }
1970        DEBUG_EXECUTE_r(if (!did_match)
1971                PerlIO_printf(Perl_debug_log,
1972                                  "Did not find anchored character...\n")
1973               );
1974    }
1975    else if (prog->anchored_substr != NULL
1976              || prog->anchored_utf8 != NULL
1977              || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1978                  && prog->float_max_offset < strend - s)) {
1979        SV *must;
1980        I32 back_max;
1981        I32 back_min;
1982        char *last;
1983        char *last1;            /* Last position checked before */
1984#ifdef DEBUGGING
1985        int did_match = 0;
1986#endif
1987        if (prog->anchored_substr || prog->anchored_utf8) {
1988            if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1989                do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1990            must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1991            back_max = back_min = prog->anchored_offset;
1992        } else {
1993            if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1994                do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1995            must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1996            back_max = prog->float_max_offset;
1997            back_min = prog->float_min_offset;
1998        }
1999        
2000            
2001        if (must == &PL_sv_undef)
2002            /* could not downgrade utf8 check substring, so must fail */
2003            goto phooey;
2004
2005        if (back_min<0) {
2006            last = strend;
2007        } else {
2008            last = HOP3c(strend,        /* Cannot start after this */
2009                  -(I32)(CHR_SVLEN(must)
2010                         - (SvTAIL(must) != 0) + back_min), strbeg);
2011        }
2012        if (s > PL_bostr)
2013            last1 = HOPc(s, -1);
2014        else
2015            last1 = s - 1;      /* bogus */
2016
2017        /* XXXX check_substr already used to find "s", can optimize if
2018           check_substr==must. */
2019        scream_pos = -1;
2020        dontbother = end_shift;
2021        strend = HOPc(strend, -dontbother);
2022        while ( (s <= last) &&
2023                ((flags & REXEC_SCREAM)
2024                 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2025                                    end_shift, &scream_pos, 0))
2026                 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2027                                  (unsigned char*)strend, must,
2028                                  multiline ? FBMrf_MULTILINE : 0))) ) {
2029            /* we may be pointing at the wrong string */
2030            if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2031                s = strbeg + (s - SvPVX_const(sv));
2032            DEBUG_EXECUTE_r( did_match = 1 );
2033            if (HOPc(s, -back_max) > last1) {
2034                last1 = HOPc(s, -back_min);
2035                s = HOPc(s, -back_max);
2036            }
2037            else {
2038                char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2039
2040                last1 = HOPc(s, -back_min);
2041                s = t;
2042            }
2043            if (do_utf8) {
2044                while (s <= last1) {
2045                    if (regtry(&reginfo, &s))
2046                        goto got_it;
2047                    s += UTF8SKIP(s);
2048                }
2049            }
2050            else {
2051                while (s <= last1) {
2052                    if (regtry(&reginfo, &s))
2053                        goto got_it;
2054                    s++;
2055                }
2056            }
2057        }
2058        DEBUG_EXECUTE_r(if (!did_match) {
2059            RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0), 
2060                SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2061            PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2062                              ((must == prog->anchored_substr || must == prog->anchored_utf8)
2063                               ? "anchored" : "floating"),
2064                quoted, RE_SV_TAIL(must));
2065        });                 
2066        goto phooey;
2067    }
2068    else if ( (c = progi->regstclass) ) {
2069        if (minlen) {
2070            const OPCODE op = OP(progi->regstclass);
2071            /* don't bother with what can't match */
2072            if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2073                strend = HOPc(strend, -(minlen - 1));
2074        }
2075        DEBUG_EXECUTE_r({
2076            SV * const prop = sv_newmortal();
2077            regprop(prog, prop, c);
2078            {
2079                RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2080                    s,strend-s,60);
2081                PerlIO_printf(Perl_debug_log,
2082                    "Matching stclass %.*s against %s (%d chars)\n",
2083                    (int)SvCUR(prop), SvPVX_const(prop),
2084                     quoted, (int)(strend - s));
2085            }
2086        });
2087        if (find_byclass(prog, c, s, strend, &reginfo))
2088            goto got_it;
2089        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2090    }
2091    else {
2092        dontbother = 0;
2093        if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2094            /* Trim the end. */
2095            char *last;
2096            SV* float_real;
2097
2098            if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2099                do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2100            float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2101
2102            if (flags & REXEC_SCREAM) {
2103                last = screaminstr(sv, float_real, s - strbeg,
2104                                   end_shift, &scream_pos, 1); /* last one */
2105                if (!last)
2106                    last = scream_olds; /* Only one occurrence. */
2107                /* we may be pointing at the wrong string */
2108                else if (RXp_MATCH_COPIED(prog))
2109                    s = strbeg + (s - SvPVX_const(sv));
2110            }
2111            else {
2112                STRLEN len;
2113                const char * const little = SvPV_const(float_real, len);
2114
2115                if (SvTAIL(float_real)) {
2116                    if (memEQ(strend - len + 1, little, len - 1))
2117                        last = strend - len + 1;
2118                    else if (!multiline)
2119                        last = memEQ(strend - len, little, len)
2120                            ? strend - len : NULL;
2121                    else
2122                        goto find_last;
2123                } else {
2124                  find_last:
2125                    if (len)
2126                        last = rninstr(s, strend, little, little + len);
2127                    else
2128                        last = strend;  /* matching "$" */
2129                }
2130            }
2131            if (last == NULL) {
2132                DEBUG_EXECUTE_r(
2133                    PerlIO_printf(Perl_debug_log,
2134                        "%sCan't trim the tail, match fails (should not happen)%s\n",
2135                        PL_colors[4], PL_colors[5]));
2136                goto phooey; /* Should not happen! */
2137            }
2138            dontbother = strend - last + prog->float_min_offset;
2139        }
2140        if (minlen && (dontbother < minlen))
2141            dontbother = minlen - 1;
2142        strend -= dontbother;              /* this one's always in bytes! */
2143        /* We don't know much -- general case. */
2144        if (do_utf8) {
2145            for (;;) {
2146                if (regtry(&reginfo, &s))
2147                    goto got_it;
2148                if (s >= strend)
2149                    break;
2150                s += UTF8SKIP(s);
2151            };
2152        }
2153        else {
2154            do {
2155                if (regtry(&reginfo, &s))
2156                    goto got_it;
2157            } while (s++ < strend);
2158        }
2159    }
2160
2161    /* Failure. */
2162    goto phooey;
2163
2164got_it:
2165    RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2166
2167    if (PL_reg_eval_set)
2168        restore_pos(aTHX_ prog);
2169    if (RXp_PAREN_NAMES(prog)) 
2170        (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2171
2172    /* make sure $`, $&, $', and $digit will work later */
2173    if ( !(flags & REXEC_NOT_FIRST) ) {
2174        RX_MATCH_COPY_FREE(prog);
2175        if (flags & REXEC_COPY_STR) {
2176            const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2177#ifdef PERL_OLD_COPY_ON_WRITE
2178            if ((SvIsCOW(sv)
2179                 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2180                if (DEBUG_C_TEST) {
2181                    PerlIO_printf(Perl_debug_log,
2182                                  "Copy on write: regexp capture, type %d\n",
2183                                  (int) SvTYPE(sv));
2184                }
2185                prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2186                prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2187                assert (SvPOKp(prog->saved_copy));
2188            } else
2189#endif
2190            {
2191                RX_MATCH_COPIED_on(prog);
2192                s = savepvn(strbeg, i);
2193                prog->subbeg = s;
2194            }
2195            prog->sublen = i;
2196        }
2197        else {
2198            prog->subbeg = strbeg;
2199            prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2200        }
2201    }
2202
2203    return 1;
2204
2205phooey:
2206    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2207                          PL_colors[4], PL_colors[5]));
2208    if (PL_reg_eval_set)
2209        restore_pos(aTHX_ prog);
2210    if (swap_on_fail) 
2211        /* we failed :-( roll it back */
2212        swap_match_buff(prog);
2213    
2214    return 0;
2215}
2216
2217
2218/*
2219 - regtry - try match at specific point
2220 */
2221STATIC I32                      /* 0 failure, 1 success */
2222S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2223{
2224    dVAR;
2225    CHECKPOINT lastcp;
2226    regexp *prog = reginfo->prog;
2227    RXi_GET_DECL(prog,progi);
2228    GET_RE_DEBUG_FLAGS_DECL;
2229
2230    PERL_ARGS_ASSERT_REGTRY;
2231
2232    reginfo->cutpoint=NULL;
2233
2234    if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2235        MAGIC *mg;
2236
2237        PL_reg_eval_set = RS_init;
2238        DEBUG_EXECUTE_r(DEBUG_s(
2239            PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2240                          (IV)(PL_stack_sp - PL_stack_base));
2241            ));
2242        SAVESTACK_CXPOS();
2243        cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2244        /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2245        SAVETMPS;
2246        /* Apparently this is not needed, judging by wantarray. */
2247        /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2248           cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2249
2250        if (reginfo->sv) {
2251            /* Make $_ available to executed code. */
2252            if (reginfo->sv != DEFSV) {
2253                SAVE_DEFSV;
2254                DEFSV_set(reginfo->sv);
2255            }
2256        
2257            if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2258                  && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2259                /* prepare for quick setting of pos */
2260#ifdef PERL_OLD_COPY_ON_WRITE
2261                if (SvIsCOW(reginfo->sv))
2262                    sv_force_normal_flags(reginfo->sv, 0);
2263#endif
2264                mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2265                                 &PL_vtbl_mglob, NULL, 0);
2266                mg->mg_len = -1;
2267            }
2268            PL_reg_magic    = mg;
2269            PL_reg_oldpos   = mg->mg_len;
2270            SAVEDESTRUCTOR_X(restore_pos, prog);
2271        }
2272        if (!PL_reg_curpm) {
2273            Newxz(PL_reg_curpm, 1, PMOP);
2274#ifdef USE_ITHREADS
2275            {
2276                SV* const repointer = newSViv(0);
2277                /* this regexp is also owned by the new PL_reg_curpm, which
2278                   will try to free it.  */
2279                av_push(PL_regex_padav,repointer);
2280                PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2281                PL_regex_pad = AvARRAY(PL_regex_padav);
2282            }
2283#endif      
2284        }
2285#ifdef USE_ITHREADS
2286        /* It seems that non-ithreads works both with and without this code.
2287           So for efficiency reasons it seems best not to have the code
2288           compiled when it is not needed.  */
2289        /* This is safe against NULLs: */
2290        ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2291        /* PM_reg_curpm owns a reference to this regexp.  */
2292        ReREFCNT_inc(prog);
2293#endif
2294        PM_SETRE(PL_reg_curpm, prog);
2295        PL_reg_oldcurpm = PL_curpm;
2296        PL_curpm = PL_reg_curpm;
2297        if (RXp_MATCH_COPIED(prog)) {
2298            /*  Here is a serious problem: we cannot rewrite subbeg,
2299                since it may be needed if this match fails.  Thus
2300                $` inside (?{}) could fail... */
2301            PL_reg_oldsaved = prog->subbeg;
2302            PL_reg_oldsavedlen = prog->sublen;
2303#ifdef PERL_OLD_COPY_ON_WRITE
2304            PL_nrs = prog->saved_copy;
2305#endif
2306            RXp_MATCH_COPIED_off(prog);
2307        }
2308        else
2309            PL_reg_oldsaved = NULL;
2310        prog->subbeg = PL_bostr;
2311        prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2312    }
2313    DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2314    prog->offs[0].start = *startpos - PL_bostr;
2315    PL_reginput = *startpos;
2316    PL_reglastparen = &prog->lastparen;
2317    PL_reglastcloseparen = &prog->lastcloseparen;
2318    prog->lastparen = 0;
2319    prog->lastcloseparen = 0;
2320    PL_regsize = 0;
2321    PL_regoffs = prog->offs;
2322    if (PL_reg_start_tmpl <= prog->nparens) {
2323        PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2324        if(PL_reg_start_tmp)
2325            Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2326        else
2327            Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2328    }
2329
2330    /* XXXX What this code is doing here?!!!  There should be no need
2331       to do this again and again, PL_reglastparen should take care of
2332       this!  --ilya*/
2333
2334    /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2335     * Actually, the code in regcppop() (which Ilya may be meaning by
2336     * PL_reglastparen), is not needed at all by the test suite
2337     * (op/regexp, op/pat, op/split), but that code is needed otherwise
2338     * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2339     * Meanwhile, this code *is* needed for the
2340     * above-mentioned test suite tests to succeed.  The common theme
2341     * on those tests seems to be returning null fields from matches.
2342     * --jhi updated by dapm */
2343#if 1
2344    if (prog->nparens) {
2345        regexp_paren_pair *pp = PL_regoffs;
2346        register I32 i;
2347        for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2348            ++pp;
2349            pp->start = -1;
2350            pp->end = -1;
2351        }
2352    }
2353#endif
2354    REGCP_SET(lastcp);
2355    if (regmatch(reginfo, progi->program + 1)) {
2356        PL_regoffs[0].end = PL_reginput - PL_bostr;
2357        return 1;
2358    }
2359    if (reginfo->cutpoint)
2360        *startpos= reginfo->cutpoint;
2361    REGCP_UNWIND(lastcp);
2362    return 0;
2363}
2364
2365
2366#define sayYES goto yes
2367#define sayNO goto no
2368#define sayNO_SILENT goto no_silent
2369
2370/* we dont use STMT_START/END here because it leads to 
2371   "unreachable code" warnings, which are bogus, but distracting. */
2372#define CACHEsayNO \
2373    if (ST.cache_mask) \
2374       PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2375    sayNO
2376
2377/* this is used to determine how far from the left messages like
2378   'failed...' are printed. It should be set such that messages 
2379   are inline with the regop output that created them.
2380*/
2381#define REPORT_CODE_OFF 32
2382
2383
2384/* Make sure there is a test for this +1 options in re_tests */
2385#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2386
2387#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2388#define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2389
2390#define SLAB_FIRST(s) (&(s)->states[0])
2391#define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2392
2393/* grab a new slab and return the first slot in it */
2394
2395STATIC regmatch_state *
2396S_push_slab(pTHX)
2397{
2398#if PERL_VERSION < 9 && !defined(PERL_CORE)
2399    dMY_CXT;
2400#endif
2401    regmatch_slab *s = PL_regmatch_slab->next;
2402    if (!s) {
2403        Newx(s, 1, regmatch_slab);
2404        s->prev = PL_regmatch_slab;
2405        s->next = NULL;
2406        PL_regmatch_slab->next = s;
2407    }
2408    PL_regmatch_slab = s;
2409    return SLAB_FIRST(s);
2410}
2411
2412
2413/* push a new state then goto it */
2414
2415#define PUSH_STATE_GOTO(state, node) \
2416    scan = node; \
2417    st->resume_state = state; \
2418    goto push_state;
2419
2420/* push a new state with success backtracking, then goto it */
2421
2422#define PUSH_YES_STATE_GOTO(state, node) \
2423    scan = node; \
2424    st->resume_state = state; \
2425    goto push_yes_state;
2426
2427
2428
2429/*
2430
2431regmatch() - main matching routine
2432
2433This is basically one big switch statement in a loop. We execute an op,
2434set 'next' to point the next op, and continue. If we come to a point which
2435we may need to backtrack to on failure such as (A|B|C), we push a
2436backtrack state onto the backtrack stack. On failure, we pop the top
2437state, and re-enter the loop at the state indicated. If there are no more
2438states to pop, we return failure.
2439
2440Sometimes we also need to backtrack on success; for example /A+/, where
2441after successfully matching one A, we need to go back and try to
2442match another one; similarly for lookahead assertions: if the assertion
2443completes successfully, we backtrack to the state just before the assertion
2444and then carry on.  In these cases, the pushed state is marked as
2445'backtrack on success too'. This marking is in fact done by a chain of
2446pointers, each pointing to the previous 'yes' state. On success, we pop to
2447the nearest yes state, discarding any intermediate failure-only states.
2448Sometimes a yes state is pushed just to force some cleanup code to be
2449called at the end of a successful match or submatch; e.g. (??{$re}) uses
2450it to free the inner regex.
2451
2452Note that failure backtracking rewinds the cursor position, while
2453success backtracking leaves it alone.
2454
2455A pattern is complete when the END op is executed, while a subpattern
2456such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2457ops trigger the "pop to last yes state if any, otherwise return true"
2458behaviour.
2459
2460A common convention in this function is to use A and B to refer to the two
2461subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2462the subpattern to be matched possibly multiple times, while B is the entire
2463rest of the pattern. Variable and state names reflect this convention.
2464
2465The states in the main switch are the union of ops and failure/success of
2466substates associated with with that op.  For example, IFMATCH is the op
2467that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2468'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2469successfully matched A and IFMATCH_A_fail is a state saying that we have
2470just failed to match A. Resume states always come in pairs. The backtrack
2471state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2472at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2473on success or failure.
2474
2475The struct that holds a backtracking state is actually a big union, with
2476one variant for each major type of op. The variable st points to the
2477top-most backtrack struct. To make the code clearer, within each
2478block of code we #define ST to alias the relevant union.
2479
2480Here's a concrete example of a (vastly oversimplified) IFMATCH
2481implementation:
2482
2483    switch (state) {
2484    ....
2485
2486#define ST st->u.ifmatch
2487
2488    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2489        ST.foo = ...; // some state we wish to save
2490        ...
2491        // push a yes backtrack state with a resume value of
2492        // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2493        // first node of A:
2494        PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2495        // NOTREACHED
2496
2497    case IFMATCH_A: // we have successfully executed A; now continue with B
2498        next = B;
2499        bar = ST.foo; // do something with the preserved value
2500        break;
2501
2502    case IFMATCH_A_fail: // A failed, so the assertion failed
2503        ...;   // do some housekeeping, then ...
2504        sayNO; // propagate the failure
2505
2506#undef ST
2507
2508    ...
2509    }
2510
2511For any old-timers reading this who are familiar with the old recursive
2512approach, the code above is equivalent to:
2513
2514    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2515    {
2516        int foo = ...
2517        ...
2518        if (regmatch(A)) {
2519            next = B;
2520            bar = foo;
2521            break;
2522        }
2523        ...;   // do some housekeeping, then ...
2524        sayNO; // propagate the failure
2525    }
2526
2527The topmost backtrack state, pointed to by st, is usually free. If you
2528want to claim it, populate any ST.foo fields in it with values you wish to
2529save, then do one of
2530
2531        PUSH_STATE_GOTO(resume_state, node);
2532        PUSH_YES_STATE_GOTO(resume_state, node);
2533
2534which sets that backtrack state's resume value to 'resume_state', pushes a
2535new free entry to the top of the backtrack stack, then goes to 'node'.
2536On backtracking, the free slot is popped, and the saved state becomes the
2537new free state. An ST.foo field in this new top state can be temporarily
2538accessed to retrieve values, but once the main loop is re-entered, it
2539becomes available for reuse.
2540
2541Note that the depth of the backtrack stack constantly increases during the
2542left-to-right execution of the pattern, rather than going up and down with
2543the pattern nesting. For example the stack is at its maximum at Z at the
2544end of the pattern, rather than at X in the following:
2545
2546    /(((X)+)+)+....(Y)+....Z/
2547
2548The only exceptions to this are lookahead/behind assertions and the cut,
2549(?>A), which pop all the backtrack states associated with A before
2550continuing.
2551 
2552Bascktrack state structs are allocated in slabs of about 4K in size.
2553PL_regmatch_state and st always point to the currently active state,
2554and PL_regmatch_slab points to the slab currently containing
2555PL_regmatch_state.  The first time regmatch() is called, the first slab is
2556allocated, and is never freed until interpreter destruction. When the slab
2557is full, a new one is allocated and chained to the end. At exit from
2558regmatch(), slabs allocated since entry are freed.
2559
2560*/
2561 
2562
2563#define DEBUG_STATE_pp(pp)                                  \
2564    DEBUG_STATE_r({                                         \
2565        DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2566        PerlIO_printf(Perl_debug_log,                       \
2567            "    %*s"pp" %s%s%s%s%s\n",                     \
2568            depth*2, "",                                    \
2569            PL_reg_name[st->resume_state],                     \
2570            ((st==yes_state||st==mark_state) ? "[" : ""),   \
2571            ((st==yes_state) ? "Y" : ""),                   \
2572            ((st==mark_state) ? "M" : ""),                  \
2573            ((st==yes_state||st==mark_state) ? "]" : "")    \
2574        );                                                  \
2575    });
2576
2577
2578#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2579
2580#ifdef DEBUGGING
2581
2582STATIC void
2583S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8, 
2584    const char *start, const char *end, const char *blurb)
2585{
2586    const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2587
2588    PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2589
2590    if (!PL_colorset)   
2591            reginitcolors();    
2592    {
2593        RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0), 
2594            RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);   
2595        
2596        RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1), 
2597            start, end - start, 60); 
2598        
2599        PerlIO_printf(Perl_debug_log, 
2600            "%s%s REx%s %s against %s\n", 
2601                       PL_colors[4], blurb, PL_colors[5], s0, s1); 
2602        
2603        if (do_utf8||utf8_pat) 
2604            PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2605                utf8_pat ? "pattern" : "",
2606                utf8_pat && do_utf8 ? " and " : "",
2607                do_utf8 ? "string" : ""
2608            ); 
2609    }
2610}
2611
2612STATIC void
2613S_dump_exec_pos(pTHX_ const char *locinput, 
2614                      const regnode *scan, 
2615                      const char *loc_regeol, 
2616                      const char *loc_bostr, 
2617                      const char *loc_reg_starttry,
2618                      const bool do_utf8)
2619{
2620    const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2621    const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2622    int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2623    /* The part of the string before starttry has one color
2624       (pref0_len chars), between starttry and current
2625       position another one (pref_len - pref0_len chars),
2626       after the current position the third one.
2627       We assume that pref0_len <= pref_len, otherwise we
2628       decrease pref0_len.  */
2629    int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2630        ? (5 + taill) - l : locinput - loc_bostr;
2631    int pref0_len;
2632
2633    PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2634
2635    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2636        pref_len++;
2637    pref0_len = pref_len  - (locinput - loc_reg_starttry);
2638    if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2639        l = ( loc_regeol - locinput > (5 + taill) - pref_len
2640              ? (5 + taill) - pref_len : loc_regeol - locinput);
2641    while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2642        l--;
2643    if (pref0_len < 0)
2644        pref0_len = 0;
2645    if (pref0_len > pref_len)
2646        pref0_len = pref_len;
2647    {
2648        const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2649
2650        RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2651            (locinput - pref_len),pref0_len, 60, 4, 5);
2652        
2653        RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2654                    (locinput - pref_len + pref0_len),
2655                    pref_len - pref0_len, 60, 2, 3);
2656        
2657        RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2658                    locinput, loc_regeol - locinput, 10, 0, 1);
2659
2660        const STRLEN tlen=len0+len1+len2;
2661        PerlIO_printf(Perl_debug_log,
2662                    "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2663                    (IV)(locinput - loc_bostr),
2664                    len0, s0,
2665                    len1, s1,
2666                    (docolor ? "" : "> <"),
2667                    len2, s2,
2668                    (int)(tlen > 19 ? 0 :  19 - tlen),
2669                    "");
2670    }
2671}
2672
2673#endif
2674
2675/* reg_check_named_buff_matched()
2676 * Checks to see if a named buffer has matched. The data array of 
2677 * buffer numbers corresponding to the buffer is expected to reside
2678 * in the regexp->data->data array in the slot stored in the ARG() of
2679 * node involved. Note that this routine doesn't actually care about the
2680 * name, that information is not preserved from compilation to execution.
2681 * Returns the index of the leftmost defined buffer with the given name
2682 * or 0 if non of the buffers matched.
2683 */
2684STATIC I32
2685S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2686{
2687    I32 n;
2688    RXi_GET_DECL(rex,rexi);
2689    SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2690    I32 *nums=(I32*)SvPVX(sv_dat);
2691
2692    PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2693
2694    for ( n=0; n<SvIVX(sv_dat); n++ ) {
2695        if ((I32)*PL_reglastparen >= nums[n] &&
2696            PL_regoffs[nums[n]].end != -1)
2697        {
2698            return nums[n];
2699        }
2700    }
2701    return 0;
2702}
2703
2704
2705/* free all slabs above current one  - called during LEAVE_SCOPE */
2706
2707STATIC void
2708S_clear_backtrack_stack(pTHX_ void *p)
2709{
2710    regmatch_slab *s = PL_regmatch_slab->next;
2711    PERL_UNUSED_ARG(p);
2712
2713    if (!s)
2714        return;
2715    PL_regmatch_slab->next = NULL;
2716    while (s) {
2717        regmatch_slab * const osl = s;
2718        s = s->next;
2719        Safefree(osl);
2720    }
2721}
2722
2723
2724#define SETREX(Re1,Re2) \
2725    if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2726    Re1 = (Re2)
2727
2728STATIC I32                      /* 0 failure, 1 success */
2729S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2730{
2731#if PERL_VERSION < 9 && !defined(PERL_CORE)
2732    dMY_CXT;
2733#endif
2734    dVAR;
2735    register const bool do_utf8 = PL_reg_match_utf8;
2736    const U32 uniflags = UTF8_ALLOW_DEFAULT;
2737    regexp *rex = reginfo->prog;
2738    RXi_GET_DECL(rex,rexi);
2739    I32 oldsave;
2740    /* the current state. This is a cached copy of PL_regmatch_state */
2741    register regmatch_state *st;
2742    /* cache heavy used fields of st in registers */
2743    register regnode *scan;
2744    register regnode *next;
2745    register U32 n = 0; /* general value; init to avoid compiler warning */
2746    register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2747    register char *locinput = PL_reginput;
2748    register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2749
2750    bool result = 0;        /* return value of S_regmatch */
2751    int depth = 0;          /* depth of backtrack stack */
2752    U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2753    const U32 max_nochange_depth =
2754        (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2755        3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2756    regmatch_state *yes_state = NULL; /* state to pop to on success of
2757                                                            subpattern */
2758    /* mark_state piggy backs on the yes_state logic so that when we unwind 
2759       the stack on success we can update the mark_state as we go */
2760    regmatch_state *mark_state = NULL; /* last mark state we have seen */
2761    regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2762    struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2763    U32 state_num;
2764    bool no_final = 0;      /* prevent failure from backtracking? */
2765    bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2766    char *startpoint = PL_reginput;
2767    SV *popmark = NULL;     /* are we looking for a mark? */
2768    SV *sv_commit = NULL;   /* last mark name seen in failure */
2769    SV *sv_yes_mark = NULL; /* last mark name we have seen 
2770                               during a successfull match */
2771    U32 lastopen = 0;       /* last open we saw */
2772    bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;   
2773    SV* const oreplsv = GvSV(PL_replgv);
2774    /* these three flags are set by various ops to signal information to
2775     * the very next op. They have a useful lifetime of exactly one loop
2776     * iteration, and are not preserved or restored by state pushes/pops
2777     */
2778    bool sw = 0;            /* the condition value in (?(cond)a|b) */
2779    bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2780    int logical = 0;        /* the following EVAL is:
2781                                0: (?{...})
2782                                1: (?(?{...})X|Y)
2783                                2: (??{...})
2784                               or the following IFMATCH/UNLESSM is:
2785                                false: plain (?=foo)
2786                                true:  used as a condition: (?(?=foo))
2787                            */
2788#ifdef DEBUGGING
2789    GET_RE_DEBUG_FLAGS_DECL;
2790#endif
2791
2792    PERL_ARGS_ASSERT_REGMATCH;
2793
2794    DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2795            PerlIO_printf(Perl_debug_log,"regmatch start\n");
2796    }));
2797    /* on first ever call to regmatch, allocate first slab */
2798    if (!PL_regmatch_slab) {
2799        Newx(PL_regmatch_slab, 1, regmatch_slab);
2800        PL_regmatch_slab->prev = NULL;
2801        PL_regmatch_slab->next = NULL;
2802        PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2803    }
2804
2805    oldsave = PL_savestack_ix;
2806    SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2807    SAVEVPTR(PL_regmatch_slab);
2808    SAVEVPTR(PL_regmatch_state);
2809
2810    /* grab next free state slot */
2811    st = ++PL_regmatch_state;
2812    if (st >  SLAB_LAST(PL_regmatch_slab))
2813        st = PL_regmatch_state = S_push_slab(aTHX);
2814
2815    /* Note that nextchr is a byte even in UTF */
2816    nextchr = UCHARAT(locinput);
2817    scan = prog;
2818    while (scan != NULL) {
2819
2820        DEBUG_EXECUTE_r( {
2821            SV * const prop = sv_newmortal();
2822            regnode *rnext=regnext(scan);
2823            DUMP_EXEC_POS( locinput, scan, do_utf8 );
2824            regprop(rex, prop, scan);
2825            
2826            PerlIO_printf(Perl_debug_log,
2827                    "%3"IVdf":%*s%s(%"IVdf")\n",
2828                    (IV)(scan - rexi->program), depth*2, "",
2829                    SvPVX_const(prop),
2830                    (PL_regkind[OP(scan)] == END || !rnext) ? 
2831                        0 : (IV)(rnext - rexi->program));
2832        });
2833
2834        next = scan + NEXT_OFF(scan);
2835        if (next == scan)
2836            next = NULL;
2837        state_num = OP(scan);
2838
2839      reenter_switch:
2840
2841        assert(PL_reglastparen == &rex->lastparen);
2842        assert(PL_reglastcloseparen == &rex->lastcloseparen);
2843        assert(PL_regoffs == rex->offs);
2844
2845        switch (state_num) {
2846        case BOL:
2847            if (locinput == PL_bostr)
2848            {
2849                /* reginfo->till = reginfo->bol; */
2850                break;
2851            }
2852            sayNO;
2853        case MBOL:
2854            if (locinput == PL_bostr ||
2855                ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2856            {
2857                break;
2858            }
2859            sayNO;
2860        case SBOL:
2861            if (locinput == PL_bostr)
2862                break;
2863            sayNO;
2864        case GPOS:
2865            if (locinput == reginfo->ganch)
2866                break;
2867            sayNO;
2868
2869        case KEEPS:
2870            /* update the startpoint */
2871            st->u.keeper.val = PL_regoffs[0].start;
2872            PL_reginput = locinput;
2873            PL_regoffs[0].start = locinput - PL_bostr;
2874            PUSH_STATE_GOTO(KEEPS_next, next);
2875            /*NOT-REACHED*/
2876        case KEEPS_next_fail:
2877            /* rollback the start point change */
2878            PL_regoffs[0].start = st->u.keeper.val;
2879            sayNO_SILENT;
2880            /*NOT-REACHED*/
2881        case EOL:
2882                goto seol;
2883        case MEOL:
2884            if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2885                sayNO;
2886            break;
2887        case SEOL:
2888          seol:
2889            if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2890                sayNO;
2891            if (PL_regeol - locinput > 1)
2892                sayNO;
2893            break;
2894        case EOS:
2895            if (PL_regeol != locinput)
2896                sayNO;
2897            break;
2898        case SANY:
2899            if (!nextchr && locinput >= PL_regeol)
2900                sayNO;
2901            if (do_utf8) {
2902                locinput += PL_utf8skip[nextchr];
2903                if (locinput > PL_regeol)
2904                    sayNO;
2905                nextchr = UCHARAT(locinput);
2906            }
2907            else
2908                nextchr = UCHARAT(++locinput);
2909            break;
2910        case CANY:
2911            if (!nextchr && locinput >= PL_regeol)
2912                sayNO;
2913            nextchr = UCHARAT(++locinput);
2914            break;
2915        case REG_ANY:
2916            if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2917                sayNO;
2918            if (do_utf8) {
2919                locinput += PL_utf8skip[nextchr];
2920                if (locinput > PL_regeol)
2921                    sayNO;
2922                nextchr = UCHARAT(locinput);
2923            }
2924            else
2925                nextchr = UCHARAT(++locinput);
2926            break;
2927
2928#undef  ST
2929#define ST st->u.trie
2930        case TRIEC:
2931            /* In this case the charclass data is available inline so
2932               we can fail fast without a lot of extra overhead. 
2933             */
2934            if (scan->flags == EXACT || !do_utf8) {
2935                if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2936                    DEBUG_EXECUTE_r(
2937                        PerlIO_printf(Perl_debug_log,
2938                                  "%*s  %sfailed to match trie start class...%s\n",
2939                                  REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2940                    );
2941                    sayNO_SILENT;
2942                    /* NOTREACHED */
2943                }                       
2944            }
2945            /* FALL THROUGH */
2946        case TRIE:
2947            {
2948                /* what type of TRIE am I? (utf8 makes this contextual) */
2949                DECL_TRIE_TYPE(scan);
2950
2951                /* what trie are we using right now */
2952                reg_trie_data * const trie
2953                    = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2954                HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2955                U32 state = trie->startstate;
2956
2957                if (trie->bitmap && trie_type != trie_utf8_fold &&
2958                    !TRIE_BITMAP_TEST(trie,*locinput)
2959                ) {
2960                    if (trie->states[ state ].wordnum) {
2961                         DEBUG_EXECUTE_r(
2962                            PerlIO_printf(Perl_debug_log,
2963                                          "%*s  %smatched empty string...%s\n",
2964                                          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2965                        );
2966                        break;
2967                    } else {
2968                        DEBUG_EXECUTE_r(
2969                            PerlIO_printf(Perl_debug_log,
2970                                          "%*s  %sfailed to match trie start class...%s\n",
2971                                          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2972                        );
2973                        sayNO_SILENT;
2974                   }
2975                }
2976
2977            { 
2978                U8 *uc = ( U8* )locinput;
2979
2980                STRLEN len = 0;
2981                STRLEN foldlen = 0;
2982                U8 *uscan = (U8*)NULL;
2983                STRLEN bufflen=0;
2984                SV *sv_accept_buff = NULL;
2985                U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2986
2987                ST.accepted = 0; /* how many accepting states we have seen */
2988                ST.B = next;
2989                ST.jump = trie->jump;
2990                ST.me = scan;
2991                /*
2992                   traverse the TRIE keeping track of all accepting states
2993                   we transition through until we get to a failing node.
2994                */
2995
2996                while ( state && uc <= (U8*)PL_regeol ) {
2997                    U32 base = trie->states[ state ].trans.base;
2998                    UV uvc = 0;
2999                    U16 charid;
3000                    /* We use charid to hold the wordnum as we don't use it
3001                       for charid until after we have done the wordnum logic. 
3002                       We define an alias just so that the wordnum logic reads
3003                       more naturally. */
3004
3005#define got_wordnum charid
3006                    got_wordnum = trie->states[ state ].wordnum;
3007
3008                    if ( got_wordnum ) {
3009                        if ( ! ST.accepted ) {
3010                            ENTER;
3011                            SAVETMPS; /* XXX is this necessary? dmq */
3012                            bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3013                            sv_accept_buff=newSV(bufflen *
3014                                            sizeof(reg_trie_accepted) - 1);
3015                            SvCUR_set(sv_accept_buff, 0);
3016                            SvPOK_on(sv_accept_buff);
3017                            sv_2mortal(sv_accept_buff);
3018                            SAVETMPS;
3019                            ST.accept_buff =
3020                                (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3021                        }
3022                        do {
3023                            if (ST.accepted >= bufflen) {
3024                                bufflen *= 2;
3025                                ST.accept_buff =(reg_trie_accepted*)
3026                                    SvGROW(sv_accept_buff,
3027                                        bufflen * sizeof(reg_trie_accepted));
3028                            }
3029                            SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3030                                + sizeof(reg_trie_accepted));
3031
3032
3033                            ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3034                            ST.accept_buff[ST.accepted].endpos = uc;
3035                            ++ST.accepted;
3036                        } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3037                    }
3038#undef got_wordnum 
3039
3040                    DEBUG_TRIE_EXECUTE_r({
3041                                DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3042                                PerlIO_printf( Perl_debug_log,
3043                                    "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3044                                    2+depth * 2, "", PL_colors[4],
3045                                    (UV)state, (UV)ST.accepted );
3046                    });
3047
3048                    if ( base ) {
3049                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3050                                             uscan, len, uvc, charid, foldlen,
3051                                             foldbuf, uniflags);
3052
3053                        if (charid &&
3054                             (base + charid > trie->uniquecharcount )
3055                             && (base + charid - 1 - trie->uniquecharcount
3056                                    < trie->lasttrans)
3057                             && trie->trans[base + charid - 1 -
3058                                    trie->uniquecharcount].check == state)
3059                        {
3060                            state = trie->trans[base + charid - 1 -
3061                                trie->uniquecharcount ].next;
3062                        }
3063                        else {
3064                            state = 0;
3065                        }
3066                        uc += len;
3067
3068                    }
3069                    else {
3070                        state = 0;
3071                    }
3072                    DEBUG_TRIE_EXECUTE_r(
3073                        PerlIO_printf( Perl_debug_log,
3074                            "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3075                            charid, uvc, (UV)state, PL_colors[5] );
3076                    );
3077                }
3078                if (!ST.accepted )
3079                   sayNO;
3080
3081                DEBUG_EXECUTE_r(
3082                    PerlIO_printf( Perl_debug_log,
3083                        "%*s  %sgot %"IVdf" possible matches%s\n",
3084                        REPORT_CODE_OFF + depth * 2, "",
3085                        PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3086                );
3087            }}
3088            goto trie_first_try; /* jump into the fail handler */
3089            /* NOTREACHED */
3090        case TRIE_next_fail: /* we failed - try next alterative */
3091            if ( ST.jump) {
3092                REGCP_UNWIND(ST.cp);
3093                for (n = *PL_reglastparen; n > ST.lastparen; n--)
3094                    PL_regoffs[n].end = -1;
3095                *PL_reglastparen = n;
3096            }
3097          trie_first_try:
3098            if (do_cutgroup) {
3099                do_cutgroup = 0;
3100                no_final = 0;
3101            }
3102
3103            if ( ST.jump) {
3104                ST.lastparen = *PL_reglastparen;
3105                REGCP_SET(ST.cp);
3106            }           
3107            if ( ST.accepted == 1 ) {
3108                /* only one choice left - just continue */
3109                DEBUG_EXECUTE_r({
3110                    AV *const trie_words
3111                        = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3112                    SV ** const tmp = av_fetch( trie_words, 
3113                        ST.accept_buff[ 0 ].wordnum-1, 0 );
3114                    SV *sv= tmp ? sv_newmortal() : NULL;
3115                    
3116                    PerlIO_printf( Perl_debug_log,
3117                        "%*s  %sonly one match left: #%d <%s>%s\n",
3118                        REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3119                        ST.accept_buff[ 0 ].wordnum,
3120                        tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0, 
3121                                PL_colors[0], PL_colors[1],
3122                                (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3123                            ) 
3124                        : "not compiled under -Dr",
3125                        PL_colors[5] );
3126                });
3127                PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3128                /* in this case we free tmps/leave before we call regmatch
3129                   as we wont be using accept_buff again. */
3130                
3131                locinput = PL_reginput;
3132                nextchr = UCHARAT(locinput);
3133                if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum]) 
3134                    scan = ST.B;
3135                else
3136                    scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3137                if (!has_cutgroup) {
3138                    FREETMPS;
3139                    LEAVE;
3140                } else {
3141                    ST.accepted--;
3142                    PUSH_YES_STATE_GOTO(TRIE_next, scan);
3143                }
3144                
3145                continue; /* execute rest of RE */
3146            }
3147            
3148            if ( !ST.accepted-- ) {
3149                DEBUG_EXECUTE_r({
3150                    PerlIO_printf( Perl_debug_log,
3151                        "%*s  %sTRIE failed...%s\n",
3152                        REPORT_CODE_OFF+depth*2, "", 
3153                        PL_colors[4],
3154                        PL_colors[5] );
3155                });
3156                FREETMPS;
3157                LEAVE;
3158                sayNO_SILENT;
3159                /*NOTREACHED*/
3160            } 
3161
3162            /*
3163               There are at least two accepting states left.  Presumably
3164               the number of accepting states is going to be low,
3165               typically two. So we simply scan through to find the one
3166               with lowest wordnum.  Once we find it, we swap the last
3167               state into its place and decrement the size. We then try to
3168               match the rest of the pattern at the point where the word
3169               ends. If we succeed, control just continues along the
3170               regex; if we fail we return here to try the next accepting
3171               state
3172             */
3173
3174            {
3175                U32 best = 0;
3176                U32 cur;
3177                for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3178                    DEBUG_TRIE_EXECUTE_r(
3179                        PerlIO_printf( Perl_debug_log,
3180                            "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3181                            REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3182                            (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3183                            ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3184                    );
3185
3186                    if (ST.accept_buff[cur].wordnum <
3187                            ST.accept_buff[best].wordnum)
3188                        best = cur;
3189                }
3190
3191                DEBUG_EXECUTE_r({
3192                    AV *const trie_words
3193                        = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3194                    SV ** const tmp = av_fetch( trie_words, 
3195                        ST.accept_buff[ best ].wordnum - 1, 0 );
3196                    regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ? 
3197                                    ST.B : 
3198                                    ST.me + ST.jump[ST.accept_buff[best].wordnum];    
3199                    SV *sv= tmp ? sv_newmortal() : NULL;
3200                    
3201                    PerlIO_printf( Perl_debug_log, 
3202                        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3203                        REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3204                        ST.accept_buff[best].wordnum,
3205                        tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0, 
3206                                PL_colors[0], PL_colors[1],
3207                                (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3208                            ) : "not compiled under -Dr", 
3209                            REG_NODE_NUM(nextop),
3210                        PL_colors[5] );
3211                });
3212
3213                if ( best<ST.accepted ) {
3214                    reg_trie_accepted tmp = ST.accept_buff[ best ];
3215                    ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3216                    ST.accept_buff[ ST.accepted ] = tmp;
3217                    best = ST.accepted;
3218                }
3219                PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3220                if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3221                    scan = ST.B;
3222                } else {
3223                    scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3224                }
3225                PUSH_YES_STATE_GOTO(TRIE_next, scan);    
3226                /* NOTREACHED */
3227            }
3228            /* NOTREACHED */
3229        case TRIE_next:
3230            /* we dont want to throw this away, see bug 57042*/
3231            if (oreplsv != GvSV(PL_replgv))
3232                sv_setsv(oreplsv, GvSV(PL_replgv));
3233            FREETMPS;
3234            LEAVE;
3235            sayYES;
3236#undef  ST
3237
3238        case EXACT: {
3239            char *s = STRING(scan);
3240            ln = STR_LEN(scan);
3241            if (do_utf8 != UTF) {
3242                /* The target and the pattern have differing utf8ness. */
3243                char *l = locinput;
3244                const char * const e = s + ln;
3245
3246                if (do_utf8) {
3247                    /* The target is utf8, the pattern is not utf8. */
3248                    while (s < e) {
3249                        STRLEN ulen;
3250                        if (l >= PL_regeol)
3251                             sayNO;
3252                        if (NATIVE_TO_UNI(*(U8*)s) !=
3253                            utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3254                                            uniflags))
3255                             sayNO;
3256                        l += ulen;
3257                        s ++;
3258                    }
3259                }
3260                else {
3261                    /* The target is not utf8, the pattern is utf8. */
3262                    while (s < e) {
3263                        STRLEN ulen;
3264                        if (l >= PL_regeol)
3265                            sayNO;
3266                        if (NATIVE_TO_UNI(*((U8*)l)) !=
3267                            utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3268                                           uniflags))
3269                            sayNO;
3270                        s += ulen;
3271                        l ++;
3272                    }
3273                }
3274                locinput = l;
3275                nextchr = UCHARAT(locinput);
3276                break;
3277            }
3278            /* The target and the pattern have the same utf8ness. */
3279            /* Inline the first character, for speed. */
3280            if (UCHARAT(s) != nextchr)
3281                sayNO;
3282            if (PL_regeol - locinput < ln)
3283                sayNO;
3284            if (ln > 1 && memNE(s, locinput, ln))
3285                sayNO;
3286            locinput += ln;
3287            nextchr = UCHARAT(locinput);
3288            break;
3289            }
3290        case EXACTFL:
3291            PL_reg_flags |= RF_tainted;
3292            /* FALL THROUGH */
3293        case EXACTF: {
3294            char * const s = STRING(scan);
3295            ln = STR_LEN(scan);
3296
3297            if (do_utf8 || UTF) {
3298              /* Either target or the pattern are utf8. */
3299                const char * const l = locinput;
3300                char *e = PL_regeol;
3301
3302                if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3303                               l, &e, 0,  do_utf8)) {
3304                     /* One more case for the sharp s:
3305                      * pack("U0U*", 0xDF) =~ /ss/i,
3306                      * the 0xC3 0x9F are the UTF-8
3307                      * byte sequence for the U+00DF. */
3308
3309                     if (!(do_utf8 &&
3310                           toLOWER(s[0]) == 's' &&
3311                           ln >= 2 &&
3312                           toLOWER(s[1]) == 's' &&
3313                           (U8)l[0] == 0xC3 &&
3314                           e - l >= 2 &&
3315                           (U8)l[1] == 0x9F))
3316                          sayNO;
3317                }
3318                locinput = e;
3319                nextchr = UCHARAT(locinput);
3320                break;
3321            }
3322
3323            /* Neither the target and the pattern are utf8. */
3324
3325            /* Inline the first character, for speed. */
3326            if (UCHARAT(s) != nextchr &&
3327                UCHARAT(s) != ((OP(scan) == EXACTF)
3328                               ? PL_fold : PL_fold_locale)[nextchr])
3329                sayNO;
3330            if (PL_regeol - locinput < ln)
3331                sayNO;
3332            if (ln > 1 && (OP(scan) == EXACTF
3333                           ? ibcmp(s, locinput, ln)
3334                           : ibcmp_locale(s, locinput, ln)))
3335                sayNO;
3336            locinput += ln;
3337            nextchr = UCHARAT(locinput);
3338            break;
3339            }
3340        case ANYOF:
3341            if (do_utf8) {
3342                STRLEN inclasslen = PL_regeol - locinput;
3343
3344                if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3345                    goto anyof_fail;
3346                if (locinput >= PL_regeol)
3347                    sayNO;
3348                locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3349                nextchr = UCHARAT(locinput);
3350                break;
3351            }
3352            else {
3353                if (nextchr < 0)
3354                    nextchr = UCHARAT(locinput);
3355                if (!REGINCLASS(rex, scan, (U8*)locinput))
3356                    goto anyof_fail;
3357                if (!nextchr && locinput >= PL_regeol)
3358                    sayNO;
3359                nextchr = UCHARAT(++locinput);
3360                break;
3361            }
3362        anyof_fail:
3363            /* If we might have the case of the German sharp s
3364             * in a casefolding Unicode character class. */
3365
3366            if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3367                 locinput += SHARP_S_SKIP;
3368                 nextchr = UCHARAT(locinput);
3369            }
3370            else
3371                 sayNO;
3372            break;
3373        case ALNUML:
3374            PL_reg_flags |= RF_tainted;
3375            /* FALL THROUGH */
3376        case ALNUM:
3377            if (!nextchr)
3378                sayNO;
3379            if (do_utf8) {
3380                LOAD_UTF8_CHARCLASS_ALNUM();
3381                if (!(OP(scan) == ALNUM
3382                      ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3383                      : isALNUM_LC_utf8((U8*)locinput)))
3384                {
3385                    sayNO;
3386                }
3387                locinput += PL_utf8skip[nextchr];
3388                nextchr = UCHARAT(locinput);
3389                break;
3390            }
3391            if (!(OP(scan) == ALNUM
3392                  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3393                sayNO;
3394            nextchr = UCHARAT(++locinput);
3395            break;
3396        case NALNUML:
3397            PL_reg_flags |= RF_tainted;
3398            /* FALL THROUGH */
3399        case NALNUM:
3400            if (!nextchr && locinput >= PL_regeol)
3401                sayNO;
3402            if (do_utf8) {
3403                LOAD_UTF8_CHARCLASS_ALNUM();
3404                if (OP(scan) == NALNUM
3405                    ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3406                    : isALNUM_LC_utf8((U8*)locinput))
3407                {
3408                    sayNO;
3409                }
3410                locinput += PL_utf8skip[nextchr];
3411                nextchr = UCHARAT(locinput);
3412                break;
3413            }
3414            if (OP(scan) == NALNUM
3415                ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3416                sayNO;
3417            nextchr = UCHARAT(++locinput);
3418            break;
3419        case BOUNDL:
3420        case NBOUNDL:
3421            PL_reg_flags |= RF_tainted;
3422            /* FALL THROUGH */
3423        case BOUND:
3424        case NBOUND:
3425            /* was last char in word? */
3426            if (do_utf8) {
3427                if (locinput == PL_bostr)
3428                    ln = '\n';
3429                else {
3430                    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3431                
3432                    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3433                }
3434                if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3435                    ln = isALNUM_uni(ln);
3436                    LOAD_UTF8_CHARCLASS_ALNUM();
3437                    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3438                }
3439                else {
3440                    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3441                    n = isALNUM_LC_utf8((U8*)locinput);
3442                }
3443            }
3444            else {
3445                ln = (locinput != PL_bostr) ?
3446                    UCHARAT(locinput - 1) : '\n';
3447                if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3448                    ln = isALNUM(ln);
3449                    n = isALNUM(nextchr);
3450                }
3451                else {
3452                    ln = isALNUM_LC(ln);
3453                    n = isALNUM_LC(nextchr);
3454                }
3455            }
3456            if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3457                                    OP(scan) == BOUNDL))
3458                    sayNO;
3459            break;
3460        case SPACEL:
3461            PL_reg_flags |= RF_tainted;
3462            /* FALL THROUGH */
3463        case SPACE:
3464            if (!nextchr)
3465                sayNO;
3466            if (do_utf8) {
3467                if (UTF8_IS_CONTINUED(nextchr)) {
3468                    LOAD_UTF8_CHARCLASS_SPACE();
3469                    if (!(OP(scan) == SPACE
3470                          ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3471                          : isSPACE_LC_utf8((U8*)locinput)))
3472                    {
3473                        sayNO;
3474                    }
3475                    locinput += PL_utf8skip[nextchr];
3476                    nextchr = UCHARAT(locinput);
3477                    break;
3478                }
3479                if (!(OP(scan) == SPACE
3480                      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3481                    sayNO;
3482                nextchr = UCHARAT(++locinput);
3483            }
3484            else {
3485                if (!(OP(scan) == SPACE
3486                      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3487                    sayNO;
3488                nextchr = UCHARAT(++locinput);
3489            }
3490            break;
3491        case NSPACEL:
3492            PL_reg_flags |= RF_tainted;
3493            /* FALL THROUGH */
3494        case NSPACE:
3495            if (!nextchr && locinput >= PL_regeol)
3496                sayNO;
3497            if (do_utf8) {
3498                LOAD_UTF8_CHARCLASS_SPACE();
3499                if (OP(scan) == NSPACE
3500                    ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3501                    : isSPACE_LC_utf8((U8*)locinput))
3502                {
3503                    sayNO;
3504                }
3505                locinput += PL_utf8skip[nextchr];
3506                nextchr = UCHARAT(locinput);
3507                break;
3508            }
3509            if (OP(scan) == NSPACE
3510                ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3511                sayNO;
3512            nextchr = UCHARAT(++locinput);
3513            break;
3514        case DIGITL:
3515            PL_reg_flags |= RF_tainted;
3516            /* FALL THROUGH */
3517        case DIGIT:
3518            if (!nextchr)
3519                sayNO;
3520            if (do_utf8) {
3521                LOAD_UTF8_CHARCLASS_DIGIT();
3522                if (!(OP(scan) == DIGIT
3523                      ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3524                      : isDIGIT_LC_utf8((U8*)locinput)))
3525                {
3526                    sayNO;
3527                }
3528                locinput += PL_utf8skip[nextchr];
3529                nextchr = UCHARAT(locinput);
3530                break;
3531            }
3532            if (!(OP(scan) == DIGIT
3533                  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3534                sayNO;
3535            nextchr = UCHARAT(++locinput);
3536            break;
3537        case NDIGITL:
3538            PL_reg_flags |= RF_tainted;
3539            /* FALL THROUGH */
3540        case NDIGIT:
3541            if (!nextchr && locinput >= PL_regeol)
3542                sayNO;
3543            if (do_utf8) {
3544                LOAD_UTF8_CHARCLASS_DIGIT();
3545                if (OP(scan) == NDIGIT
3546                    ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3547                    : isDIGIT_LC_utf8((U8*)locinput))
3548                {
3549                    sayNO;
3550                }
3551                locinput += PL_utf8skip[nextchr];
3552                nextchr = UCHARAT(locinput);
3553                break;
3554            }
3555            if (OP(scan) == NDIGIT
3556                ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3557                sayNO;
3558            nextchr = UCHARAT(++locinput);
3559            break;
3560        case CLUMP:
3561            if (locinput >= PL_regeol)
3562                sayNO;
3563            if  (do_utf8) {
3564                LOAD_UTF8_CHARCLASS_MARK();
3565                if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3566                    sayNO;
3567                locinput += PL_utf8skip[nextchr];
3568                while (locinput < PL_regeol &&
3569                       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3570                    locinput += UTF8SKIP(locinput);
3571                if (locinput > PL_regeol)
3572                    sayNO;
3573            } 
3574            else
3575               locinput++;
3576            nextchr = UCHARAT(locinput);
3577            break;
3578            
3579        case NREFFL:
3580        {
3581            char *s;
3582            char type;
3583            PL_reg_flags |= RF_tainted;
3584            /* FALL THROUGH */
3585        case NREF:
3586        case NREFF:
3587            type = OP(scan);
3588            n = reg_check_named_buff_matched(rex,scan);
3589
3590            if ( n ) {
3591                type = REF + ( type - NREF );
3592                goto do_ref;
3593            } else {
3594                sayNO;
3595            }
3596            /* unreached */
3597        case REFFL:
3598            PL_reg_flags |= RF_tainted;
3599            /* FALL THROUGH */
3600        case REF:
3601        case REFF: 
3602            n = ARG(scan);  /* which paren pair */
3603            type = OP(scan);
3604          do_ref:  
3605            ln = PL_regoffs[n].start;
3606            PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3607            if (*PL_reglastparen < n || ln == -1)
3608                sayNO;                  /* Do not match unless seen CLOSEn. */
3609            if (ln == PL_regoffs[n].end)
3610                break;
3611
3612            s = PL_bostr + ln;
3613            if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3614                char *l = locinput;
3615                const char *e = PL_bostr + PL_regoffs[n].end;
3616                /*
3617                 * Note that we can't do the "other character" lookup trick as
3618                 * in the 8-bit case (no pun intended) because in Unicode we
3619                 * have to map both upper and title case to lower case.
3620                 */
3621                if (type == REFF) {
3622                    while (s < e) {
3623                        STRLEN ulen1, ulen2;
3624                        U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3625                        U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3626
3627                        if (l >= PL_regeol)
3628                            sayNO;
3629                        toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3630                        toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3631                        if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3632                            sayNO;
3633                        s += ulen1;
3634                        l += ulen2;
3635                    }
3636                }
3637                locinput = l;
3638                nextchr = UCHARAT(locinput);
3639                break;
3640            }
3641
3642            /* Inline the first character, for speed. */
3643            if (UCHARAT(s) != nextchr &&
3644                (type == REF ||
3645                 (UCHARAT(s) != (type == REFF
3646                                  ? PL_fold : PL_fold_locale)[nextchr])))
3647                sayNO;
3648            ln = PL_regoffs[n].end - ln;
3649            if (locinput + ln > PL_regeol)
3650                sayNO;
3651            if (ln > 1 && (type == REF
3652                           ? memNE(s, locinput, ln)
3653                           : (type == REFF
3654                              ? ibcmp(s, locinput, ln)
3655                              : ibcmp_locale(s, locinput, ln))))
3656                sayNO;
3657            locinput += ln;
3658            nextchr = UCHARAT(locinput);
3659            break;
3660        }
3661        case NOTHING:
3662        case TAIL:
3663            break;
3664        case BACK:
3665            break;
3666
3667#undef  ST
3668#define ST st->u.eval
3669        {
3670            SV *ret;
3671            regexp *re;
3672            regexp_internal *rei;
3673            regnode *startpoint;
3674
3675        case GOSTART:
3676        case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3677            if (cur_eval && cur_eval->locinput==locinput) {
3678                if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) 
3679                    Perl_croak(aTHX_ "Infinite recursion in regex");
3680                if ( ++nochange_depth > max_nochange_depth )
3681                    Perl_croak(aTHX_ 
3682                        "Pattern subroutine nesting without pos change"
3683                        " exceeded limit in regex");
3684            } else {
3685                nochange_depth = 0;
3686            }
3687            re = rex;
3688            rei = rexi;
3689            (void)ReREFCNT_inc(rex);
3690            if (OP(scan)==GOSUB) {
3691                startpoint = scan + ARG2L(scan);
3692                ST.close_paren = ARG(scan);
3693            } else {
3694                startpoint = rei->program+1;
3695                ST.close_paren = 0;
3696            }
3697            goto eval_recurse_doit;
3698            /* NOTREACHED */
3699        case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */        
3700            if (cur_eval && cur_eval->locinput==locinput) {
3701                if ( ++nochange_depth > max_nochange_depth )
3702                    Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3703            } else {
3704                nochange_depth = 0;
3705            }    
3706            {
3707                /* execute the code in the {...} */
3708                dSP;
3709                SV ** const before = SP;
3710                OP_4tree * const oop = PL_op;
3711                COP * const ocurcop = PL_curcop;
3712                PAD *old_comppad;
3713            
3714                n = ARG(scan);
3715                PL_op = (OP_4tree*)rexi->data->data[n];
3716                DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, 
3717                    "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3718                PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3719                PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3720
3721                if (sv_yes_mark) {
3722                    SV *sv_mrk = get_sv("REGMARK", 1);
3723                    sv_setsv(sv_mrk, sv_yes_mark);
3724                }
3725
3726                CALLRUNOPS(aTHX);                       /* Scalar context. */
3727                SPAGAIN;
3728                if (SP == before)
3729                    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3730                else {
3731                    ret = POPs;
3732                    PUTBACK;
3733                }
3734
3735                PL_op = oop;
3736                PAD_RESTORE_LOCAL(old_comppad);
3737                PL_curcop = ocurcop;
3738                if (!logical) {
3739                    /* /(?{...})/ */
3740                    sv_setsv(save_scalar(PL_replgv), ret);
3741                    break;
3742                }
3743            }
3744            if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3745                logical = 0;
3746                {
3747                    /* extract RE object from returned value; compiling if
3748                     * necessary */
3749
3750                    MAGIC *mg = NULL;
3751                    const SV *sv;
3752                    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3753                        mg = mg_find(sv, PERL_MAGIC_qr);
3754                    else if (SvSMAGICAL(ret)) {
3755                        if (SvGMAGICAL(ret))
3756                            sv_unmagic(ret, PERL_MAGIC_qr);
3757                        else
3758                            mg = mg_find(ret, PERL_MAGIC_qr);
3759                    }
3760
3761                    if (mg) {
3762                        re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
3763                    }
3764                    else {
3765                        U32 pm_flags = 0;
3766                        const I32 osize = PL_regsize;
3767
3768                        if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
3769                        re = CALLREGCOMP(ret, pm_flags);
3770                        if (!(SvFLAGS(ret)
3771                              & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3772                                | SVs_GMG)))
3773                            sv_magic(ret,MUTABLE_SV(ReREFCNT_inc(re)),
3774                                        PERL_MAGIC_qr,0,0);
3775                        PL_regsize = osize;
3776                    }
3777                }
3778                RXp_MATCH_COPIED_off(re);
3779                re->subbeg = rex->subbeg;
3780                re->sublen = rex->sublen;
3781                rei = RXi_GET(re);
3782                DEBUG_EXECUTE_r(
3783                    debug_start_match(re, do_utf8, locinput, PL_regeol, 
3784                        "Matching embedded");
3785                );              
3786                startpoint = rei->program + 1;
3787                ST.close_paren = 0; /* only used for GOSUB */
3788                /* borrowed from regtry */
3789                if (PL_reg_start_tmpl <= re->nparens) {
3790                    PL_reg_start_tmpl = re->nparens*3/2 + 3;
3791                    if(PL_reg_start_tmp)
3792                        Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3793                    else
3794                        Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3795                }                       
3796
3797        eval_recurse_doit: /* Share code with GOSUB below this line */                          
3798                /* run the pattern returned from (??{...}) */
3799                ST.cp = regcppush(0);   /* Save *all* the positions. */
3800                REGCP_SET(ST.lastcp);
3801                
3802                PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3803                
3804                /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3805                PL_reglastparen = &re->lastparen;
3806                PL_reglastcloseparen = &re->lastcloseparen;
3807                re->lastparen = 0;
3808                re->lastcloseparen = 0;
3809
3810                PL_reginput = locinput;
3811                PL_regsize = 0;
3812
3813                /* XXXX This is too dramatic a measure... */
3814                PL_reg_maxiter = 0;
3815
3816                ST.toggle_reg_flags = PL_reg_flags;
3817                if (RX_UTF8(re))
3818                    PL_reg_flags |= RF_utf8;
3819                else
3820                    PL_reg_flags &= ~RF_utf8;
3821                ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3822
3823                ST.prev_rex = rex;
3824                ST.prev_curlyx = cur_curlyx;
3825                SETREX(rex,re);
3826                rexi = rei;
3827                cur_curlyx = NULL;
3828                ST.B = next;
3829                ST.prev_eval = cur_eval;
3830                cur_eval = st;
3831                /* now continue from first node in postoned RE */
3832                PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3833                /* NOTREACHED */
3834            }
3835            /* logical is 1,   /(?(?{...})X|Y)/ */
3836            sw = (bool)SvTRUE(ret);
3837            logical = 0;
3838            break;
3839        }
3840
3841        case EVAL_AB: /* cleanup after a successful (??{A})B */
3842            /* note: this is called twice; first after popping B, then A */
3843            PL_reg_flags ^= ST.toggle_reg_flags; 
3844            ReREFCNT_dec(rex);
3845            SETREX(rex,ST.prev_rex);
3846            rexi = RXi_GET(rex);
3847            regcpblow(ST.cp);
3848            cur_eval = ST.prev_eval;
3849            cur_curlyx = ST.prev_curlyx;
3850
3851            /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3852            PL_reglastparen = &rex->lastparen;
3853            PL_reglastcloseparen = &rex->lastcloseparen;
3854            /* also update PL_regoffs */
3855            PL_regoffs = rex->offs;
3856            
3857            /* XXXX This is too dramatic a measure... */
3858            PL_reg_maxiter = 0;
3859            if ( nochange_depth )
3860                nochange_depth--;
3861            sayYES;
3862
3863
3864        case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3865            /* note: this is called twice; first after popping B, then A */
3866            PL_reg_flags ^= ST.toggle_reg_flags; 
3867            ReREFCNT_dec(rex);
3868            SETREX(rex,ST.prev_rex);
3869            rexi = RXi_GET(rex); 
3870            /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3871            PL_reglastparen = &rex->lastparen;
3872            PL_reglastcloseparen = &rex->lastcloseparen;
3873
3874            PL_reginput = locinput;
3875            REGCP_UNWIND(ST.lastcp);
3876            regcppop(rex);
3877            cur_eval = ST.prev_eval;
3878            cur_curlyx = ST.prev_curlyx;
3879            /* XXXX This is too dramatic a measure... */
3880            PL_reg_maxiter = 0;
3881            if ( nochange_depth )
3882                nochange_depth--;
3883            sayNO_SILENT;
3884#undef ST
3885
3886        case OPEN:
3887            n = ARG(scan);  /* which paren pair */
3888            PL_reg_start_tmp[n] = locinput;
3889            if (n > PL_regsize)
3890                PL_regsize = n;
3891            lastopen = n;
3892            break;
3893        case CLOSE:
3894            n = ARG(scan);  /* which paren pair */
3895            PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3896            PL_regoffs[n].end = locinput - PL_bostr;
3897            /*if (n > PL_regsize)
3898                PL_regsize = n;*/
3899            if (n > *PL_reglastparen)
3900                *PL_reglastparen = n;
3901            *PL_reglastcloseparen = n;
3902            if (cur_eval && cur_eval->u.eval.close_paren == n) {
3903                goto fake_end;
3904            }    
3905            break;
3906        case ACCEPT:
3907            if (ARG(scan)){
3908                regnode *cursor;
3909                for (cursor=scan;
3910                     cursor && OP(cursor)!=END; 
3911                     cursor=regnext(cursor)) 
3912                {
3913                    if ( OP(cursor)==CLOSE ){
3914                        n = ARG(cursor);
3915                        if ( n <= lastopen ) {
3916                            PL_regoffs[n].start
3917                                = PL_reg_start_tmp[n] - PL_bostr;
3918                            PL_regoffs[n].end = locinput - PL_bostr;
3919                            /*if (n > PL_regsize)
3920                            PL_regsize = n;*/
3921                            if (n > *PL_reglastparen)
3922                                *PL_reglastparen = n;
3923                            *PL_reglastcloseparen = n;
3924                            if ( n == ARG(scan) || (cur_eval &&
3925                                cur_eval->u.eval.close_paren == n))
3926                                break;
3927                        }
3928                    }
3929                }
3930            }
3931            goto fake_end;
3932            /*NOTREACHED*/          
3933        case GROUPP:
3934            n = ARG(scan);  /* which paren pair */
3935            sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3936            break;
3937        case NGROUPP:
3938            /* reg_check_named_buff_matched returns 0 for no match */
3939            sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3940            break;
3941        case INSUBP:
3942            n = ARG(scan);
3943            sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3944            break;
3945        case DEFINEP:
3946            sw = 0;
3947            break;
3948        case IFTHEN:
3949            PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3950            if (sw)
3951                next = NEXTOPER(NEXTOPER(scan));
3952            else {
3953                next = scan + ARG(scan);
3954                if (OP(next) == IFTHEN) /* Fake one. */
3955                    next = NEXTOPER(NEXTOPER(next));
3956            }
3957            break;
3958        case LOGICAL:
3959            logical = scan->flags;
3960            break;
3961
3962/*******************************************************************
3963
3964The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3965pattern, where A and B are subpatterns. (For simple A, CURLYM or
3966STAR/PLUS/CURLY/CURLYN are used instead.)
3967
3968A*B is compiled as <CURLYX><A><WHILEM><B>
3969
3970On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3971state, which contains the current count, initialised to -1. It also sets
3972cur_curlyx to point to this state, with any previous value saved in the
3973state block.
3974
3975CURLYX then jumps straight to the WHILEM op, rather than executing A,
3976since the pattern may possibly match zero times (i.e. it's a while {} loop
3977rather than a do {} while loop).
3978
3979Each entry to WHILEM represents a successful match of A. The count in the
3980CURLYX block is incremented, another WHILEM state is pushed, and execution
3981passes to A or B depending on greediness and the current count.
3982
3983For example, if matching against the string a1a2a3b (where the aN are
3984substrings that match /A/), then the match progresses as follows: (the
3985pushed states are interspersed with the bits of strings matched so far):
3986
3987    <CURLYX cnt=-1>
3988    <CURLYX cnt=0><WHILEM>
3989    <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3990    <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3991    <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3992    <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3993
3994(Contrast this with something like CURLYM, which maintains only a single
3995backtrack state:
3996
3997    <CURLYM cnt=0> a1
3998    a1 <CURLYM cnt=1> a2
3999    a1 a2 <CURLYM cnt=2> a3
4000    a1 a2 a3 <CURLYM cnt=3> b
4001)
4002
4003Each WHILEM state block marks a point to backtrack to upon partial failure
4004of A or B, and also contains some minor state data related to that
4005iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4006overall state, such as the count, and pointers to the A and B ops.
4007
4008This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4009must always point to the *current* CURLYX block, the rules are:
4010
4011When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4012and set cur_curlyx to point the new block.
4013
4014When popping the CURLYX block after a successful or unsuccessful match,
4015restore the previous cur_curlyx.
4016
4017When WHILEM is about to execute B, save the current cur_curlyx, and set it
4018to the outer one saved in the CURLYX block.
4019
4020When popping the WHILEM block after a successful or unsuccessful B match,
4021restore the previous cur_curlyx.
4022
4023Here's an example for the pattern (AI* BI)*BO
4024I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4025
4026cur_
4027curlyx backtrack stack
4028------ ---------------
4029NULL   
4030CO     <CO prev=NULL> <WO>
4031CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai 
4032CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi 
4033NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4034
4035At this point the pattern succeeds, and we work back down the stack to
4036clean up, restoring as we go:
4037
4038CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi 
4039CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai 
4040CO     <CO prev=NULL> <WO>
4041NULL   
4042
4043*******************************************************************/
4044
4045#define ST st->u.curlyx
4046
4047        case CURLYX:    /* start of /A*B/  (for complex A) */
4048        {
4049            /* No need to save/restore up to this paren */
4050            I32 parenfloor = scan->flags;
4051            
4052            assert(next); /* keep Coverity happy */
4053            if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4054                next += ARG(next);
4055
4056            /* XXXX Probably it is better to teach regpush to support
4057               parenfloor > PL_regsize... */
4058            if (parenfloor > (I32)*PL_reglastparen)
4059                parenfloor = *PL_reglastparen; /* Pessimization... */
4060
4061            ST.prev_curlyx= cur_curlyx;
4062            cur_curlyx = st;
4063            ST.cp = PL_savestack_ix;
4064
4065            /* these fields contain the state of the current curly.
4066             * they are accessed by subsequent WHILEMs */
4067            ST.parenfloor = parenfloor;
4068            ST.min = ARG1(scan);
4069            ST.max = ARG2(scan);
4070            ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4071            ST.B = next;
4072            ST.minmod = minmod;
4073            minmod = 0;
4074            ST.count = -1;      /* this will be updated by WHILEM */
4075            ST.lastloc = NULL;  /* this will be updated by WHILEM */
4076
4077            PL_reginput = locinput;
4078            PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4079            /* NOTREACHED */
4080        }
4081
4082        case CURLYX_end: /* just finished matching all of A*B */
4083            cur_curlyx = ST.prev_curlyx;
4084            sayYES;
4085            /* NOTREACHED */
4086
4087        case CURLYX_end_fail: /* just failed to match all of A*B */
4088            regcpblow(ST.cp);
4089            cur_curlyx = ST.prev_curlyx;
4090            sayNO;
4091            /* NOTREACHED */
4092
4093
4094#undef ST
4095#define ST st->u.whilem
4096
4097        case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4098        {
4099            /* see the discussion above about CURLYX/WHILEM */
4100            I32 n;
4101            assert(cur_curlyx); /* keep Coverity happy */
4102            n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4103            ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4104            ST.cache_offset = 0;
4105            ST.cache_mask = 0;
4106            
4107            PL_reginput = locinput;
4108
4109            DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4110                  "%*s  whilem: matched %ld out of %ld..%ld\n",
4111                  REPORT_CODE_OFF+depth*2, "", (long)n,
4112                  (long)cur_curlyx->u.curlyx.min,
4113                  (long)cur_curlyx->u.curlyx.max)
4114            );
4115
4116            /* First just match a string of min A's. */
4117
4118            if (n < cur_curlyx->u.curlyx.min) {
4119                cur_curlyx->u.curlyx.lastloc = locinput;
4120                PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4121                /* NOTREACHED */
4122            }
4123
4124            /* If degenerate A matches "", assume A done. */
4125
4126            if (locinput == cur_curlyx->u.curlyx.lastloc) {
4127                DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4128                   "%*s  whilem: empty match detected, trying continuation...\n",
4129                   REPORT_CODE_OFF+depth*2, "")
4130                );
4131                goto do_whilem_B_max;
4132            }
4133
4134            /* super-linear cache processing */
4135
4136            if (scan->flags) {
4137
4138                if (!PL_reg_maxiter) {
4139                    /* start the countdown: Postpone detection until we
4140                     * know the match is not *that* much linear. */
4141                    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4142                    /* possible overflow for long strings and many CURLYX's */
4143                    if (PL_reg_maxiter < 0)
4144                        PL_reg_maxiter = I32_MAX;
4145                    PL_reg_leftiter = PL_reg_maxiter;
4146                }
4147
4148                if (PL_reg_leftiter-- == 0) {
4149                    /* initialise cache */
4150                    const I32 size = (PL_reg_maxiter + 7)/8;
4151                    if (PL_reg_poscache) {
4152                        if ((I32)PL_reg_poscache_size < size) {
4153                            Renew(PL_reg_poscache, size, char);
4154                            PL_reg_poscache_size = size;
4155                        }
4156                        Zero(PL_reg_poscache, size, char);
4157                    }
4158                    else {
4159                        PL_reg_poscache_size = size;
4160                        Newxz(PL_reg_poscache, size, char);
4161                    }
4162                    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4163      "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4164                              PL_colors[4], PL_colors[5])
4165                    );
4166                }
4167
4168                if (PL_reg_leftiter < 0) {
4169                    /* have we already failed at this position? */
4170                    I32 offset, mask;
4171                    offset  = (scan->flags & 0xf) - 1
4172                                + (locinput - PL_bostr)  * (scan->flags>>4);
4173                    mask    = 1 << (offset % 8);
4174                    offset /= 8;
4175                    if (PL_reg_poscache[offset] & mask) {
4176                        DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4177                            "%*s  whilem: (cache) already tried at this position...\n",
4178                            REPORT_CODE_OFF+depth*2, "")
4179                        );
4180                        sayNO; /* cache records failure */
4181                    }
4182                    ST.cache_offset = offset;
4183                    ST.cache_mask   = mask;
4184                }
4185            }
4186
4187            /* Prefer B over A for minimal matching. */
4188
4189            if (cur_curlyx->u.curlyx.minmod) {
4190                ST.save_curlyx = cur_curlyx;
4191                cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4192                ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4193                REGCP_SET(ST.lastcp);
4194                PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4195                /* NOTREACHED */
4196            }
4197
4198            /* Prefer A over B for maximal matching. */
4199
4200            if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4201                ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4202                cur_curlyx->u.curlyx.lastloc = locinput;
4203                REGCP_SET(ST.lastcp);
4204                PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4205                /* NOTREACHED */
4206            }
4207            goto do_whilem_B_max;
4208        }
4209        /* NOTREACHED */
4210
4211        case WHILEM_B_min: /* just matched B in a minimal match */
4212        case WHILEM_B_max: /* just matched B in a maximal match */
4213            cur_curlyx = ST.save_curlyx;
4214            sayYES;
4215            /* NOTREACHED */
4216
4217        case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4218            cur_curlyx = ST.save_curlyx;
4219            cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4220            cur_curlyx->u.curlyx.count--;
4221            CACHEsayNO;
4222            /* NOTREACHED */
4223
4224        case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4225            REGCP_UNWIND(ST.lastcp);
4226            regcppop(rex);
4227            /* FALL THROUGH */
4228        case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4229            cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4230            cur_curlyx->u.curlyx.count--;
4231            CACHEsayNO;
4232            /* NOTREACHED */
4233
4234        case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4235            REGCP_UNWIND(ST.lastcp);
4236            regcppop(rex);      /* Restore some previous $<digit>s? */
4237            PL_reginput = locinput;
4238            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4239                "%*s  whilem: failed, trying continuation...\n",
4240                REPORT_CODE_OFF+depth*2, "")
4241            );
4242          do_whilem_B_max:
4243            if (cur_curlyx->u.curlyx.count >= REG_INFTY
4244                && ckWARN(WARN_REGEXP)
4245                && !(PL_reg_flags & RF_warned))
4246            {
4247                PL_reg_flags |= RF_warned;
4248                Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4249                     "Complex regular subexpression recursion",
4250                     REG_INFTY - 1);
4251            }
4252
4253            /* now try B */
4254            ST.save_curlyx = cur_curlyx;
4255            cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4256            PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4257            /* NOTREACHED */
4258
4259        case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4260            cur_curlyx = ST.save_curlyx;
4261            REGCP_UNWIND(ST.lastcp);
4262            regcppop(rex);
4263
4264            if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4265                /* Maximum greed exceeded */
4266                if (cur_curlyx->u.curlyx.count >= REG_INFTY
4267                    && ckWARN(WARN_REGEXP)
4268                    && !(PL_reg_flags & RF_warned))
4269                {
4270                    PL_reg_flags |= RF_warned;
4271                    Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4272                        "%s limit (%d) exceeded",
4273                        "Complex regular subexpression recursion",
4274                        REG_INFTY - 1);
4275                }
4276                cur_curlyx->u.curlyx.count--;
4277                CACHEsayNO;
4278            }
4279
4280            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4281                "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4282            );
4283            /* Try grabbing another A and see if it helps. */
4284            PL_reginput = locinput;
4285            cur_curlyx->u.curlyx.lastloc = locinput;
4286            ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4287            REGCP_SET(ST.lastcp);
4288            PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4289            /* NOTREACHED */
4290
4291#undef  ST
4292#define ST st->u.branch
4293
4294        case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4295            next = scan + ARG(scan);
4296            if (next == scan)
4297                next = NULL;
4298            scan = NEXTOPER(scan);
4299            /* FALL THROUGH */
4300
4301        case BRANCH:        /*  /(...|A|...)/ */
4302            scan = NEXTOPER(scan); /* scan now points to inner node */
4303            ST.lastparen = *PL_reglastparen;
4304            ST.next_branch = next;
4305            REGCP_SET(ST.cp);
4306            PL_reginput = locinput;
4307
4308            /* Now go into the branch */
4309            if (has_cutgroup) {
4310                PUSH_YES_STATE_GOTO(BRANCH_next, scan);    
4311            } else {
4312                PUSH_STATE_GOTO(BRANCH_next, scan);
4313            }
4314            /* NOTREACHED */
4315        case CUTGROUP:
4316            PL_reginput = locinput;
4317            sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4318                MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4319            PUSH_STATE_GOTO(CUTGROUP_next,next);
4320            /* NOTREACHED */
4321        case CUTGROUP_next_fail:
4322            do_cutgroup = 1;
4323            no_final = 1;
4324            if (st->u.mark.mark_name)
4325                sv_commit = st->u.mark.mark_name;
4326            sayNO;          
4327            /* NOTREACHED */
4328        case BRANCH_next:
4329            sayYES;
4330            /* NOTREACHED */
4331        case BRANCH_next_fail: /* that branch failed; try the next, if any */
4332            if (do_cutgroup) {
4333                do_cutgroup = 0;
4334                no_final = 0;
4335            }
4336            REGCP_UNWIND(ST.cp);
4337            for (n = *PL_reglastparen; n > ST.lastparen; n--)
4338                PL_regoffs[n].end = -1;
4339            *PL_reglastparen = n;
4340            /*dmq: *PL_reglastcloseparen = n; */
4341            scan = ST.next_branch;
4342            /* no more branches? */
4343            if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4344                DEBUG_EXECUTE_r({
4345                    PerlIO_printf( Perl_debug_log,
4346                        "%*s  %sBRANCH failed...%s\n",
4347                        REPORT_CODE_OFF+depth*2, "", 
4348                        PL_colors[4],
4349                        PL_colors[5] );
4350                });
4351                sayNO_SILENT;
4352            }
4353            continue; /* execute next BRANCH[J] op */
4354            /* NOTREACHED */
4355    
4356        case MINMOD:
4357            minmod = 1;
4358            break;
4359
4360#undef  ST
4361#define ST st->u.curlym
4362
4363        case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4364
4365            /* This is an optimisation of CURLYX that enables us to push
4366             * only a single backtracking state, no matter how many matches
4367             * there are in {m,n}. It relies on the pattern being constant
4368             * length, with no parens to influence future backrefs
4369             */
4370
4371            ST.me = scan;
4372            scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4373
4374            /* if paren positive, emulate an OPEN/CLOSE around A */
4375            if (ST.me->flags) {
4376                U32 paren = ST.me->flags;
4377                if (paren > PL_regsize)
4378                    PL_regsize = paren;
4379                if (paren > *PL_reglastparen)
4380                    *PL_reglastparen = paren;
4381                scan += NEXT_OFF(scan); /* Skip former OPEN. */
4382            }
4383            ST.A = scan;
4384            ST.B = next;
4385            ST.alen = 0;
4386            ST.count = 0;
4387            ST.minmod = minmod;
4388            minmod = 0;
4389            ST.c1 = CHRTEST_UNINIT;
4390            REGCP_SET(ST.cp);
4391
4392            if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4393                goto curlym_do_B;
4394
4395          curlym_do_A: /* execute the A in /A{m,n}B/  */
4396            PL_reginput = locinput;
4397            PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4398            /* NOTREACHED */
4399
4400        case CURLYM_A: /* we've just matched an A */
4401            locinput = st->locinput;
4402            nextchr = UCHARAT(locinput);
4403
4404            ST.count++;
4405            /* after first match, determine A's length: u.curlym.alen */
4406            if (ST.count == 1) {
4407                if (PL_reg_match_utf8) {
4408                    char *s = locinput;
4409                    while (s < PL_reginput) {
4410                        ST.alen++;
4411                        s += UTF8SKIP(s);
4412                    }
4413                }
4414                else {
4415                    ST.alen = PL_reginput - locinput;
4416                }
4417                if (ST.alen == 0)
4418                    ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4419            }
4420            DEBUG_EXECUTE_r(
4421                PerlIO_printf(Perl_debug_log,
4422                          "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4423                          (int)(REPORT_CODE_OFF+(depth*2)), "",
4424                          (IV) ST.count, (IV)ST.alen)
4425            );
4426
4427            locinput = PL_reginput;
4428                        
4429            if (cur_eval && cur_eval->u.eval.close_paren && 
4430                cur_eval->u.eval.close_paren == (U32)ST.me->flags) 
4431                goto fake_end;
4432                
4433            {
4434                I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4435                if ( max == REG_INFTY || ST.count < max )
4436                    goto curlym_do_A; /* try to match another A */
4437            }
4438            goto curlym_do_B; /* try to match B */
4439
4440        case CURLYM_A_fail: /* just failed to match an A */
4441            REGCP_UNWIND(ST.cp);
4442
4443            if (ST.minmod || ST.count < ARG1(ST.me) /* min*/ 
4444                || (cur_eval && cur_eval->u.eval.close_paren &&
4445                    cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4446                sayNO;
4447
4448          curlym_do_B: /* execute the B in /A{m,n}B/  */
4449            PL_reginput = locinput;
4450            if (ST.c1 == CHRTEST_UNINIT) {
4451                /* calculate c1 and c2 for possible match of 1st char
4452                 * following curly */
4453                ST.c1 = ST.c2 = CHRTEST_VOID;
4454                if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4455                    regnode *text_node = ST.B;
4456                    if (! HAS_TEXT(text_node))
4457                        FIND_NEXT_IMPT(text_node);
4458                    /* this used to be 
4459                        
4460                        (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4461                        
4462                        But the former is redundant in light of the latter.
4463                        
4464                        if this changes back then the macro for 
4465                        IS_TEXT and friends need to change.
4466                     */
4467                    if (PL_regkind[OP(text_node)] == EXACT)
4468                    {
4469                        
4470                        ST.c1 = (U8)*STRING(text_node);
4471                        ST.c2 =
4472                            (IS_TEXTF(text_node))
4473                            ? PL_fold[ST.c1]
4474                            : (IS_TEXTFL(text_node))
4475                                ? PL_fold_locale[ST.c1]
4476                                : ST.c1;
4477                    }
4478                }
4479            }
4480
4481            DEBUG_EXECUTE_r(
4482                PerlIO_printf(Perl_debug_log,
4483                    "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4484                    (int)(REPORT_CODE_OFF+(depth*2)),
4485                    "", (IV)ST.count)
4486                );
4487            if (ST.c1 != CHRTEST_VOID
4488                    && UCHARAT(PL_reginput) != ST.c1
4489                    && UCHARAT(PL_reginput) != ST.c2)
4490            {
4491                /* simulate B failing */
4492                DEBUG_OPTIMISE_r(
4493                    PerlIO_printf(Perl_debug_log,
4494                        "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4495                        (int)(REPORT_CODE_OFF+(depth*2)),"",
4496                        (IV)ST.c1,(IV)ST.c2
4497                ));
4498                state_num = CURLYM_B_fail;
4499                goto reenter_switch;
4500            }
4501
4502            if (ST.me->flags) {
4503                /* mark current A as captured */
4504                I32 paren = ST.me->flags;
4505                if (ST.count) {
4506                    PL_regoffs[paren].start
4507                        = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4508                    PL_regoffs[paren].end = PL_reginput - PL_bostr;
4509                    /*dmq: *PL_reglastcloseparen = paren; */
4510                }
4511                else
4512                    PL_regoffs[paren].end = -1;
4513                if (cur_eval && cur_eval->u.eval.close_paren &&
4514                    cur_eval->u.eval.close_paren == (U32)ST.me->flags) 
4515                {
4516                    if (ST.count) 
4517                        goto fake_end;
4518                    else
4519                        sayNO;
4520                }
4521            }
4522            
4523            PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4524            /* NOTREACHED */
4525
4526        case CURLYM_B_fail: /* just failed to match a B */
4527            REGCP_UNWIND(ST.cp);
4528            if (ST.minmod) {
4529                I32 max = ARG2(ST.me);
4530                if (max != REG_INFTY && ST.count == max)
4531                    sayNO;
4532                goto curlym_do_A; /* try to match a further A */
4533            }
4534            /* backtrack one A */
4535            if (ST.count == ARG1(ST.me) /* min */)
4536                sayNO;
4537            ST.count--;
4538            locinput = HOPc(locinput, -ST.alen);
4539            goto curlym_do_B; /* try to match B */
4540
4541#undef ST
4542#define ST st->u.curly
4543
4544#define CURLY_SETPAREN(paren, success) \
4545    if (paren) { \
4546        if (success) { \
4547            PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4548            PL_regoffs[paren].end = locinput - PL_bostr; \
4549            *PL_reglastcloseparen = paren; \
4550        } \
4551        else \
4552            PL_regoffs[paren].end = -1; \
4553    }
4554
4555        case STAR:              /*  /A*B/ where A is width 1 */
4556            ST.paren = 0;
4557            ST.min = 0;
4558            ST.max = REG_INFTY;
4559            scan = NEXTOPER(scan);
4560            goto repeat;
4561        case PLUS:              /*  /A+B/ where A is width 1 */
4562            ST.paren = 0;
4563            ST.min = 1;
4564            ST.max = REG_INFTY;
4565            scan = NEXTOPER(scan);
4566            goto repeat;
4567        case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4568            ST.paren = scan->flags;     /* Which paren to set */
4569            if (ST.paren > PL_regsize)
4570                PL_regsize = ST.paren;
4571            if (ST.paren > *PL_reglastparen)
4572                *PL_reglastparen = ST.paren;
4573            ST.min = ARG1(scan);  /* min to match */
4574            ST.max = ARG2(scan);  /* max to match */
4575            if (cur_eval && cur_eval->u.eval.close_paren &&
4576                cur_eval->u.eval.close_paren == (U32)ST.paren) {
4577                ST.min=1;
4578                ST.max=1;
4579            }
4580            scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4581            goto repeat;
4582        case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4583            ST.paren = 0;
4584            ST.min = ARG1(scan);  /* min to match */
4585            ST.max = ARG2(scan);  /* max to match */
4586            scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4587          repeat:
4588            /*
4589            * Lookahead to avoid useless match attempts
4590            * when we know what character comes next.
4591            *
4592            * Used to only do .*x and .*?x, but now it allows
4593            * for )'s, ('s and (?{ ... })'s to be in the way
4594            * of the quantifier and the EXACT-like node.  -- japhy
4595            */
4596
4597            if (ST.min > ST.max) /* XXX make this a compile-time check? */
4598                sayNO;
4599            if (HAS_TEXT(next) || JUMPABLE(next)) {
4600                U8 *s;
4601                regnode *text_node = next;
4602
4603                if (! HAS_TEXT(text_node)) 
4604                    FIND_NEXT_IMPT(text_node);
4605
4606                if (! HAS_TEXT(text_node))
4607                    ST.c1 = ST.c2 = CHRTEST_VOID;
4608                else {
4609                    if ( PL_regkind[OP(text_node)] != EXACT ) {
4610                        ST.c1 = ST.c2 = CHRTEST_VOID;
4611                        goto assume_ok_easy;
4612                    }
4613                    else
4614                        s = (U8*)STRING(text_node);
4615                    
4616                    /*  Currently we only get here when 
4617                        
4618                        PL_rekind[OP(text_node)] == EXACT
4619                    
4620                        if this changes back then the macro for IS_TEXT and 
4621                        friends need to change. */
4622                    if (!UTF) {
4623                        ST.c2 = ST.c1 = *s;
4624                        if (IS_TEXTF(text_node))
4625                            ST.c2 = PL_fold[ST.c1];
4626                        else if (IS_TEXTFL(text_node))
4627                            ST.c2 = PL_fold_locale[ST.c1];
4628                    }
4629                    else { /* UTF */
4630                        if (IS_TEXTF(text_node)) {
4631                             STRLEN ulen1, ulen2;
4632                             U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4633                             U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4634
4635                             to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4636                             to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4637#ifdef EBCDIC
4638                             ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4639                                                    ckWARN(WARN_UTF8) ?
4640                                                    0 : UTF8_ALLOW_ANY);
4641                             ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4642                                                    ckWARN(WARN_UTF8) ?
4643                                                    0 : UTF8_ALLOW_ANY);
4644#else
4645                             ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4646                                                    uniflags);
4647                             ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4648                                                    uniflags);
4649#endif
4650                        }
4651                        else {
4652                            ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4653                                                     uniflags);
4654                        }
4655                    }
4656                }
4657            }
4658            else
4659                ST.c1 = ST.c2 = CHRTEST_VOID;
4660        assume_ok_easy:
4661
4662            ST.A = scan;
4663            ST.B = next;
4664            PL_reginput = locinput;
4665            if (minmod) {
4666                minmod = 0;
4667                if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4668                    sayNO;
4669                ST.count = ST.min;
4670                locinput = PL_reginput;
4671                REGCP_SET(ST.cp);
4672                if (ST.c1 == CHRTEST_VOID)
4673                    goto curly_try_B_min;
4674
4675                ST.oldloc = locinput;
4676
4677                /* set ST.maxpos to the furthest point along the
4678                 * string that could possibly match */
4679                if  (ST.max == REG_INFTY) {
4680                    ST.maxpos = PL_regeol - 1;
4681                    if (do_utf8)
4682                        while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4683                            ST.maxpos--;
4684                }
4685                else if (do_utf8) {
4686                    int m = ST.max - ST.min;
4687                    for (ST.maxpos = locinput;
4688                         m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4689                        ST.maxpos += UTF8SKIP(ST.maxpos);
4690                }
4691                else {
4692                    ST.maxpos = locinput + ST.max - ST.min;
4693                    if (ST.maxpos >= PL_regeol)
4694                        ST.maxpos = PL_regeol - 1;
4695                }
4696                goto curly_try_B_min_known;
4697
4698            }
4699            else {
4700                ST.count = regrepeat(rex, ST.A, ST.max, depth);
4701                locinput = PL_reginput;
4702                if (ST.count < ST.min)
4703                    sayNO;
4704                if ((ST.count > ST.min)
4705                    && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4706                {
4707                    /* A{m,n} must come at the end of the string, there's
4708                     * no point in backing off ... */
4709                    ST.min = ST.count;
4710                    /* ...except that $ and \Z can match before *and* after
4711                       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4712                       We may back off by one in this case. */
4713                    if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4714                        ST.min--;
4715                }
4716                REGCP_SET(ST.cp);
4717                goto curly_try_B_max;
4718            }
4719            /* NOTREACHED */
4720
4721
4722        case CURLY_B_min_known_fail:
4723            /* failed to find B in a non-greedy match where c1,c2 valid */
4724            if (ST.paren && ST.count)
4725                PL_regoffs[ST.paren].end = -1;
4726
4727            PL_reginput = locinput;     /* Could be reset... */
4728            REGCP_UNWIND(ST.cp);
4729            /* Couldn't or didn't -- move forward. */
4730            ST.oldloc = locinput;
4731            if (do_utf8)
4732                locinput += UTF8SKIP(locinput);
4733            else
4734                locinput++;
4735            ST.count++;
4736          curly_try_B_min_known:
4737             /* find the next place where 'B' could work, then call B */
4738            {
4739                int n;
4740                if (do_utf8) {
4741                    n = (ST.oldloc == locinput) ? 0 : 1;
4742                    if (ST.c1 == ST.c2) {
4743                        STRLEN len;
4744                        /* set n to utf8_distance(oldloc, locinput) */
4745                        while (locinput <= ST.maxpos &&
4746                               utf8n_to_uvchr((U8*)locinput,
4747                                              UTF8_MAXBYTES, &len,
4748                                              uniflags) != (UV)ST.c1) {
4749                            locinput += len;
4750                            n++;
4751                        }
4752                    }
4753                    else {
4754                        /* set n to utf8_distance(oldloc, locinput) */
4755                        while (locinput <= ST.maxpos) {
4756                            STRLEN len;
4757                            const UV c = utf8n_to_uvchr((U8*)locinput,
4758                                                  UTF8_MAXBYTES, &len,
4759                                                  uniflags);
4760                            if (c == (UV)ST.c1 || c == (UV)ST.c2)
4761                                break;
4762                            locinput += len;
4763                            n++;
4764                        }
4765                    }
4766                }
4767                else {
4768                    if (ST.c1 == ST.c2) {
4769                        while (locinput <= ST.maxpos &&
4770                               UCHARAT(locinput) != ST.c1)
4771                            locinput++;
4772                    }
4773                    else {
4774                        while (locinput <= ST.maxpos
4775                               && UCHARAT(locinput) != ST.c1
4776                               && UCHARAT(locinput) != ST.c2)
4777                            locinput++;
4778                    }
4779                    n = locinput - ST.oldloc;
4780                }
4781                if (locinput > ST.maxpos)
4782                    sayNO;
4783                /* PL_reginput == oldloc now */
4784                if (n) {
4785                    ST.count += n;
4786                    if (regrepeat(rex, ST.A, n, depth) < n)
4787                        sayNO;
4788                }
4789                PL_reginput = locinput;
4790                CURLY_SETPAREN(ST.paren, ST.count);
4791                if (cur_eval && cur_eval->u.eval.close_paren && 
4792                    cur_eval->u.eval.close_paren == (U32)ST.paren) {
4793                    goto fake_end;
4794                }
4795                PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4796            }
4797            /* NOTREACHED */
4798
4799
4800        case CURLY_B_min_fail:
4801            /* failed to find B in a non-greedy match where c1,c2 invalid */
4802            if (ST.paren && ST.count)
4803                PL_regoffs[ST.paren].end = -1;
4804
4805            REGCP_UNWIND(ST.cp);
4806            /* failed -- move forward one */
4807            PL_reginput = locinput;
4808            if (regrepeat(rex, ST.A, 1, depth)) {
4809                ST.count++;
4810                locinput = PL_reginput;
4811                if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4812                        ST.count > 0)) /* count overflow ? */
4813                {
4814                  curly_try_B_min:
4815                    CURLY_SETPAREN(ST.paren, ST.count);
4816                    if (cur_eval && cur_eval->u.eval.close_paren &&
4817                        cur_eval->u.eval.close_paren == (U32)ST.paren) {
4818                        goto fake_end;
4819                    }
4820                    PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4821                }
4822            }
4823            sayNO;
4824            /* NOTREACHED */
4825
4826
4827        curly_try_B_max:
4828            /* a successful greedy match: now try to match B */
4829            if (cur_eval && cur_eval->u.eval.close_paren &&
4830                cur_eval->u.eval.close_paren == (U32)ST.paren) {
4831                goto fake_end;
4832            }
4833            {
4834                UV c = 0;
4835                if (ST.c1 != CHRTEST_VOID)
4836                    c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4837                                           UTF8_MAXBYTES, 0, uniflags)
4838                                : (UV) UCHARAT(PL_reginput);
4839                /* If it could work, try it. */
4840                if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4841                    CURLY_SETPAREN(ST.paren, ST.count);
4842                    PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4843                    /* NOTREACHED */
4844                }
4845            }
4846            /* FALL THROUGH */
4847        case CURLY_B_max_fail:
4848            /* failed to find B in a greedy match */
4849            if (ST.paren && ST.count)
4850                PL_regoffs[ST.paren].end = -1;
4851
4852            REGCP_UNWIND(ST.cp);
4853            /*  back up. */
4854            if (--ST.count < ST.min)
4855                sayNO;
4856            PL_reginput = locinput = HOPc(locinput, -1);
4857            goto curly_try_B_max;
4858
4859#undef ST
4860
4861        case END:
4862            fake_end:
4863            if (cur_eval) {
4864                /* we've just finished A in /(??{A})B/; now continue with B */
4865                I32 tmpix;
4866                st->u.eval.toggle_reg_flags
4867                            = cur_eval->u.eval.toggle_reg_flags;
4868                PL_reg_flags ^= st->u.eval.toggle_reg_flags; 
4869
4870                st->u.eval.prev_rex = rex;              /* inner */
4871                SETREX(rex,cur_eval->u.eval.prev_rex);
4872                rexi = RXi_GET(rex);
4873                cur_curlyx = cur_eval->u.eval.prev_curlyx;
4874                ReREFCNT_inc(rex);
4875                st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4876
4877                /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4878                PL_reglastparen = &rex->lastparen;
4879                PL_reglastcloseparen = &rex->lastcloseparen;
4880
4881                REGCP_SET(st->u.eval.lastcp);
4882                PL_reginput = locinput;
4883
4884                /* Restore parens of the outer rex without popping the
4885                 * savestack */
4886                tmpix = PL_savestack_ix;
4887                PL_savestack_ix = cur_eval->u.eval.lastcp;
4888                regcppop(rex);
4889                PL_savestack_ix = tmpix;
4890
4891                st->u.eval.prev_eval = cur_eval;
4892                cur_eval = cur_eval->u.eval.prev_eval;
4893                DEBUG_EXECUTE_r(
4894                    PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4895                                      REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4896                if ( nochange_depth )
4897                    nochange_depth--;
4898
4899                PUSH_YES_STATE_GOTO(EVAL_AB,
4900                        st->u.eval.prev_eval->u.eval.B); /* match B */
4901            }
4902
4903            if (locinput < reginfo->till) {
4904                DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4905                                      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4906                                      PL_colors[4],
4907                                      (long)(locinput - PL_reg_starttry),
4908                                      (long)(reginfo->till - PL_reg_starttry),
4909                                      PL_colors[5]));
4910                                              
4911                sayNO_SILENT;           /* Cannot match: too short. */
4912            }
4913            PL_reginput = locinput;     /* put where regtry can find it */
4914            sayYES;                     /* Success! */
4915
4916        case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4917            DEBUG_EXECUTE_r(
4918            PerlIO_printf(Perl_debug_log,
4919                "%*s  %ssubpattern success...%s\n",
4920                REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4921            PL_reginput = locinput;     /* put where regtry can find it */
4922            sayYES;                     /* Success! */
4923
4924#undef  ST
4925#define ST st->u.ifmatch
4926
4927        case SUSPEND:   /* (?>A) */
4928            ST.wanted = 1;
4929            PL_reginput = locinput;
4930            goto do_ifmatch;    
4931
4932        case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4933            ST.wanted = 0;
4934            goto ifmatch_trivial_fail_test;
4935
4936        case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4937            ST.wanted = 1;
4938          ifmatch_trivial_fail_test:
4939            if (scan->flags) {
4940                char * const s = HOPBACKc(locinput, scan->flags);
4941                if (!s) {
4942                    /* trivial fail */
4943                    if (logical) {
4944                        logical = 0;
4945                        sw = 1 - (bool)ST.wanted;
4946                    }
4947                    else if (ST.wanted)
4948                        sayNO;
4949                    next = scan + ARG(scan);
4950                    if (next == scan)
4951                        next = NULL;
4952                    break;
4953                }
4954                PL_reginput = s;
4955            }
4956            else
4957                PL_reginput = locinput;
4958
4959          do_ifmatch:
4960            ST.me = scan;
4961            ST.logical = logical;
4962            logical = 0; /* XXX: reset state of logical once it has been saved into ST */
4963            
4964            /* execute body of (?...A) */
4965            PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4966            /* NOTREACHED */
4967
4968        case IFMATCH_A_fail: /* body of (?...A) failed */
4969            ST.wanted = !ST.wanted;
4970            /* FALL THROUGH */
4971
4972        case IFMATCH_A: /* body of (?...A) succeeded */
4973            if (ST.logical) {
4974                sw = (bool)ST.wanted;
4975            }
4976            else if (!ST.wanted)
4977                sayNO;
4978
4979            if (OP(ST.me) == SUSPEND)
4980                locinput = PL_reginput;
4981            else {
4982                locinput = PL_reginput = st->locinput;
4983                nextchr = UCHARAT(locinput);
4984            }
4985            scan = ST.me + ARG(ST.me);
4986            if (scan == ST.me)
4987                scan = NULL;
4988            continue; /* execute B */
4989
4990#undef ST
4991
4992        case LONGJMP:
4993            next = scan + ARG(scan);
4994            if (next == scan)
4995                next = NULL;
4996            break;
4997        case COMMIT:
4998            reginfo->cutpoint = PL_regeol;
4999            /* FALLTHROUGH */
5000        case PRUNE:
5001            PL_reginput = locinput;
5002            if (!scan->flags)
5003                sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5004            PUSH_STATE_GOTO(COMMIT_next,next);
5005            /* NOTREACHED */
5006        case COMMIT_next_fail:
5007            no_final = 1;    
5008            /* FALLTHROUGH */       
5009        case OPFAIL:
5010            sayNO;
5011            /* NOTREACHED */
5012
5013#define ST st->u.mark
5014        case MARKPOINT:
5015            ST.prev_mark = mark_state;
5016            ST.mark_name = sv_commit = sv_yes_mark 
5017                = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5018            mark_state = st;
5019            ST.mark_loc = PL_reginput = locinput;
5020            PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5021            /* NOTREACHED */
5022        case MARKPOINT_next:
5023            mark_state = ST.prev_mark;
5024            sayYES;
5025            /* NOTREACHED */
5026        case MARKPOINT_next_fail:
5027            if (popmark && sv_eq(ST.mark_name,popmark)) 
5028            {
5029                if (ST.mark_loc > startpoint)
5030                    reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5031                popmark = NULL; /* we found our mark */
5032                sv_commit = ST.mark_name;
5033
5034                DEBUG_EXECUTE_r({
5035                        PerlIO_printf(Perl_debug_log,
5036                            "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5037                            REPORT_CODE_OFF+depth*2, "", 
5038                            PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5039                });
5040            }
5041            mark_state = ST.prev_mark;
5042            sv_yes_mark = mark_state ? 
5043                mark_state->u.mark.mark_name : NULL;
5044            sayNO;
5045            /* NOTREACHED */
5046        case SKIP:
5047            PL_reginput = locinput;
5048            if (scan->flags) {
5049                /* (*SKIP) : if we fail we cut here*/
5050                ST.mark_name = NULL;
5051                ST.mark_loc = locinput;
5052                PUSH_STATE_GOTO(SKIP_next,next);    
5053            } else {
5054                /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
5055                   otherwise do nothing.  Meaning we need to scan 
5056                 */
5057                regmatch_state *cur = mark_state;
5058                SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5059                
5060                while (cur) {
5061                    if ( sv_eq( cur->u.mark.mark_name, 
5062                                find ) ) 
5063                    {
5064                        ST.mark_name = find;
5065                        PUSH_STATE_GOTO( SKIP_next, next );
5066                    }
5067                    cur = cur->u.mark.prev_mark;
5068                }
5069            }    
5070            /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5071            break;    
5072        case SKIP_next_fail:
5073            if (ST.mark_name) {
5074                /* (*CUT:NAME) - Set up to search for the name as we 
5075                   collapse the stack*/
5076                popmark = ST.mark_name;    
5077            } else {
5078                /* (*CUT) - No name, we cut here.*/
5079                if (ST.mark_loc > startpoint)
5080                    reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5081                /* but we set sv_commit to latest mark_name if there
5082                   is one so they can test to see how things lead to this
5083                   cut */    
5084                if (mark_state) 
5085                    sv_commit=mark_state->u.mark.mark_name;                 
5086            } 
5087            no_final = 1; 
5088            sayNO;
5089            /* NOTREACHED */
5090#undef ST
5091        case FOLDCHAR:
5092            n = ARG(scan);
5093            if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5094                locinput += ln;
5095            } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5096                sayNO;
5097            } else  {
5098                U8 folded[UTF8_MAXBYTES_CASE+1];
5099                STRLEN foldlen;
5100                const char * const l = locinput;
5101                char *e = PL_regeol;
5102                to_uni_fold(n, folded, &foldlen);
5103
5104                if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5105                               l, &e, 0,  do_utf8)) {
5106                        sayNO;
5107                }
5108                locinput = e;
5109            } 
5110            nextchr = UCHARAT(locinput);  
5111            break;
5112        case LNBREAK:
5113            if ((n=is_LNBREAK(locinput,do_utf8))) {
5114                locinput += n;
5115                nextchr = UCHARAT(locinput);
5116            } else
5117                sayNO;
5118            break;
5119
5120#define CASE_CLASS(nAmE)                              \
5121        case nAmE:                                    \
5122            if ((n=is_##nAmE(locinput,do_utf8))) {    \
5123                locinput += n;                        \
5124                nextchr = UCHARAT(locinput);          \
5125            } else                                    \
5126                sayNO;                                \
5127            break;                                    \
5128        case N##nAmE:                                 \
5129            if ((n=is_##nAmE(locinput,do_utf8))) {    \
5130                sayNO;                                \
5131            } else {                                  \
5132                locinput += UTF8SKIP(locinput);       \
5133                nextchr = UCHARAT(locinput);          \
5134            }                                         \
5135            break
5136
5137        CASE_CLASS(VERTWS);
5138        CASE_CLASS(HORIZWS);
5139#undef CASE_CLASS
5140
5141        default:
5142            PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5143                          PTR2UV(scan), OP(scan));
5144            Perl_croak(aTHX_ "regexp memory corruption");
5145            
5146        } /* end switch */ 
5147
5148        /* switch break jumps here */
5149        scan = next; /* prepare to execute the next op and ... */
5150        continue;    /* ... jump back to the top, reusing st */
5151        /* NOTREACHED */
5152
5153      push_yes_state:
5154        /* push a state that backtracks on success */
5155        st->u.yes.prev_yes_state = yes_state;
5156        yes_state = st;
5157        /* FALL THROUGH */
5158      push_state:
5159        /* push a new regex state, then continue at scan  */
5160        {
5161            regmatch_state *newst;
5162
5163            DEBUG_STACK_r({
5164                regmatch_state *cur = st;
5165                regmatch_state *curyes = yes_state;
5166                int curd = depth;
5167                regmatch_slab *slab = PL_regmatch_slab;
5168                for (;curd > -1;cur--,curd--) {
5169                    if (cur < SLAB_FIRST(slab)) {
5170                        slab = slab->prev;
5171                        cur = SLAB_LAST(slab);
5172                    }
5173                    PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5174                        REPORT_CODE_OFF + 2 + depth * 2,"",
5175                        curd, PL_reg_name[cur->resume_state],
5176                        (curyes == cur) ? "yes" : ""
5177                    );
5178                    if (curyes == cur)
5179                        curyes = cur->u.yes.prev_yes_state;
5180                }
5181            } else 
5182                DEBUG_STATE_pp("push")
5183            );
5184            depth++;
5185            st->locinput = locinput;
5186            newst = st+1; 
5187            if (newst >  SLAB_LAST(PL_regmatch_slab))
5188                newst = S_push_slab(aTHX);
5189            PL_regmatch_state = newst;
5190
5191            locinput = PL_reginput;
5192            nextchr = UCHARAT(locinput);
5193            st = newst;
5194            continue;
5195            /* NOTREACHED */
5196        }
5197    }
5198
5199    /*
5200    * We get here only if there's trouble -- normally "case END" is
5201    * the terminating point.
5202    */
5203    Perl_croak(aTHX_ "corrupted regexp pointers");
5204    /*NOTREACHED*/
5205    sayNO;
5206
5207yes:
5208    if (yes_state) {
5209        /* we have successfully completed a subexpression, but we must now
5210         * pop to the state marked by yes_state and continue from there */
5211        assert(st != yes_state);
5212#ifdef DEBUGGING
5213        while (st != yes_state) {
5214            st--;
5215            if (st < SLAB_FIRST(PL_regmatch_slab)) {
5216                PL_regmatch_slab = PL_regmatch_slab->prev;
5217                st = SLAB_LAST(PL_regmatch_slab);
5218            }
5219            DEBUG_STATE_r({
5220                if (no_final) {
5221                    DEBUG_STATE_pp("pop (no final)");        
5222                } else {
5223                    DEBUG_STATE_pp("pop (yes)");
5224                }
5225            });
5226            depth--;
5227        }
5228#else
5229        while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5230            || yes_state > SLAB_LAST(PL_regmatch_slab))
5231        {
5232            /* not in this slab, pop slab */
5233            depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5234            PL_regmatch_slab = PL_regmatch_slab->prev;
5235            st = SLAB_LAST(PL_regmatch_slab);
5236        }
5237        depth -= (st - yes_state);
5238#endif
5239        st = yes_state;
5240        yes_state = st->u.yes.prev_yes_state;
5241        PL_regmatch_state = st;
5242        
5243        if (no_final) {
5244            locinput= st->locinput;
5245            nextchr = UCHARAT(locinput);
5246        }
5247        state_num = st->resume_state + no_final;
5248        goto reenter_switch;
5249    }
5250
5251    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5252                          PL_colors[4], PL_colors[5]));
5253
5254    if (PL_reg_eval_set) {
5255        /* each successfully executed (?{...}) block does the equivalent of
5256         *   local $^R = do {...}
5257         * When popping the save stack, all these locals would be undone;
5258         * bypass this by setting the outermost saved $^R to the latest
5259         * value */
5260        if (oreplsv != GvSV(PL_replgv))
5261            sv_setsv(oreplsv, GvSV(PL_replgv));
5262    }
5263    result = 1;
5264    goto final_exit;
5265
5266no:
5267    DEBUG_EXECUTE_r(
5268        PerlIO_printf(Perl_debug_log,
5269            "%*s  %sfailed...%s\n",
5270            REPORT_CODE_OFF+depth*2, "", 
5271            PL_colors[4], PL_colors[5])
5272        );
5273
5274no_silent:
5275    if (no_final) {
5276        if (yes_state) {
5277            goto yes;
5278        } else {
5279            goto final_exit;
5280        }
5281    }    
5282    if (depth) {
5283        /* there's a previous state to backtrack to */
5284        st--;
5285        if (st < SLAB_FIRST(PL_regmatch_slab)) {
5286            PL_regmatch_slab = PL_regmatch_slab->prev;
5287            st = SLAB_LAST(PL_regmatch_slab);
5288        }
5289        PL_regmatch_state = st;
5290        locinput= st->locinput;
5291        nextchr = UCHARAT(locinput);
5292
5293        DEBUG_STATE_pp("pop");
5294        depth--;
5295        if (yes_state == st)
5296            yes_state = st->u.yes.prev_yes_state;
5297
5298        state_num = st->resume_state + 1; /* failure = success + 1 */
5299        goto reenter_switch;
5300    }
5301    result = 0;
5302
5303  final_exit:
5304    if (rex->intflags & PREGf_VERBARG_SEEN) {
5305        SV *sv_err = get_sv("REGERROR", 1);
5306        SV *sv_mrk = get_sv("REGMARK", 1);
5307        if (result) {
5308            sv_commit = &PL_sv_no;
5309            if (!sv_yes_mark) 
5310                sv_yes_mark = &PL_sv_yes;
5311        } else {
5312            if (!sv_commit) 
5313                sv_commit = &PL_sv_yes;
5314            sv_yes_mark = &PL_sv_no;
5315        }
5316        sv_setsv(sv_err, sv_commit);
5317        sv_setsv(sv_mrk, sv_yes_mark);
5318    }
5319
5320    /* clean up; in particular, free all slabs above current one */
5321    LEAVE_SCOPE(oldsave);
5322
5323    return result;
5324}
5325
5326/*
5327 - regrepeat - repeatedly match something simple, report how many
5328 */
5329/*
5330 * [This routine now assumes that it will only match on things of length 1.
5331 * That was true before, but now we assume scan - reginput is the count,
5332 * rather than incrementing count on every character.  [Er, except utf8.]]
5333 */
5334STATIC I32
5335S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5336{
5337    dVAR;
5338    register char *scan;
5339    register I32 c;
5340    register char *loceol = PL_regeol;
5341    register I32 hardcount = 0;
5342    register bool do_utf8 = PL_reg_match_utf8;
5343#ifndef DEBUGGING
5344    PERL_UNUSED_ARG(depth);
5345#endif
5346
5347    PERL_ARGS_ASSERT_REGREPEAT;
5348
5349    scan = PL_reginput;
5350    if (max == REG_INFTY)
5351        max = I32_MAX;
5352    else if (max < loceol - scan)
5353        loceol = scan + max;
5354    switch (OP(p)) {
5355    case REG_ANY:
5356        if (do_utf8) {
5357            loceol = PL_regeol;
5358            while (scan < loceol && hardcount < max && *scan != '\n') {
5359                scan += UTF8SKIP(scan);
5360                hardcount++;
5361            }
5362        } else {
5363            while (scan < loceol && *scan != '\n')
5364                scan++;
5365        }
5366        break;
5367    case SANY:
5368        if (do_utf8) {
5369            loceol = PL_regeol;
5370            while (scan < loceol && hardcount < max) {
5371                scan += UTF8SKIP(scan);
5372                hardcount++;
5373            }
5374        }
5375        else
5376            scan = loceol;
5377        break;
5378    case CANY:
5379        scan = loceol;
5380        break;
5381    case EXACT:         /* length of string is 1 */
5382        c = (U8)*STRING(p);
5383        while (scan < loceol && UCHARAT(scan) == c)
5384            scan++;
5385        break;
5386    case EXACTF:        /* length of string is 1 */
5387        c = (U8)*STRING(p);
5388        while (scan < loceol &&
5389               (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5390            scan++;
5391        break;
5392    case EXACTFL:       /* length of string is 1 */
5393        PL_reg_flags |= RF_tainted;
5394        c = (U8)*STRING(p);
5395        while (scan < loceol &&
5396               (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5397            scan++;
5398        break;
5399    case ANYOF:
5400        if (do_utf8) {
5401            loceol = PL_regeol;
5402            while (hardcount < max && scan < loceol &&
5403                   reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5404                scan += UTF8SKIP(scan);
5405                hardcount++;
5406            }
5407        } else {
5408            while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5409                scan++;
5410        }
5411        break;
5412    case ALNUM:
5413        if (do_utf8) {
5414            loceol = PL_regeol;
5415            LOAD_UTF8_CHARCLASS_ALNUM();
5416            while (hardcount < max && scan < loceol &&
5417                   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5418                scan += UTF8SKIP(scan);
5419                hardcount++;
5420            }
5421        } else {
5422            while (scan < loceol && isALNUM(*scan))
5423                scan++;
5424        }
5425        break;
5426    case ALNUML:
5427        PL_reg_flags |= RF_tainted;
5428        if (do_utf8) {
5429            loceol = PL_regeol;
5430            while (hardcount < max && scan < loceol &&
5431                   isALNUM_LC_utf8((U8*)scan)) {
5432                scan += UTF8SKIP(scan);
5433                hardcount++;
5434            }
5435        } else {
5436            while (scan < loceol && isALNUM_LC(*scan))
5437                scan++;
5438        }
5439        break;
5440    case NALNUM:
5441        if (do_utf8) {
5442            loceol = PL_regeol;
5443            LOAD_UTF8_CHARCLASS_ALNUM();
5444            while (hardcount < max && scan < loceol &&
5445                   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5446                scan += UTF8SKIP(scan);
5447                hardcount++;
5448            }
5449        } else {
5450            while (scan < loceol && !isALNUM(*scan))
5451                scan++;
5452        }
5453        break;
5454    case NALNUML:
5455        PL_reg_flags |= RF_tainted;
5456        if (do_utf8) {
5457            loceol = PL_regeol;
5458            while (hardcount < max && scan < loceol &&
5459                   !isALNUM_LC_utf8((U8*)scan)) {
5460                scan += UTF8SKIP(scan);
5461                hardcount++;
5462            }
5463        } else {
5464            while (scan < loceol && !isALNUM_LC(*scan))
5465                scan++;
5466        }
5467        break;
5468    case SPACE:
5469        if (do_utf8) {
5470            loceol = PL_regeol;
5471            LOAD_UTF8_CHARCLASS_SPACE();
5472            while (hardcount < max && scan < loceol &&
5473                   (*scan == ' ' ||
5474                    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5475                scan += UTF8SKIP(scan);
5476                hardcount++;
5477            }
5478        } else {
5479            while (scan < loceol && isSPACE(*scan))
5480                scan++;
5481        }
5482        break;
5483    case SPACEL:
5484        PL_reg_flags |= RF_tainted;
5485        if (do_utf8) {
5486            loceol = PL_regeol;
5487            while (hardcount < max && scan < loceol &&
5488                   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5489                scan += UTF8SKIP(scan);
5490                hardcount++;
5491            }
5492        } else {
5493            while (scan < loceol && isSPACE_LC(*scan))
5494                scan++;
5495        }
5496        break;
5497    case NSPACE:
5498        if (do_utf8) {
5499            loceol = PL_regeol;
5500            LOAD_UTF8_CHARCLASS_SPACE();
5501            while (hardcount < max && scan < loceol &&
5502                   !(*scan == ' ' ||
5503                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5504                scan += UTF8SKIP(scan);
5505                hardcount++;
5506            }
5507        } else {
5508            while (scan < loceol && !isSPACE(*scan))
5509                scan++;
5510        }
5511        break;
5512    case NSPACEL:
5513        PL_reg_flags |= RF_tainted;
5514        if (do_utf8) {
5515            loceol = PL_regeol;
5516            while (hardcount < max && scan < loceol &&
5517                   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5518                scan += UTF8SKIP(scan);
5519                hardcount++;
5520            }
5521        } else {
5522            while (scan < loceol && !isSPACE_LC(*scan))
5523                scan++;
5524        }
5525        break;
5526    case DIGIT:
5527        if (do_utf8) {
5528            loceol = PL_regeol;
5529            LOAD_UTF8_CHARCLASS_DIGIT();
5530            while (hardcount < max && scan < loceol &&
5531                   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5532                scan += UTF8SKIP(scan);
5533                hardcount++;
5534            }
5535        } else {
5536            while (scan < loceol && isDIGIT(*scan))
5537                scan++;
5538        }
5539        break;
5540    case NDIGIT:
5541        if (do_utf8) {
5542            loceol = PL_regeol;
5543            LOAD_UTF8_CHARCLASS_DIGIT();
5544            while (hardcount < max && scan < loceol &&
5545                   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5546                scan += UTF8SKIP(scan);
5547                hardcount++;
5548            }
5549        } else {
5550            while (scan < loceol && !isDIGIT(*scan))
5551                scan++;
5552        }
5553    case LNBREAK:
5554        if (do_utf8) {
5555            loceol = PL_regeol;
5556            while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5557                scan += c;
5558                hardcount++;
5559            }
5560        } else {
5561            /*
5562              LNBREAK can match two latin chars, which is ok,
5563              because we have a null terminated string, but we
5564              have to use hardcount in this situation
5565            */
5566            while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5567                scan+=c;
5568                hardcount++;
5569            }
5570        }       
5571        break;
5572    case HORIZWS:
5573        if (do_utf8) {
5574            loceol = PL_regeol;
5575            while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5576                scan += c;
5577                hardcount++;
5578            }
5579        } else {
5580            while (scan < loceol && is_HORIZWS_latin1(scan)) 
5581                scan++;         
5582        }       
5583        break;
5584    case NHORIZWS:
5585        if (do_utf8) {
5586            loceol = PL_regeol;
5587            while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5588                scan += UTF8SKIP(scan);
5589                hardcount++;
5590            }
5591        } else {
5592            while (scan < loceol && !is_HORIZWS_latin1(scan))
5593                scan++;
5594
5595        }       
5596        break;
5597    case VERTWS:
5598        if (do_utf8) {
5599            loceol = PL_regeol;
5600            while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5601                scan += c;
5602                hardcount++;
5603            }
5604        } else {
5605            while (scan < loceol && is_VERTWS_latin1(scan)) 
5606                scan++;
5607
5608        }       
5609        break;
5610    case NVERTWS:
5611        if (do_utf8) {
5612            loceol = PL_regeol;
5613            while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5614                scan += UTF8SKIP(scan);
5615                hardcount++;
5616            }
5617        } else {
5618            while (scan < loceol && !is_VERTWS_latin1(scan)) 
5619                scan++;
5620          
5621        }       
5622        break;
5623
5624    default:            /* Called on something of 0 width. */
5625        break;          /* So match right here or not at all. */
5626    }
5627
5628    if (hardcount)
5629        c = hardcount;
5630    else
5631        c = scan - PL_reginput;
5632    PL_reginput = scan;
5633
5634    DEBUG_r({
5635        GET_RE_DEBUG_FLAGS_DECL;
5636        DEBUG_EXECUTE_r({
5637            SV * const prop = sv_newmortal();
5638            regprop(prog, prop, p);
5639            PerlIO_printf(Perl_debug_log,
5640                        "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5641                        REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5642        });
5643    });
5644
5645    return(c);
5646}
5647
5648
5649#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5650/*
5651- regclass_swash - prepare the utf8 swash
5652*/
5653
5654SV *
5655Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5656{
5657    dVAR;
5658    SV *sw  = NULL;
5659    SV *si  = NULL;
5660    SV *alt = NULL;
5661    RXi_GET_DECL(prog,progi);
5662    const struct reg_data * const data = prog ? progi->data : NULL;
566