1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36#ifdef PERL_EXT_RE_BUILD
37#include "re_top.h"
38#endif
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73#include "EXTERN.h"
74#define PERL_IN_REGEXEC_C
75#include "perl.h"
76
77#ifdef PERL_IN_XSUB_RE
78# include "re_comp.h"
79#else
80# include "regcomp.h"
81#endif
82
83#define RF_tainted 1
84#define RF_warned 2
85
86#define RF_utf8 8
87
88#define UTF ((PL_reg_flags & RF_utf8) != 0)
89
90#define RS_init 1
91#define RS_set 2
92
93#ifndef STATIC
94#define STATIC static
95#endif
96
97#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
98
99
100
101
102
103#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
105
106#define HOPc(pos,off) \
107 (char *)(PL_reg_match_utf8 \
108 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
109 : (U8*)(pos + off))
110#define HOPBACKc(pos, off) \
111 (char*)(PL_reg_match_utf8\
112 ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
113 : (pos - off >= PL_bostr) \
114 ? (U8*)pos - off \
115 : NULL)
116
117#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
118#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
119
120#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
121 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
122#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
123#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
124#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
125#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
126
127
128
129
130
131#define JUMPABLE(rn) ( \
132 OP(rn) == OPEN || \
133 (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
134 OP(rn) == EVAL || \
135 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
136 OP(rn) == PLUS || OP(rn) == MINMOD || \
137 OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
138 (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
139)
140#define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
141
142#define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
143
144#if 0
145
146
147#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
148#define IS_TEXTF(rn) ( OP(rn)==EXACTF || OP(rn)==REFF || OP(rn)==NREFF )
149#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
150
151#else
152
153#define IS_TEXT(rn) ( OP(rn)==EXACT )
154#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
155#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
156
157#endif
158
159
160
161
162
163#define FIND_NEXT_IMPT(rn) STMT_START { \
164 while (JUMPABLE(rn)) { \
165 const OPCODE type = OP(rn); \
166 if (type == SUSPEND || PL_regkind[type] == CURLY) \
167 rn = NEXTOPER(NEXTOPER(rn)); \
168 else if (type == PLUS) \
169 rn = NEXTOPER(rn); \
170 else if (type == IFMATCH) \
171 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
172 else rn += NEXT_OFF(rn); \
173 } \
174} STMT_END
175
176
177static void restore_pos(pTHX_ void *arg);
178
179STATIC CHECKPOINT
180S_regcppush(pTHX_ I32 parenfloor)
181{
182 dVAR;
183 const int retval = PL_savestack_ix;
184#define REGCP_PAREN_ELEMS 4
185 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
186 int p;
187 GET_RE_DEBUG_FLAGS_DECL;
188
189 if (paren_elems_to_push < 0)
190 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
191
192#define REGCP_OTHER_ELEMS 7
193 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
194
195 for (p = PL_regsize; p > parenfloor; p--) {
196
197 SSPUSHINT(PL_regoffs[p].end);
198 SSPUSHINT(PL_regoffs[p].start);
199 SSPUSHPTR(PL_reg_start_tmp[p]);
200 SSPUSHINT(p);
201 DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
202 " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
203 (UV)p, (IV)PL_regoffs[p].start,
204 (IV)(PL_reg_start_tmp[p] - PL_bostr),
205 (IV)PL_regoffs[p].end
206 ));
207 }
208
209 SSPUSHPTR(PL_regoffs);
210 SSPUSHINT(PL_regsize);
211 SSPUSHINT(*PL_reglastparen);
212 SSPUSHINT(*PL_reglastcloseparen);
213 SSPUSHPTR(PL_reginput);
214#define REGCP_FRAME_ELEMS 2
215
216
217 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 SSPUSHINT(SAVEt_REGCONTEXT);
219
220 return retval;
221}
222
223
224#define REGCP_SET(cp) \
225 DEBUG_STATE_r( \
226 PerlIO_printf(Perl_debug_log, \
227 " Setting an EVAL scope, savestack=%"IVdf"\n", \
228 (IV)PL_savestack_ix)); \
229 cp = PL_savestack_ix
230
231#define REGCP_UNWIND(cp) \
232 DEBUG_STATE_r( \
233 if (cp != PL_savestack_ix) \
234 PerlIO_printf(Perl_debug_log, \
235 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
236 (IV)(cp), (IV)PL_savestack_ix)); \
237 regcpblow(cp)
238
239STATIC char *
240S_regcppop(pTHX_ const regexp *rex)
241{
242 dVAR;
243 U32 i;
244 char *input;
245 GET_RE_DEBUG_FLAGS_DECL;
246
247 PERL_ARGS_ASSERT_REGCPPOP;
248
249
250 i = SSPOPINT;
251 assert(i == SAVEt_REGCONTEXT);
252 i = SSPOPINT;
253 input = (char *) SSPOPPTR;
254 *PL_reglastcloseparen = SSPOPINT;
255 *PL_reglastparen = SSPOPINT;
256 PL_regsize = SSPOPINT;
257 PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
258
259
260
261 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
262 i > 0; i -= REGCP_PAREN_ELEMS) {
263 I32 tmps;
264 U32 paren = (U32)SSPOPINT;
265 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
266 PL_regoffs[paren].start = SSPOPINT;
267 tmps = SSPOPINT;
268 if (paren <= *PL_reglastparen)
269 PL_regoffs[paren].end = tmps;
270 DEBUG_BUFFERS_r(
271 PerlIO_printf(Perl_debug_log,
272 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
273 (UV)paren, (IV)PL_regoffs[paren].start,
274 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
275 (IV)PL_regoffs[paren].end,
276 (paren > *PL_reglastparen ? "(no)" : ""));
277 );
278 }
279 DEBUG_BUFFERS_r(
280 if (*PL_reglastparen + 1 <= rex->nparens) {
281 PerlIO_printf(Perl_debug_log,
282 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
283 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
284 }
285 );
286#if 1
287
288
289
290
291
292
293
294
295
296 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
297 if (i > PL_regsize)
298 PL_regoffs[i].start = -1;
299 PL_regoffs[i].end = -1;
300 }
301#endif
302 return input;
303}
304
305#define regcpblow(cp) LEAVE_SCOPE(cp)
306
307
308
309
310
311#ifndef PERL_IN_XSUB_RE
312
313
314
315I32
316Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
317 char *strbeg, I32 minend, SV *screamer, U32 nosave)
318
319
320
321
322{
323 PERL_ARGS_ASSERT_PREGEXEC;
324
325 return
326 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
327 nosave ? 0 : REXEC_COPY_STR);
328}
329#endif
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381char *
382Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
383 char *strend, const U32 flags, re_scream_pos_data *data)
384{
385 dVAR;
386 register I32 start_shift = 0;
387
388 register I32 end_shift = 0;
389 register char *s;
390 register SV *check;
391 char *strbeg;
392 char *t;
393 const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0;
394 I32 ml_anch;
395 register char *other_last = NULL;
396 char *check_at = NULL;
397 const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
398 RXi_GET_DECL(prog,progi);
399#ifdef DEBUGGING
400 const char * const i_strpos = strpos;
401#endif
402 GET_RE_DEBUG_FLAGS_DECL;
403
404 PERL_ARGS_ASSERT_RE_INTUIT_START;
405
406 RX_MATCH_UTF8_set(prog,do_utf8);
407
408 if (RX_UTF8(prog)) {
409 PL_reg_flags |= RF_utf8;
410 }
411 DEBUG_EXECUTE_r(
412 debug_start_match(prog, do_utf8, strpos, strend,
413 sv ? "Guessing start of match in sv for"
414 : "Guessing start of match in string for");
415 );
416
417
418 if (prog->minlen > strend - strpos) {
419 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
420 "String too short... [re_intuit_start]\n"));
421 goto fail;
422 }
423
424 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
425 PL_regeol = strend;
426 if (do_utf8) {
427 if (!prog->check_utf8 && prog->check_substr)
428 to_utf8_substr(prog);
429 check = prog->check_utf8;
430 } else {
431 if (!prog->check_substr && prog->check_utf8)
432 to_byte_substr(prog);
433 check = prog->check_substr;
434 }
435 if (check == &PL_sv_undef) {
436 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
437 "Non-utf8 string cannot match utf8 check string\n"));
438 goto fail;
439 }
440 if (prog->extflags & RXf_ANCH) {
441 ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
442 || ( (prog->extflags & RXf_ANCH_BOL)
443 && !multiline ) );
444
445 if (!ml_anch) {
446 if ( !(prog->extflags & RXf_ANCH_GPOS)
447 && !(prog->intflags & PREGf_IMPLICIT)
448
449 && sv && !SvROK(sv)
450 && (strpos != strbeg)) {
451 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
452 goto fail;
453 }
454 if (prog->check_offset_min == prog->check_offset_max &&
455 !(prog->extflags & RXf_CANY_SEEN)) {
456
457 I32 slen;
458
459 s = HOP3c(strpos, prog->check_offset_min, strend);
460
461 if (SvTAIL(check)) {
462 slen = SvCUR(check);
463
464 if ( strend - s > slen || strend - s < slen - 1
465 || (strend - s == slen && strend[-1] != '\n')) {
466 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
467 goto fail_finish;
468 }
469
470 slen--;
471 if (slen && (*SvPVX_const(check) != *s
472 || (slen > 1
473 && memNE(SvPVX_const(check), s, slen)))) {
474 report_neq:
475 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
476 goto fail_finish;
477 }
478 }
479 else if (*SvPVX_const(check) != *s
480 || ((slen = SvCUR(check)) > 1
481 && memNE(SvPVX_const(check), s, slen)))
482 goto report_neq;
483 check_at = s;
484 goto success_at_start;
485 }
486 }
487
488 s = strpos;
489 start_shift = prog->check_offset_min;
490 end_shift = prog->check_end_shift;
491
492 if (!ml_anch) {
493 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
494 - (SvTAIL(check) != 0);
495 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
496
497 if (end_shift < eshift)
498 end_shift = eshift;
499 }
500 }
501 else {
502 ml_anch = 0;
503 s = strpos;
504 start_shift = prog->check_offset_min;
505 end_shift = prog->check_end_shift;
506
507
508 }
509
510#ifdef QDEBUGGING
511 if (end_shift < 0)
512 Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
513 (IV)end_shift, RX_PRECOMP(prog));
514#endif
515
516 restart:
517
518
519
520 {
521 I32 srch_start_shift = start_shift;
522 I32 srch_end_shift = end_shift;
523 if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
524 srch_end_shift -= ((strbeg - s) - srch_start_shift);
525 srch_start_shift = strbeg - s;
526 }
527 DEBUG_OPTIMISE_MORE_r({
528 PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
529 (IV)prog->check_offset_min,
530 (IV)srch_start_shift,
531 (IV)srch_end_shift,
532 (IV)prog->check_end_shift);
533 });
534
535 if (flags & REXEC_SCREAM) {
536 I32 p = -1;
537 I32 * const pp = data ? data->scream_pos : &p;
538
539 if (PL_screamfirst[BmRARE(check)] >= 0
540 || ( BmRARE(check) == '\n'
541 && (BmPREVIOUS(check) == SvCUR(check) - 1)
542 && SvTAIL(check) ))
543 s = screaminstr(sv, check,
544 srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
545 else
546 goto fail_finish;
547
548 if (s && RXp_MATCH_COPIED(prog))
549 s = strbeg + (s - SvPVX_const(sv));
550 if (data)
551 *data->scream_olds = s;
552 }
553 else {
554 U8* start_point;
555 U8* end_point;
556 if (prog->extflags & RXf_CANY_SEEN) {
557 start_point= (U8*)(s + srch_start_shift);
558 end_point= (U8*)(strend - srch_end_shift);
559 } else {
560 start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
561 end_point= HOP3(strend, -srch_end_shift, strbeg);
562 }
563 DEBUG_OPTIMISE_MORE_r({
564 PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
565 (int)(end_point - start_point),
566 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
567 start_point);
568 });
569
570 s = fbm_instr( start_point, end_point,
571 check, multiline ? FBMrf_MULTILINE : 0);
572 }
573 }
574
575
576
577 DEBUG_EXECUTE_r({
578 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
579 SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
580 PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
581 (s ? "Found" : "Did not find"),
582 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
583 ? "anchored" : "floating"),
584 quoted,
585 RE_SV_TAIL(check),
586 (s ? " at offset " : "...\n") );
587 });
588
589 if (!s)
590 goto fail_finish;
591
592 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
593
594
595
596
597
598
599
600
601
602 check_at=s;
603
604
605
606
607
608
609
610
611
612
613 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
614 : (prog->float_substr && prog->anchored_substr))
615 {
616
617
618 if (!other_last)
619 other_last = strpos;
620 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
621 do_other_anchored:
622 {
623 char * const last = HOP3c(s, -start_shift, strbeg);
624 char *last1, *last2;
625 char * const saved_s = s;
626 SV* must;
627
628 t = s - prog->check_offset_max;
629 if (s - strpos > prog->check_offset_max
630 && (!do_utf8
631 || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
632 && t > strpos)))
633 NOOP;
634 else
635 t = strpos;
636 t = HOP3c(t, prog->anchored_offset, strend);
637 if (t < other_last)
638 t = other_last;
639 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
640 if (last < last1)
641 last1 = last;
642
643
644
645
646
647
648
649 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
650 if (must == &PL_sv_undef) {
651 s = (char*)NULL;
652 DEBUG_r(must = prog->anchored_utf8);
653 }
654 else
655 s = fbm_instr(
656 (unsigned char*)t,
657 HOP3(HOP3(last1, prog->anchored_offset, strend)
658 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
659 must,
660 multiline ? FBMrf_MULTILINE : 0
661 );
662 DEBUG_EXECUTE_r({
663 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
664 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
665 PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
666 (s ? "Found" : "Contradicts"),
667 quoted, RE_SV_TAIL(must));
668 });
669
670
671 if (!s) {
672 if (last1 >= last2) {
673 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
674 ", giving up...\n"));
675 goto fail_finish;
676 }
677 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
678 ", trying floating at offset %ld...\n",
679 (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
680 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
681 s = HOP3c(last, 1, strend);
682 goto restart;
683 }
684 else {
685 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
686 (long)(s - i_strpos)));
687 t = HOP3c(s, -prog->anchored_offset, strbeg);
688 other_last = HOP3c(s, 1, strend);
689 s = saved_s;
690 if (t == strpos)
691 goto try_at_start;
692 goto try_at_offset;
693 }
694 }
695 }
696 else {
697 char *last, *last1;
698 char * const saved_s = s;
699 SV* must;
700
701 t = HOP3c(s, -start_shift, strbeg);
702 last1 = last =
703 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
704 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
705 last = HOP3c(t, prog->float_max_offset, strend);
706 s = HOP3c(t, prog->float_min_offset, strend);
707 if (s < other_last)
708 s = other_last;
709
710 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
711
712
713
714 if (must == &PL_sv_undef) {
715 s = (char*)NULL;
716 DEBUG_r(must = prog->float_utf8);
717 }
718 else
719 s = fbm_instr((unsigned char*)s,
720 (unsigned char*)last + SvCUR(must)
721 - (SvTAIL(must)!=0),
722 must, multiline ? FBMrf_MULTILINE : 0);
723 DEBUG_EXECUTE_r({
724 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
725 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
726 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
727 (s ? "Found" : "Contradicts"),
728 quoted, RE_SV_TAIL(must));
729 });
730 if (!s) {
731 if (last1 == last) {
732 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
733 ", giving up...\n"));
734 goto fail_finish;
735 }
736 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
737 ", trying anchored starting at offset %ld...\n",
738 (long)(saved_s + 1 - i_strpos)));
739 other_last = last;
740 s = HOP3c(t, 1, strend);
741 goto restart;
742 }
743 else {
744 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
745 (long)(s - i_strpos)));
746 other_last = s;
747 s = saved_s;
748 if (t == strpos)
749 goto try_at_start;
750 goto try_at_offset;
751 }
752 }
753 }
754
755
756 t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
757
758 DEBUG_OPTIMISE_MORE_r(
759 PerlIO_printf(Perl_debug_log,
760 "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
761 (IV)prog->check_offset_min,
762 (IV)prog->check_offset_max,
763 (IV)(s-strpos),
764 (IV)(t-strpos),
765 (IV)(t-s),
766 (IV)(strend-strpos)
767 )
768 );
769
770 if (s - strpos > prog->check_offset_max
771 && (!do_utf8
772 || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
773 && t > strpos)))
774 {
775
776
777 try_at_offset:
778 if (ml_anch && t[-1] != '\n') {
779
780
781
782
783
784
785 find_anchor:
786 while (t < strend - prog->minlen) {
787 if (*t == '\n') {
788 if (t < check_at - prog->check_offset_min) {
789 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
790
791
792
793
794
795
796
797 strpos = t + 1;
798 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
799 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
800 goto do_other_anchored;
801 }
802
803
804 s = t + 1;
805 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
806 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
807 goto set_useful;
808 }
809
810
811
812 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
813 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
814 other_last = strpos = s = t + 1;
815 goto restart;
816 }
817 t++;
818 }
819 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
820 PL_colors[0], PL_colors[1]));
821 goto fail_finish;
822 }
823 else {
824 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
825 PL_colors[0], PL_colors[1]));
826 }
827 s = t;
828 set_useful:
829 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);
830 }
831 else {
832
833
834
835
836 try_at_start:
837
838
839 if (ml_anch && sv && !SvROK(sv)
840 && (strpos != strbeg) && strpos[-1] != '\n'
841
842 && !(prog->intflags & PREGf_IMPLICIT))
843 {
844 t = strpos;
845 goto find_anchor;
846 }
847 DEBUG_EXECUTE_r( if (ml_anch)
848 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
849 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
850 );
851 success_at_start:
852 if (!(prog->intflags & PREGf_NAUGHTY)
853 && (do_utf8 ? (
854 prog->check_utf8
855 && --BmUSEFUL(prog->check_utf8) < 0
856 && (prog->check_utf8 == prog->float_utf8)
857 ) : (
858 prog->check_substr
859 && --BmUSEFUL(prog->check_substr) < 0
860 && (prog->check_substr == prog->float_substr)
861 )))
862 {
863
864 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
865 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
866 if (do_utf8 ? prog->check_substr : prog->check_utf8)
867 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
868 prog->check_substr = prog->check_utf8 = NULL;
869 prog->float_substr = prog->float_utf8 = NULL;
870 check = NULL;
871 s = strpos;
872
873
874
875 prog->extflags &= ~RXf_USE_INTUIT;
876 }
877 else
878 s = strpos;
879 }
880
881
882
883
884
885 if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
886
887
888
889
890
891
892
893
894 const U8* const str = (U8*)STRING(progi->regstclass);
895 const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
896 ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
897 : 1);
898 char * endpos;
899 if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
900 endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
901 else if (prog->float_substr || prog->float_utf8)
902 endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
903 else
904 endpos= strend;
905
906 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
907 (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
908
909 t = s;
910 s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
911 if (!s) {
912#ifdef DEBUGGING
913 const char *what = NULL;
914#endif
915 if (endpos == strend) {
916 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
917 "Could not match STCLASS...\n") );
918 goto fail;
919 }
920 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
921 "This position contradicts STCLASS...\n") );
922 if ((prog->extflags & RXf_ANCH) && !ml_anch)
923 goto fail;
924
925 if (prog->anchored_substr || prog->anchored_utf8) {
926 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
927 DEBUG_EXECUTE_r( what = "anchored" );
928 hop_and_restart:
929 s = HOP3c(t, 1, strend);
930 if (s + start_shift + end_shift > strend) {
931
932 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
933 "Could not match STCLASS...\n") );
934 goto fail;
935 }
936 if (!check)
937 goto giveup;
938 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
939 "Looking for %s substr starting at offset %ld...\n",
940 what, (long)(s + start_shift - i_strpos)) );
941 goto restart;
942 }
943
944 if (t + start_shift >= check_at)
945 goto retry_floating_check;
946
947 s = check_at;
948 if (!check)
949 goto giveup;
950 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
951 "Looking for anchored substr starting at offset %ld...\n",
952 (long)(other_last - i_strpos)) );
953 goto do_other_anchored;
954 }
955
956
957 if (ml_anch) {
958 s = t = t + 1;
959 if (!check)
960 goto giveup;
961 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
962 "Looking for /%s^%s/m starting at offset %ld...\n",
963 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
964 goto try_at_offset;
965 }
966 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
967 goto fail;
968
969 retry_floating_check:
970 t = check_at - start_shift;
971 DEBUG_EXECUTE_r( what = "floating" );
972 goto hop_and_restart;
973 }
974 if (t != s) {
975 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
976 "By STCLASS: moving %ld --> %ld\n",
977 (long)(t - i_strpos), (long)(s - i_strpos))
978 );
979 }
980 else {
981 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
982 "Does not contradict STCLASS...\n");
983 );
984 }
985 }
986 giveup:
987 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
988 PL_colors[4], (check ? "Guessed" : "Giving up"),
989 PL_colors[5], (long)(s - i_strpos)) );
990 return s;
991
992 fail_finish:
993 if (prog->check_substr || prog->check_utf8)
994 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5;
995 fail:
996 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
997 PL_colors[4], PL_colors[5]));
998 return NULL;
999}
1000
1001#define DECL_TRIE_TYPE(scan) \
1002 const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1003 trie_type = (scan->flags != EXACT) \
1004 ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1005 : (do_utf8 ? trie_utf8 : trie_plain)
1006
1007#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
1008uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
1009 UV uvc_unfolded = 0; \
1010 switch (trie_type) { \
1011 case trie_utf8_fold: \
1012 if ( foldlen>0 ) { \
1013 uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1014 foldlen -= len; \
1015 uscan += len; \
1016 len=0; \
1017 } else { \
1018 uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1019 uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
1020 foldlen -= UNISKIP( uvc ); \
1021 uscan = foldbuf + UNISKIP( uvc ); \
1022 } \
1023 break; \
1024 case trie_latin_utf8_fold: \
1025 if ( foldlen>0 ) { \
1026 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1027 foldlen -= len; \
1028 uscan += len; \
1029 len=0; \
1030 } else { \
1031 len = 1; \
1032 uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen ); \
1033 foldlen -= UNISKIP( uvc ); \
1034 uscan = foldbuf + UNISKIP( uvc ); \
1035 } \
1036 break; \
1037 case trie_utf8: \
1038 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1039 break; \
1040 case trie_plain: \
1041 uvc = (UV)*uc; \
1042 len = 1; \
1043 } \
1044 \
1045 if (uvc < 256) { \
1046 charid = trie->charmap[ uvc ]; \
1047 } \
1048 else { \
1049 charid = 0; \
1050 if (widecharmap) { \
1051 SV** const svpp = hv_fetch(widecharmap, \
1052 (char*)&uvc, sizeof(UV), 0); \
1053 if (svpp) \
1054 charid = (U16)SvIV(*svpp); \
1055 } \
1056 } \
1057 if (!charid && trie_type == trie_utf8_fold && !UTF) { \
1058 charid = trie->charmap[uvc_unfolded]; \
1059 } \
1060} STMT_END
1061
1062#define REXEC_FBC_EXACTISH_CHECK(CoNd) \
1063{ \
1064 char *my_strend= (char *)strend; \
1065 if ( (CoNd) \
1066 && (ln == len || \
1067 !ibcmp_utf8(s, &my_strend, 0, do_utf8, \
1068 m, NULL, ln, (bool)UTF)) \
1069 && (!reginfo || regtry(reginfo, &s)) ) \
1070 goto got_it; \
1071 else { \
1072 U8 foldbuf[UTF8_MAXBYTES_CASE+1]; \
1073 uvchr_to_utf8(tmpbuf, c); \
1074 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen); \
1075 if ( f != c \
1076 && (f == c1 || f == c2) \
1077 && (ln == len || \
1078 !ibcmp_utf8(s, &my_strend, 0, do_utf8,\
1079 m, NULL, ln, (bool)UTF)) \
1080 && (!reginfo || regtry(reginfo, &s)) ) \
1081 goto got_it; \
1082 } \
1083} \
1084s += len
1085
1086#define REXEC_FBC_EXACTISH_SCAN(CoNd) \
1087STMT_START { \
1088 while (s <= e) { \
1089 if ( (CoNd) \
1090 && (ln == 1 || !(OP(c) == EXACTF \
1091 ? ibcmp(s, m, ln) \
1092 : ibcmp_locale(s, m, ln))) \
1093 && (!reginfo || regtry(reginfo, &s)) ) \
1094 goto got_it; \
1095 s++; \
1096 } \
1097} STMT_END
1098
1099#define REXEC_FBC_UTF8_SCAN(CoDe) \
1100STMT_START { \
1101 while (s + (uskip = UTF8SKIP(s)) <= strend) { \
1102 CoDe \
1103 s += uskip; \
1104 } \
1105} STMT_END
1106
1107#define REXEC_FBC_SCAN(CoDe) \
1108STMT_START { \
1109 while (s < strend) { \
1110 CoDe \
1111 s++; \
1112 } \
1113} STMT_END
1114
1115#define REXEC_FBC_UTF8_CLASS_SCAN(CoNd) \
1116REXEC_FBC_UTF8_SCAN( \
1117 if (CoNd) { \
1118 if (tmp && (!reginfo || regtry(reginfo, &s))) \
1119 goto got_it; \
1120 else \
1121 tmp = doevery; \
1122 } \
1123 else \
1124 tmp = 1; \
1125)
1126
1127#define REXEC_FBC_CLASS_SCAN(CoNd) \
1128REXEC_FBC_SCAN( \
1129 if (CoNd) { \
1130 if (tmp && (!reginfo || regtry(reginfo, &s))) \
1131 goto got_it; \
1132 else \
1133 tmp = doevery; \
1134 } \
1135 else \
1136 tmp = 1; \
1137)
1138
1139#define REXEC_FBC_TRYIT \
1140if ((!reginfo || regtry(reginfo, &s))) \
1141 goto got_it
1142
1143#define REXEC_FBC_CSCAN(CoNdUtF8,CoNd) \
1144 if (do_utf8) { \
1145 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1146 } \
1147 else { \
1148 REXEC_FBC_CLASS_SCAN(CoNd); \
1149 } \
1150 break
1151
1152#define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd) \
1153 if (do_utf8) { \
1154 UtFpReLoAd; \
1155 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1156 } \
1157 else { \
1158 REXEC_FBC_CLASS_SCAN(CoNd); \
1159 } \
1160 break
1161
1162#define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd) \
1163 PL_reg_flags |= RF_tainted; \
1164 if (do_utf8) { \
1165 REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8); \
1166 } \
1167 else { \
1168 REXEC_FBC_CLASS_SCAN(CoNd); \
1169 } \
1170 break
1171
1172#define DUMP_EXEC_POS(li,s,doutf8) \
1173 dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1174
1175
1176
1177
1178
1179
1180STATIC char *
1181S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1182 const char *strend, regmatch_info *reginfo)
1183{
1184 dVAR;
1185 const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1186 char *m;
1187 STRLEN ln;
1188 STRLEN lnc;
1189 register STRLEN uskip;
1190 unsigned int c1;
1191 unsigned int c2;
1192 char *e;
1193 register I32 tmp = 1;
1194 register const bool do_utf8 = PL_reg_match_utf8;
1195 RXi_GET_DECL(prog,progi);
1196
1197 PERL_ARGS_ASSERT_FIND_BYCLASS;
1198
1199
1200 switch (OP(c)) {
1201 case ANYOF:
1202 if (do_utf8) {
1203 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1204 !UTF8_IS_INVARIANT((U8)s[0]) ?
1205 reginclass(prog, c, (U8*)s, 0, do_utf8) :
1206 REGINCLASS(prog, c, (U8*)s));
1207 }
1208 else {
1209 while (s < strend) {
1210 STRLEN skip = 1;
1211
1212 if (REGINCLASS(prog, c, (U8*)s) ||
1213 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1214
1215
1216 (skip = SHARP_S_SKIP))) {
1217 if (tmp && (!reginfo || regtry(reginfo, &s)))
1218 goto got_it;
1219 else
1220 tmp = doevery;
1221 }
1222 else
1223 tmp = 1;
1224 s += skip;
1225 }
1226 }
1227 break;
1228 case CANY:
1229 REXEC_FBC_SCAN(
1230 if (tmp && (!reginfo || regtry(reginfo, &s)))
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 );
1235 break;
1236 case EXACTF:
1237 m = STRING(c);
1238 ln = STR_LEN(c);
1239 lnc = (I32) ln;
1240 if (UTF) {
1241 STRLEN ulen1, ulen2;
1242 U8 *sm = (U8 *) m;
1243 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1244 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1245
1246
1247
1248
1249
1250
1251
1252 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1253 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267 lnc = 0;
1268 while (sm < ((U8 *) m + ln)) {
1269 lnc++;
1270 sm += UTF8SKIP(sm);
1271 }
1272 }
1273 else {
1274 c1 = *(U8*)m;
1275 c2 = PL_fold[c1];
1276 }
1277 goto do_exactf;
1278 case EXACTFL:
1279 m = STRING(c);
1280 ln = STR_LEN(c);
1281 lnc = (I32) ln;
1282 c1 = *(U8*)m;
1283 c2 = PL_fold_locale[c1];
1284 do_exactf:
1285 e = HOP3c(strend, -((I32)lnc), s);
1286
1287 if (!reginfo && e < s)
1288 e = s;
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301 if (do_utf8 || UTF) {
1302 UV c, f;
1303 U8 tmpbuf [UTF8_MAXBYTES+1];
1304 STRLEN len = 1;
1305 STRLEN foldlen;
1306 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1307 if (c1 == c2) {
1308
1309
1310 while (s <= e) {
1311 if (do_utf8) {
1312 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1313 uniflags);
1314 } else {
1315 c = *((U8*)s);
1316 }
1317 REXEC_FBC_EXACTISH_CHECK(c == c1);
1318 }
1319 }
1320 else {
1321 while (s <= e) {
1322 if (do_utf8) {
1323 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1324 uniflags);
1325 } else {
1326 c = *((U8*)s);
1327 }
1328
1329
1330
1331
1332
1333
1334
1335
1336 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1337 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1338 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1339
1340 REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1341 }
1342 }
1343 }
1344 else {
1345
1346 if (c1 == c2)
1347 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1348 else
1349 REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1350 }
1351 break;
1352 case BOUNDL:
1353 PL_reg_flags |= RF_tainted;
1354
1355 case BOUND:
1356 if (do_utf8) {
1357 if (s == PL_bostr)
1358 tmp = '\n';
1359 else {
1360 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1361 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1362 }
1363 tmp = ((OP(c) == BOUND ?
1364 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1365 LOAD_UTF8_CHARCLASS_ALNUM();
1366 REXEC_FBC_UTF8_SCAN(
1367 if (tmp == !(OP(c) == BOUND ?
1368 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1369 isALNUM_LC_utf8((U8*)s)))
1370 {
1371 tmp = !tmp;
1372 REXEC_FBC_TRYIT;
1373 }
1374 );
1375 }
1376 else {
1377 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1378 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1379 REXEC_FBC_SCAN(
1380 if (tmp ==
1381 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1382 tmp = !tmp;
1383 REXEC_FBC_TRYIT;
1384 }
1385 );
1386 }
1387 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1388 goto got_it;
1389 break;
1390 case NBOUNDL:
1391 PL_reg_flags |= RF_tainted;
1392
1393 case NBOUND:
1394 if (do_utf8) {
1395 if (s == PL_bostr)
1396 tmp = '\n';
1397 else {
1398 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1399 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1400 }
1401 tmp = ((OP(c) == NBOUND ?
1402 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1403 LOAD_UTF8_CHARCLASS_ALNUM();
1404 REXEC_FBC_UTF8_SCAN(
1405 if (tmp == !(OP(c) == NBOUND ?
1406 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1407 isALNUM_LC_utf8((U8*)s)))
1408 tmp = !tmp;
1409 else REXEC_FBC_TRYIT;
1410 );
1411 }
1412 else {
1413 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1414 tmp = ((OP(c) == NBOUND ?
1415 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1416 REXEC_FBC_SCAN(
1417 if (tmp ==
1418 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1419 tmp = !tmp;
1420 else REXEC_FBC_TRYIT;
1421 );
1422 }
1423 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1424 goto got_it;
1425 break;
1426 case ALNUM:
1427 REXEC_FBC_CSCAN_PRELOAD(
1428 LOAD_UTF8_CHARCLASS_ALNUM(),
1429 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1430 isALNUM(*s)
1431 );
1432 case ALNUML:
1433 REXEC_FBC_CSCAN_TAINT(
1434 isALNUM_LC_utf8((U8*)s),
1435 isALNUM_LC(*s)
1436 );
1437 case NALNUM:
1438 REXEC_FBC_CSCAN_PRELOAD(
1439 LOAD_UTF8_CHARCLASS_ALNUM(),
1440 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1441 !isALNUM(*s)
1442 );
1443 case NALNUML:
1444 REXEC_FBC_CSCAN_TAINT(
1445 !isALNUM_LC_utf8((U8*)s),
1446 !isALNUM_LC(*s)
1447 );
1448 case SPACE:
1449 REXEC_FBC_CSCAN_PRELOAD(
1450 LOAD_UTF8_CHARCLASS_SPACE(),
1451 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1452 isSPACE(*s)
1453 );
1454 case SPACEL:
1455 REXEC_FBC_CSCAN_TAINT(
1456 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1457 isSPACE_LC(*s)
1458 );
1459 case NSPACE:
1460 REXEC_FBC_CSCAN_PRELOAD(
1461 LOAD_UTF8_CHARCLASS_SPACE(),
1462 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1463 !isSPACE(*s)
1464 );
1465 case NSPACEL:
1466 REXEC_FBC_CSCAN_TAINT(
1467 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1468 !isSPACE_LC(*s)
1469 );
1470 case DIGIT:
1471 REXEC_FBC_CSCAN_PRELOAD(
1472 LOAD_UTF8_CHARCLASS_DIGIT(),
1473 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1474 isDIGIT(*s)
1475 );
1476 case DIGITL:
1477 REXEC_FBC_CSCAN_TAINT(
1478 isDIGIT_LC_utf8((U8*)s),
1479 isDIGIT_LC(*s)
1480 );
1481 case NDIGIT:
1482 REXEC_FBC_CSCAN_PRELOAD(
1483 LOAD_UTF8_CHARCLASS_DIGIT(),
1484 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1485 !isDIGIT(*s)
1486 );
1487 case NDIGITL:
1488 REXEC_FBC_CSCAN_TAINT(
1489 !isDIGIT_LC_utf8((U8*)s),
1490 !isDIGIT_LC(*s)
1491 );
1492 case LNBREAK:
1493 REXEC_FBC_CSCAN(
1494 is_LNBREAK_utf8(s),
1495 is_LNBREAK_latin1(s)
1496 );
1497 case VERTWS:
1498 REXEC_FBC_CSCAN(
1499 is_VERTWS_utf8(s),
1500 is_VERTWS_latin1(s)
1501 );
1502 case NVERTWS:
1503 REXEC_FBC_CSCAN(
1504 !is_VERTWS_utf8(s),
1505 !is_VERTWS_latin1(s)
1506 );
1507 case HORIZWS:
1508 REXEC_FBC_CSCAN(
1509 is_HORIZWS_utf8(s),
1510 is_HORIZWS_latin1(s)
1511 );
1512 case NHORIZWS:
1513 REXEC_FBC_CSCAN(
1514 !is_HORIZWS_utf8(s),
1515 !is_HORIZWS_latin1(s)
1516 );
1517 case AHOCORASICKC:
1518 case AHOCORASICK:
1519 {
1520 DECL_TRIE_TYPE(c);
1521
1522 reg_ac_data *aho
1523 = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1524 reg_trie_data *trie
1525 = (reg_trie_data*)progi->data->data[ aho->trie ];
1526 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1527
1528 const char *last_start = strend - trie->minlen;
1529#ifdef DEBUGGING
1530 const char *real_start = s;
1531#endif
1532 STRLEN maxlen = trie->maxlen;
1533 SV *sv_points;
1534 U8 **points;
1535
1536
1537
1538
1539 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1540 U8 *bitmap=NULL;
1541
1542
1543 GET_RE_DEBUG_FLAGS_DECL;
1544
1545
1546
1547
1548 ENTER;
1549 SAVETMPS;
1550 sv_points=newSV(maxlen * sizeof(U8 *));
1551 SvCUR_set(sv_points,
1552 maxlen * sizeof(U8 *));
1553 SvPOK_on(sv_points);
1554 sv_2mortal(sv_points);
1555 points=(U8**)SvPV_nolen(sv_points );
1556 if ( trie_type != trie_utf8_fold
1557 && (trie->bitmap || OP(c)==AHOCORASICKC) )
1558 {
1559 if (trie->bitmap)
1560 bitmap=(U8*)trie->bitmap;
1561 else
1562 bitmap=(U8*)ANYOF_BITMAP(c);
1563 }
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581 while (s <= last_start) {
1582 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1583 U8 *uc = (U8*)s;
1584 U16 charid = 0;
1585 U32 base = 1;
1586 U32 state = 1;
1587 UV uvc = 0;
1588 STRLEN len = 0;
1589 STRLEN foldlen = 0;
1590 U8 *uscan = (U8*)NULL;
1591 U8 *leftmost = NULL;
1592#ifdef DEBUGGING
1593 U32 accepted_word= 0;
1594#endif
1595 U32 pointpos = 0;
1596
1597 while ( state && uc <= (U8*)strend ) {
1598 int failed=0;
1599 U32 word = aho->states[ state ].wordnum;
1600
1601 if( state==1 ) {
1602 if ( bitmap ) {
1603 DEBUG_TRIE_EXECUTE_r(
1604 if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1605 dump_exec_pos( (char *)uc, c, strend, real_start,
1606 (char *)uc, do_utf8 );
1607 PerlIO_printf( Perl_debug_log,
1608 " Scanning for legal start char...\n");
1609 }
1610 );
1611 while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1612 uc++;
1613 }
1614 s= (char *)uc;
1615 }
1616 if (uc >(U8*)last_start) break;
1617 }
1618
1619 if ( word ) {
1620 U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1621 if (!leftmost || lpos < leftmost) {
1622 DEBUG_r(accepted_word=word);
1623 leftmost= lpos;
1624 }
1625 if (base==0) break;
1626
1627 }
1628 points[pointpos++ % maxlen]= uc;
1629 REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1630 uscan, len, uvc, charid, foldlen,
1631 foldbuf, uniflags);
1632 DEBUG_TRIE_EXECUTE_r({
1633 dump_exec_pos( (char *)uc, c, strend, real_start,
1634 s, do_utf8 );
1635 PerlIO_printf(Perl_debug_log,
1636 " Charid:%3u CP:%4"UVxf" ",
1637 charid, uvc);
1638 });
1639
1640 do {
1641#ifdef DEBUGGING
1642 word = aho->states[ state ].wordnum;
1643#endif
1644 base = aho->states[ state ].trans.base;
1645
1646 DEBUG_TRIE_EXECUTE_r({
1647 if (failed)
1648 dump_exec_pos( (char *)uc, c, strend, real_start,
1649 s, do_utf8 );
1650 PerlIO_printf( Perl_debug_log,
1651 "%sState: %4"UVxf", word=%"UVxf,
1652 failed ? " Fail transition to " : "",
1653 (UV)state, (UV)word);
1654 });
1655 if ( base ) {
1656 U32 tmp;
1657 if (charid &&
1658 (base + charid > trie->uniquecharcount )
1659 && (base + charid - 1 - trie->uniquecharcount
1660 < trie->lasttrans)
1661 && trie->trans[base + charid - 1 -
1662 trie->uniquecharcount].check == state
1663 && (tmp=trie->trans[base + charid - 1 -
1664 trie->uniquecharcount ].next))
1665 {
1666 DEBUG_TRIE_EXECUTE_r(
1667 PerlIO_printf( Perl_debug_log," - legal\n"));
1668 state = tmp;
1669 break;
1670 }
1671 else {
1672 DEBUG_TRIE_EXECUTE_r(
1673 PerlIO_printf( Perl_debug_log," - fail\n"));
1674 failed = 1;
1675 state = aho->fail[state];
1676 }
1677 }
1678 else {
1679
1680 DEBUG_TRIE_EXECUTE_r(
1681 PerlIO_printf( Perl_debug_log," - accepting\n"));
1682 failed = 1;
1683 break;
1684 }
1685 } while(state);
1686 uc += len;
1687 if (failed) {
1688 if (leftmost)
1689 break;
1690 if (!state) state = 1;
1691 }
1692 }
1693 if ( aho->states[ state ].wordnum ) {
1694 U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1695 if (!leftmost || lpos < leftmost) {
1696 DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1697 leftmost = lpos;
1698 }
1699 }
1700 if (leftmost) {
1701 s = (char*)leftmost;
1702 DEBUG_TRIE_EXECUTE_r({
1703 PerlIO_printf(
1704 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1705 (UV)accepted_word, (IV)(s - real_start)
1706 );
1707 });
1708 if (!reginfo || regtry(reginfo, &s)) {
1709 FREETMPS;
1710 LEAVE;
1711 goto got_it;
1712 }
1713 s = HOPc(s,1);
1714 DEBUG_TRIE_EXECUTE_r({
1715 PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1716 });
1717 } else {
1718 DEBUG_TRIE_EXECUTE_r(
1719 PerlIO_printf( Perl_debug_log,"No match.\n"));
1720 break;
1721 }
1722 }
1723 FREETMPS;
1724 LEAVE;
1725 }
1726 break;
1727 default:
1728 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1729 break;
1730 }
1731 return 0;
1732 got_it:
1733 return s;
1734}
1735
1736static void
1737S_swap_match_buff (pTHX_ regexp *prog)
1738{
1739 regexp_paren_pair *t;
1740
1741 PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1742
1743 if (!prog->swap) {
1744
1745
1746
1747
1748
1749
1750
1751 Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1752 }
1753 t = prog->swap;
1754 prog->swap = prog->offs;
1755 prog->offs = t;
1756}
1757
1758
1759
1760
1761
1762I32
1763Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
1764 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1765
1766
1767
1768
1769
1770
1771
1772{
1773 dVAR;
1774 char *s;
1775 register regnode *c;
1776 char *startpos = stringarg;
1777 I32 minlen;
1778 I32 dontbother = 0;
1779 I32 end_shift = 0;
1780 I32 scream_pos = -1;
1781 char *scream_olds = NULL;
1782 const bool do_utf8 = (bool)DO_UTF8(sv);
1783 I32 multiline;
1784 RXi_GET_DECL(prog,progi);
1785 regmatch_info reginfo;
1786 bool swap_on_fail = 0;
1787 GET_RE_DEBUG_FLAGS_DECL;
1788
1789 PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1790 PERL_UNUSED_ARG(data);
1791
1792
1793 if (prog == NULL || startpos == NULL) {
1794 Perl_croak(aTHX_ "NULL regexp parameter");
1795 return 0;
1796 }
1797
1798 multiline = prog->extflags & RXf_PMf_MULTILINE;
1799 reginfo.prog = prog;
1800
1801 RX_MATCH_UTF8_set(prog, do_utf8);
1802 DEBUG_EXECUTE_r(
1803 debug_start_match(prog, do_utf8, startpos, strend,
1804 "Matching");
1805 );
1806
1807 minlen = prog->minlen;
1808
1809 if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1810 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1811 "String too short [regexec_flags]...\n"));
1812 goto phooey;
1813 }
1814
1815
1816
1817 if (UCHARAT(progi->program) != REG_MAGIC) {
1818 Perl_croak(aTHX_ "corrupted regexp program");
1819 }
1820
1821 PL_reg_flags = 0;
1822 PL_reg_eval_set = 0;
1823 PL_reg_maxiter = 0;
1824
1825 if (RX_UTF8(prog))
1826 PL_reg_flags |= RF_utf8;
1827
1828
1829 reginfo.bol = startpos;
1830 PL_bostr = strbeg;
1831 reginfo.sv = sv;
1832
1833
1834 PL_regeol = strend;
1835
1836
1837 reginfo.till = startpos+minend;
1838
1839
1840 s = startpos;
1841
1842 if (prog->extflags & RXf_GPOS_SEEN) {
1843 MAGIC *mg;
1844
1845 if (flags & REXEC_IGNOREPOS)
1846 reginfo.ganch = startpos + prog->gofs;
1847 else if (sv && SvTYPE(sv) >= SVt_PVMG
1848 && SvMAGIC(sv)
1849 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1850 && mg->mg_len >= 0) {
1851 reginfo.ganch = strbeg + mg->mg_len;
1852 if (prog->extflags & RXf_ANCH_GPOS) {
1853 if (s > reginfo.ganch)
1854 goto phooey;
1855 s = reginfo.ganch - prog->gofs;
1856 }
1857 }
1858 else if (data) {
1859 reginfo.ganch = strbeg + PTR2UV(data);
1860 } else
1861 reginfo.ganch = strbeg;
1862 }
1863 if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1864 swap_on_fail = 1;
1865 swap_match_buff(prog);
1866
1867 }
1868 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1869 re_scream_pos_data d;
1870
1871 d.scream_olds = &scream_olds;
1872 d.scream_pos = &scream_pos;
1873 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1874 if (!s) {
1875 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1876 goto phooey;
1877 }
1878 }
1879
1880
1881
1882
1883
1884 if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1885 if (s == startpos && regtry(®info, &startpos))
1886 goto got_it;
1887 else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1888 || (prog->extflags & RXf_ANCH_MBOL))
1889 {
1890 char *end;
1891
1892 if (minlen)
1893 dontbother = minlen - 1;
1894 end = HOP3c(strend, -dontbother, strbeg) - 1;
1895
1896 if (prog->check_substr || prog->check_utf8) {
1897 if (s == startpos)
1898 goto after_try;
1899 while (1) {
1900 if (regtry(®info, &s))
1901 goto got_it;
1902 after_try:
1903 if (s > end)
1904 goto phooey;
1905 if (prog->extflags & RXf_USE_INTUIT) {
1906 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1907 if (!s)
1908 goto phooey;
1909 }
1910 else
1911 s++;
1912 }
1913 } else {
1914 if (s > startpos)
1915 s--;
1916 while (s < end) {
1917 if (*s++ == '\n') {
1918 if (regtry(®info, &s))
1919 goto got_it;
1920 }
1921 }
1922 }
1923 }
1924 goto phooey;
1925 } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1926 {
1927
1928
1929
1930 char *tmp_s = reginfo.ganch - prog->gofs;
1931 if (regtry(®info, &tmp_s))
1932 goto got_it;
1933 goto phooey;
1934 }
1935
1936
1937 if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1938
1939
1940 char ch;
1941#ifdef DEBUGGING
1942 int did_match = 0;
1943#endif
1944 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1945 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1946 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1947
1948 if (do_utf8) {
1949 REXEC_FBC_SCAN(
1950 if (*s == ch) {
1951 DEBUG_EXECUTE_r( did_match = 1 );
1952 if (regtry(®info, &s)) goto got_it;
1953 s += UTF8SKIP(s);
1954 while (s < strend && *s == ch)
1955 s += UTF8SKIP(s);
1956 }
1957 );
1958 }
1959 else {
1960 REXEC_FBC_SCAN(
1961 if (*s == ch) {
1962 DEBUG_EXECUTE_r( did_match = 1 );
1963 if (regtry(®info, &s)) goto got_it;
1964 s++;
1965 while (s < strend && *s == ch)
1966 s++;
1967 }
1968 );
1969 }
1970 DEBUG_EXECUTE_r(if (!did_match)
1971 PerlIO_printf(Perl_debug_log,
1972 "Did not find anchored character...\n")
1973 );
1974 }
1975 else if (prog->anchored_substr != NULL
1976 || prog->anchored_utf8 != NULL
1977 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1978 && prog->float_max_offset < strend - s)) {
1979 SV *must;
1980 I32 back_max;
1981 I32 back_min;
1982 char *last;
1983 char *last1;
1984#ifdef DEBUGGING
1985 int did_match = 0;
1986#endif
1987 if (prog->anchored_substr || prog->anchored_utf8) {
1988 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1989 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1990 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1991 back_max = back_min = prog->anchored_offset;
1992 } else {
1993 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1994 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1995 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1996 back_max = prog->float_max_offset;
1997 back_min = prog->float_min_offset;
1998 }
1999
2000
2001 if (must == &PL_sv_undef)
2002
2003 goto phooey;
2004
2005 if (back_min<0) {
2006 last = strend;
2007 } else {
2008 last = HOP3c(strend,
2009 -(I32)(CHR_SVLEN(must)
2010 - (SvTAIL(must) != 0) + back_min), strbeg);
2011 }
2012 if (s > PL_bostr)
2013 last1 = HOPc(s, -1);
2014 else
2015 last1 = s - 1;
2016
2017
2018
2019 scream_pos = -1;
2020 dontbother = end_shift;
2021 strend = HOPc(strend, -dontbother);
2022 while ( (s <= last) &&
2023 ((flags & REXEC_SCREAM)
2024 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2025 end_shift, &scream_pos, 0))
2026 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2027 (unsigned char*)strend, must,
2028 multiline ? FBMrf_MULTILINE : 0))) ) {
2029
2030 if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2031 s = strbeg + (s - SvPVX_const(sv));
2032 DEBUG_EXECUTE_r( did_match = 1 );
2033 if (HOPc(s, -back_max) > last1) {
2034 last1 = HOPc(s, -back_min);
2035 s = HOPc(s, -back_max);
2036 }
2037 else {
2038 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2039
2040 last1 = HOPc(s, -back_min);
2041 s = t;
2042 }
2043 if (do_utf8) {
2044 while (s <= last1) {
2045 if (regtry(®info, &s))
2046 goto got_it;
2047 s += UTF8SKIP(s);
2048 }
2049 }
2050 else {
2051 while (s <= last1) {
2052 if (regtry(®info, &s))
2053 goto got_it;
2054 s++;
2055 }
2056 }
2057 }
2058 DEBUG_EXECUTE_r(if (!did_match) {
2059 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2060 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2061 PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2062 ((must == prog->anchored_substr || must == prog->anchored_utf8)
2063 ? "anchored" : "floating"),
2064 quoted, RE_SV_TAIL(must));
2065 });
2066 goto phooey;
2067 }
2068 else if ( (c = progi->regstclass) ) {
2069 if (minlen) {
2070 const OPCODE op = OP(progi->regstclass);
2071
2072 if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2073 strend = HOPc(strend, -(minlen - 1));
2074 }
2075 DEBUG_EXECUTE_r({
2076 SV * const prop = sv_newmortal();
2077 regprop(prog, prop, c);
2078 {
2079 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2080 s,strend-s,60);
2081 PerlIO_printf(Perl_debug_log,
2082 "Matching stclass %.*s against %s (%d chars)\n",
2083 (int)SvCUR(prop), SvPVX_const(prop),
2084 quoted, (int)(strend - s));
2085 }
2086 });
2087 if (find_byclass(prog, c, s, strend, ®info))
2088 goto got_it;
2089 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2090 }
2091 else {
2092 dontbother = 0;
2093 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2094
2095 char *last;
2096 SV* float_real;
2097
2098 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2099 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2100 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2101
2102 if (flags & REXEC_SCREAM) {
2103 last = screaminstr(sv, float_real, s - strbeg,
2104 end_shift, &scream_pos, 1);
2105 if (!last)
2106 last = scream_olds;
2107
2108 else if (RXp_MATCH_COPIED(prog))
2109 s = strbeg + (s - SvPVX_const(sv));
2110 }
2111 else {
2112 STRLEN len;
2113 const char * const little = SvPV_const(float_real, len);
2114
2115 if (SvTAIL(float_real)) {
2116 if (memEQ(strend - len + 1, little, len - 1))
2117 last = strend - len + 1;
2118 else if (!multiline)
2119 last = memEQ(strend - len, little, len)
2120 ? strend - len : NULL;
2121 else
2122 goto find_last;
2123 } else {
2124 find_last:
2125 if (len)
2126 last = rninstr(s, strend, little, little + len);
2127 else
2128 last = strend;
2129 }
2130 }
2131 if (last == NULL) {
2132 DEBUG_EXECUTE_r(
2133 PerlIO_printf(Perl_debug_log,
2134 "%sCan't trim the tail, match fails (should not happen)%s\n",
2135 PL_colors[4], PL_colors[5]));
2136 goto phooey;
2137 }
2138 dontbother = strend - last + prog->float_min_offset;
2139 }
2140 if (minlen && (dontbother < minlen))
2141 dontbother = minlen - 1;
2142 strend -= dontbother;
2143
2144 if (do_utf8) {
2145 for (;;) {
2146 if (regtry(®info, &s))
2147 goto got_it;
2148 if (s >= strend)
2149 break;
2150 s += UTF8SKIP(s);
2151 };
2152 }
2153 else {
2154 do {
2155 if (regtry(®info, &s))
2156 goto got_it;
2157 } while (s++ < strend);
2158 }
2159 }
2160
2161
2162 goto phooey;
2163
2164got_it:
2165 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2166
2167 if (PL_reg_eval_set)
2168 restore_pos(aTHX_ prog);
2169 if (RXp_PAREN_NAMES(prog))
2170 (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2171
2172
2173 if ( !(flags & REXEC_NOT_FIRST) ) {
2174 RX_MATCH_COPY_FREE(prog);
2175 if (flags & REXEC_COPY_STR) {
2176 const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2177#ifdef PERL_OLD_COPY_ON_WRITE
2178 if ((SvIsCOW(sv)
2179 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2180 if (DEBUG_C_TEST) {
2181 PerlIO_printf(Perl_debug_log,
2182 "Copy on write: regexp capture, type %d\n",
2183 (int) SvTYPE(sv));
2184 }
2185 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2186 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2187 assert (SvPOKp(prog->saved_copy));
2188 } else
2189#endif
2190 {
2191 RX_MATCH_COPIED_on(prog);
2192 s = savepvn(strbeg, i);
2193 prog->subbeg = s;
2194 }
2195 prog->sublen = i;
2196 }
2197 else {
2198 prog->subbeg = strbeg;
2199 prog->sublen = PL_regeol - strbeg;
2200 }
2201 }
2202
2203 return 1;
2204
2205phooey:
2206 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2207 PL_colors[4], PL_colors[5]));
2208 if (PL_reg_eval_set)
2209 restore_pos(aTHX_ prog);
2210 if (swap_on_fail)
2211
2212 swap_match_buff(prog);
2213
2214 return 0;
2215}
2216
2217
2218
2219
2220
2221STATIC I32
2222S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2223{
2224 dVAR;
2225 CHECKPOINT lastcp;
2226 regexp *prog = reginfo->prog;
2227 RXi_GET_DECL(prog,progi);
2228 GET_RE_DEBUG_FLAGS_DECL;
2229
2230 PERL_ARGS_ASSERT_REGTRY;
2231
2232 reginfo->cutpoint=NULL;
2233
2234 if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2235 MAGIC *mg;
2236
2237 PL_reg_eval_set = RS_init;
2238 DEBUG_EXECUTE_r(DEBUG_s(
2239 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2240 (IV)(PL_stack_sp - PL_stack_base));
2241 ));
2242 SAVESTACK_CXPOS();
2243 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2244
2245 SAVETMPS;
2246
2247
2248
2249
2250 if (reginfo->sv) {
2251
2252 if (reginfo->sv != DEFSV) {
2253 SAVE_DEFSV;
2254 DEFSV_set(reginfo->sv);
2255 }
2256
2257 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2258 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2259
2260#ifdef PERL_OLD_COPY_ON_WRITE
2261 if (SvIsCOW(reginfo->sv))
2262 sv_force_normal_flags(reginfo->sv, 0);
2263#endif
2264 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2265 &PL_vtbl_mglob, NULL, 0);
2266 mg->mg_len = -1;
2267 }
2268 PL_reg_magic = mg;
2269 PL_reg_oldpos = mg->mg_len;
2270 SAVEDESTRUCTOR_X(restore_pos, prog);
2271 }
2272 if (!PL_reg_curpm) {
2273 Newxz(PL_reg_curpm, 1, PMOP);
2274#ifdef USE_ITHREADS
2275 {
2276 SV* const repointer = newSViv(0);
2277
2278
2279 av_push(PL_regex_padav,repointer);
2280 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2281 PL_regex_pad = AvARRAY(PL_regex_padav);
2282 }
2283#endif
2284 }
2285#ifdef USE_ITHREADS
2286
2287
2288
2289
2290 ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2291
2292 ReREFCNT_inc(prog);
2293#endif
2294 PM_SETRE(PL_reg_curpm, prog);
2295 PL_reg_oldcurpm = PL_curpm;
2296 PL_curpm = PL_reg_curpm;
2297 if (RXp_MATCH_COPIED(prog)) {
2298
2299
2300
2301 PL_reg_oldsaved = prog->subbeg;
2302 PL_reg_oldsavedlen = prog->sublen;
2303#ifdef PERL_OLD_COPY_ON_WRITE
2304 PL_nrs = prog->saved_copy;
2305#endif
2306 RXp_MATCH_COPIED_off(prog);
2307 }
2308 else
2309 PL_reg_oldsaved = NULL;
2310 prog->subbeg = PL_bostr;
2311 prog->sublen = PL_regeol - PL_bostr;
2312 }
2313 DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2314 prog->offs[0].start = *startpos - PL_bostr;
2315 PL_reginput = *startpos;
2316 PL_reglastparen = &prog->lastparen;
2317 PL_reglastcloseparen = &prog->lastcloseparen;
2318 prog->lastparen = 0;
2319 prog->lastcloseparen = 0;
2320 PL_regsize = 0;
2321 PL_regoffs = prog->offs;
2322 if (PL_reg_start_tmpl <= prog->nparens) {
2323 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2324 if(PL_reg_start_tmp)
2325 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2326 else
2327 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2328 }
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343#if 1
2344 if (prog->nparens) {
2345 regexp_paren_pair *pp = PL_regoffs;
2346 register I32 i;
2347 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2348 ++pp;
2349 pp->start = -1;
2350 pp->end = -1;
2351 }
2352 }
2353#endif
2354 REGCP_SET(lastcp);
2355 if (regmatch(reginfo, progi->program + 1)) {
2356 PL_regoffs[0].end = PL_reginput - PL_bostr;
2357 return 1;
2358 }
2359 if (reginfo->cutpoint)
2360 *startpos= reginfo->cutpoint;
2361 REGCP_UNWIND(lastcp);
2362 return 0;
2363}
2364
2365
2366#define sayYES goto yes
2367#define sayNO goto no
2368#define sayNO_SILENT goto no_silent
2369
2370
2371
2372#define CACHEsayNO \
2373 if (ST.cache_mask) \
2374 PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2375 sayNO
2376
2377
2378
2379
2380
2381#define REPORT_CODE_OFF 32
2382
2383
2384
2385#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2386
2387#define CHRTEST_UNINIT -1001
2388#define CHRTEST_VOID -1000
2389
2390#define SLAB_FIRST(s) (&(s)->states[0])
2391#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2392
2393
2394
2395STATIC regmatch_state *
2396S_push_slab(pTHX)
2397{
2398#if PERL_VERSION < 9 && !defined(PERL_CORE)
2399 dMY_CXT;
2400#endif
2401 regmatch_slab *s = PL_regmatch_slab->next;
2402 if (!s) {
2403 Newx(s, 1, regmatch_slab);
2404 s->prev = PL_regmatch_slab;
2405 s->next = NULL;
2406 PL_regmatch_slab->next = s;
2407 }
2408 PL_regmatch_slab = s;
2409 return SLAB_FIRST(s);
2410}
2411
2412
2413
2414
2415#define PUSH_STATE_GOTO(state, node) \
2416 scan = node; \
2417 st->resume_state = state; \
2418 goto push_state;
2419
2420
2421
2422#define PUSH_YES_STATE_GOTO(state, node) \
2423 scan = node; \
2424 st->resume_state = state; \
2425 goto push_yes_state;
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563#define DEBUG_STATE_pp(pp) \
2564 DEBUG_STATE_r({ \
2565 DUMP_EXEC_POS(locinput, scan, do_utf8); \
2566 PerlIO_printf(Perl_debug_log, \
2567 " %*s"pp" %s%s%s%s%s\n", \
2568 depth*2, "", \
2569 PL_reg_name[st->resume_state], \
2570 ((st==yes_state||st==mark_state) ? "[" : ""), \
2571 ((st==yes_state) ? "Y" : ""), \
2572 ((st==mark_state) ? "M" : ""), \
2573 ((st==yes_state||st==mark_state) ? "]" : "") \
2574 ); \
2575 });
2576
2577
2578#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2579
2580#ifdef DEBUGGING
2581
2582STATIC void
2583S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2584 const char *start, const char *end, const char *blurb)
2585{
2586 const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2587
2588 PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2589
2590 if (!PL_colorset)
2591 reginitcolors();
2592 {
2593 RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2594 RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2595
2596 RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2597 start, end - start, 60);
2598
2599 PerlIO_printf(Perl_debug_log,
2600 "%s%s REx%s %s against %s\n",
2601 PL_colors[4], blurb, PL_colors[5], s0, s1);
2602
2603 if (do_utf8||utf8_pat)
2604 PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2605 utf8_pat ? "pattern" : "",
2606 utf8_pat && do_utf8 ? " and " : "",
2607 do_utf8 ? "string" : ""
2608 );
2609 }
2610}
2611
2612STATIC void
2613S_dump_exec_pos(pTHX_ const char *locinput,
2614 const regnode *scan,
2615 const char *loc_regeol,
2616 const char *loc_bostr,
2617 const char *loc_reg_starttry,
2618 const bool do_utf8)
2619{
2620 const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2621 const int taill = (docolor ? 10 : 7);
2622 int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2623
2624
2625
2626
2627
2628
2629 int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2630 ? (5 + taill) - l : locinput - loc_bostr;
2631 int pref0_len;
2632
2633 PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2634
2635 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2636 pref_len++;
2637 pref0_len = pref_len - (locinput - loc_reg_starttry);
2638 if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2639 l = ( loc_regeol - locinput > (5 + taill) - pref_len
2640 ? (5 + taill) - pref_len : loc_regeol - locinput);
2641 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2642 l--;
2643 if (pref0_len < 0)
2644 pref0_len = 0;
2645 if (pref0_len > pref_len)
2646 pref0_len = pref_len;
2647 {
2648 const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2649
2650 RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2651 (locinput - pref_len),pref0_len, 60, 4, 5);
2652
2653 RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2654 (locinput - pref_len + pref0_len),
2655 pref_len - pref0_len, 60, 2, 3);
2656
2657 RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2658 locinput, loc_regeol - locinput, 10, 0, 1);
2659
2660 const STRLEN tlen=len0+len1+len2;
2661 PerlIO_printf(Perl_debug_log,
2662 "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2663 (IV)(locinput - loc_bostr),
2664 len0, s0,
2665 len1, s1,
2666 (docolor ? "" : "> <"),
2667 len2, s2,
2668 (int)(tlen > 19 ? 0 : 19 - tlen),
2669 "");
2670 }
2671}
2672
2673#endif
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684STATIC I32
2685S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2686{
2687 I32 n;
2688 RXi_GET_DECL(rex,rexi);
2689 SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2690 I32 *nums=(I32*)SvPVX(sv_dat);
2691
2692 PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2693
2694 for ( n=0; n<SvIVX(sv_dat); n++ ) {
2695 if ((I32)*PL_reglastparen >= nums[n] &&
2696 PL_regoffs[nums[n]].end != -1)
2697 {
2698 return nums[n];
2699 }
2700 }
2701 return 0;
2702}
2703
2704
2705
2706
2707STATIC void
2708S_clear_backtrack_stack(pTHX_ void *p)
2709{
2710 regmatch_slab *s = PL_regmatch_slab->next;
2711 PERL_UNUSED_ARG(p);
2712
2713 if (!s)
2714 return;
2715 PL_regmatch_slab->next = NULL;
2716 while (s) {
2717 regmatch_slab * const osl = s;
2718 s = s->next;
2719 Safefree(osl);
2720 }
2721}
2722
2723
2724#define SETREX(Re1,Re2) \
2725 if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2726 Re1 = (Re2)
2727
2728STATIC I32
2729S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2730{
2731#if PERL_VERSION < 9 && !defined(PERL_CORE)
2732 dMY_CXT;
2733#endif
2734 dVAR;
2735 register const bool do_utf8 = PL_reg_match_utf8;
2736 const U32 uniflags = UTF8_ALLOW_DEFAULT;
2737 regexp *rex = reginfo->prog;
2738 RXi_GET_DECL(rex,rexi);
2739 I32 oldsave;
2740
2741 register regmatch_state *st;
2742
2743 register regnode *scan;
2744 register regnode *next;
2745 register U32 n = 0;
2746 register I32 ln = 0;
2747 register char *locinput = PL_reginput;
2748 register I32 nextchr;
2749
2750 bool result = 0;
2751 int depth = 0;
2752 U32 nochange_depth = 0;
2753 const U32 max_nochange_depth =
2754 (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2755 3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2756 regmatch_state *yes_state = NULL;
2757
2758
2759
2760 regmatch_state *mark_state = NULL;
2761 regmatch_state *cur_eval = NULL;
2762 struct regmatch_state *cur_curlyx = NULL;
2763 U32 state_num;
2764 bool no_final = 0;
2765 bool do_cutgroup = 0;
2766 char *startpoint = PL_reginput;
2767 SV *popmark = NULL;
2768 SV *sv_commit = NULL;
2769 SV *sv_yes_mark = NULL;
2770
2771 U32 lastopen = 0;
2772 bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2773 SV* const oreplsv = GvSV(PL_replgv);
2774
2775
2776
2777
2778 bool sw = 0;
2779 bool minmod = 0;
2780 int logical = 0;
2781
2782
2783
2784
2785
2786
2787
2788#ifdef DEBUGGING
2789 GET_RE_DEBUG_FLAGS_DECL;
2790#endif
2791
2792 PERL_ARGS_ASSERT_REGMATCH;
2793
2794 DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2795 PerlIO_printf(Perl_debug_log,"regmatch start\n");
2796 }));
2797
2798 if (!PL_regmatch_slab) {
2799 Newx(PL_regmatch_slab, 1, regmatch_slab);
2800 PL_regmatch_slab->prev = NULL;
2801 PL_regmatch_slab->next = NULL;
2802 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2803 }
2804
2805 oldsave = PL_savestack_ix;
2806 SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2807 SAVEVPTR(PL_regmatch_slab);
2808 SAVEVPTR(PL_regmatch_state);
2809
2810
2811 st = ++PL_regmatch_state;
2812 if (st > SLAB_LAST(PL_regmatch_slab))
2813 st = PL_regmatch_state = S_push_slab(aTHX);
2814
2815
2816 nextchr = UCHARAT(locinput);
2817 scan = prog;
2818 while (scan != NULL) {
2819
2820 DEBUG_EXECUTE_r( {
2821 SV * const prop = sv_newmortal();
2822 regnode *rnext=regnext(scan);
2823 DUMP_EXEC_POS( locinput, scan, do_utf8 );
2824 regprop(rex, prop, scan);
2825
2826 PerlIO_printf(Perl_debug_log,
2827 "%3"IVdf":%*s%s(%"IVdf")\n",
2828 (IV)(scan - rexi->program), depth*2, "",
2829 SvPVX_const(prop),
2830 (PL_regkind[OP(scan)] == END || !rnext) ?
2831 0 : (IV)(rnext - rexi->program));
2832 });
2833
2834 next = scan + NEXT_OFF(scan);
2835 if (next == scan)
2836 next = NULL;
2837 state_num = OP(scan);
2838
2839 reenter_switch:
2840
2841 assert(PL_reglastparen == &rex->lastparen);
2842 assert(PL_reglastcloseparen == &rex->lastcloseparen);
2843 assert(PL_regoffs == rex->offs);
2844
2845 switch (state_num) {
2846 case BOL:
2847 if (locinput == PL_bostr)
2848 {
2849
2850 break;
2851 }
2852 sayNO;
2853 case MBOL:
2854 if (locinput == PL_bostr ||
2855 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2856 {
2857 break;
2858 }
2859 sayNO;
2860 case SBOL:
2861 if (locinput == PL_bostr)
2862 break;
2863 sayNO;
2864 case GPOS:
2865 if (locinput == reginfo->ganch)
2866 break;
2867 sayNO;
2868
2869 case KEEPS:
2870
2871 st->u.keeper.val = PL_regoffs[0].start;
2872 PL_reginput = locinput;
2873 PL_regoffs[0].start = locinput - PL_bostr;
2874 PUSH_STATE_GOTO(KEEPS_next, next);
2875
2876 case KEEPS_next_fail:
2877
2878 PL_regoffs[0].start = st->u.keeper.val;
2879 sayNO_SILENT;
2880
2881 case EOL:
2882 goto seol;
2883 case MEOL:
2884 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2885 sayNO;
2886 break;
2887 case SEOL:
2888 seol:
2889 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2890 sayNO;
2891 if (PL_regeol - locinput > 1)
2892 sayNO;
2893 break;
2894 case EOS:
2895 if (PL_regeol != locinput)
2896 sayNO;
2897 break;
2898 case SANY:
2899 if (!nextchr && locinput >= PL_regeol)
2900 sayNO;
2901 if (do_utf8) {
2902 locinput += PL_utf8skip[nextchr];
2903 if (locinput > PL_regeol)
2904 sayNO;
2905 nextchr = UCHARAT(locinput);
2906 }
2907 else
2908 nextchr = UCHARAT(++locinput);
2909 break;
2910 case CANY:
2911 if (!nextchr && locinput >= PL_regeol)
2912 sayNO;
2913 nextchr = UCHARAT(++locinput);
2914 break;
2915 case REG_ANY:
2916 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2917 sayNO;
2918 if (do_utf8) {
2919 locinput += PL_utf8skip[nextchr];
2920 if (locinput > PL_regeol)
2921 sayNO;
2922 nextchr = UCHARAT(locinput);
2923 }
2924 else
2925 nextchr = UCHARAT(++locinput);
2926 break;
2927
2928#undef ST
2929#define ST st->u.trie
2930 case TRIEC:
2931
2932
2933
2934 if (scan->flags == EXACT || !do_utf8) {
2935 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2936 DEBUG_EXECUTE_r(
2937 PerlIO_printf(Perl_debug_log,
2938 "%*s %sfailed to match trie start class...%s\n",
2939 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2940 );
2941 sayNO_SILENT;
2942
2943 }
2944 }
2945
2946 case TRIE:
2947 {
2948
2949 DECL_TRIE_TYPE(scan);
2950
2951
2952 reg_trie_data * const trie
2953 = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2954 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2955 U32 state = trie->startstate;
2956
2957 if (trie->bitmap && trie_type != trie_utf8_fold &&
2958 !TRIE_BITMAP_TEST(trie,*locinput)
2959 ) {
2960 if (trie->states[ state ].wordnum) {
2961 DEBUG_EXECUTE_r(
2962 PerlIO_printf(Perl_debug_log,
2963 "%*s %smatched empty string...%s\n",
2964 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2965 );
2966 break;
2967 } else {
2968 DEBUG_EXECUTE_r(
2969 PerlIO_printf(Perl_debug_log,
2970 "%*s %sfailed to match trie start class...%s\n",
2971 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2972 );
2973 sayNO_SILENT;
2974 }
2975 }
2976
2977 {
2978 U8 *uc = ( U8* )locinput;
2979
2980 STRLEN len = 0;
2981 STRLEN foldlen = 0;
2982 U8 *uscan = (U8*)NULL;
2983 STRLEN bufflen=0;
2984 SV *sv_accept_buff = NULL;
2985 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2986
2987 ST.accepted = 0;
2988 ST.B = next;
2989 ST.jump = trie->jump;
2990 ST.me = scan;
2991
2992
2993
2994
2995
2996 while ( state && uc <= (U8*)PL_regeol ) {
2997 U32 base = trie->states[ state ].trans.base;
2998 UV uvc = 0;
2999 U16 charid;
3000
3001
3002
3003
3004
3005#define got_wordnum charid
3006 got_wordnum = trie->states[ state ].wordnum;
3007
3008 if ( got_wordnum ) {
3009 if ( ! ST.accepted ) {
3010 ENTER;
3011 SAVETMPS;
3012 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3013 sv_accept_buff=newSV(bufflen *
3014 sizeof(reg_trie_accepted) - 1);
3015 SvCUR_set(sv_accept_buff, 0);
3016 SvPOK_on(sv_accept_buff);
3017 sv_2mortal(sv_accept_buff);
3018 SAVETMPS;
3019 ST.accept_buff =
3020 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3021 }
3022 do {
3023 if (ST.accepted >= bufflen) {
3024 bufflen *= 2;
3025 ST.accept_buff =(reg_trie_accepted*)
3026 SvGROW(sv_accept_buff,
3027 bufflen * sizeof(reg_trie_accepted));
3028 }
3029 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3030 + sizeof(reg_trie_accepted));
3031
3032
3033 ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3034 ST.accept_buff[ST.accepted].endpos = uc;
3035 ++ST.accepted;
3036 } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3037 }
3038#undef got_wordnum
3039
3040 DEBUG_TRIE_EXECUTE_r({
3041 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3042 PerlIO_printf( Perl_debug_log,
3043 "%*s %sState: %4"UVxf" Accepted: %4"UVxf" ",
3044 2+depth * 2, "", PL_colors[4],
3045 (UV)state, (UV)ST.accepted );
3046 });
3047
3048 if ( base ) {
3049 REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3050 uscan, len, uvc, charid, foldlen,
3051 foldbuf, uniflags);
3052
3053 if (charid &&
3054 (base + charid > trie->uniquecharcount )
3055 && (base + charid - 1 - trie->uniquecharcount
3056 < trie->lasttrans)
3057 && trie->trans[base + charid - 1 -
3058 trie->uniquecharcount].check == state)
3059 {
3060 state = trie->trans[base + charid - 1 -
3061 trie->uniquecharcount ].next;
3062 }
3063 else {
3064 state = 0;
3065 }
3066 uc += len;
3067
3068 }
3069 else {
3070 state = 0;
3071 }
3072 DEBUG_TRIE_EXECUTE_r(
3073 PerlIO_printf( Perl_debug_log,
3074 "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3075 charid, uvc, (UV)state, PL_colors[5] );
3076 );
3077 }
3078 if (!ST.accepted )
3079 sayNO;
3080
3081 DEBUG_EXECUTE_r(
3082 PerlIO_printf( Perl_debug_log,
3083 "%*s %sgot %"IVdf" possible matches%s\n",
3084 REPORT_CODE_OFF + depth * 2, "",
3085 PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3086 );
3087 }}
3088 goto trie_first_try;
3089
3090 case TRIE_next_fail:
3091 if ( ST.jump) {
3092 REGCP_UNWIND(ST.cp);
3093 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3094 PL_regoffs[n].end = -1;
3095 *PL_reglastparen = n;
3096 }
3097 trie_first_try:
3098 if (do_cutgroup) {
3099 do_cutgroup = 0;
3100 no_final = 0;
3101 }
3102
3103 if ( ST.jump) {
3104 ST.lastparen = *PL_reglastparen;
3105 REGCP_SET(ST.cp);
3106 }
3107 if ( ST.accepted == 1 ) {
3108
3109 DEBUG_EXECUTE_r({
3110 AV *const trie_words
3111 = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3112 SV ** const tmp = av_fetch( trie_words,
3113 ST.accept_buff[ 0 ].wordnum-1, 0 );
3114 SV *sv= tmp ? sv_newmortal() : NULL;
3115
3116 PerlIO_printf( Perl_debug_log,
3117 "%*s %sonly one match left: #%d <%s>%s\n",
3118 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3119 ST.accept_buff[ 0 ].wordnum,
3120 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3121 PL_colors[0], PL_colors[1],
3122 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3123 )
3124 : "not compiled under -Dr",
3125 PL_colors[5] );
3126 });
3127 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3128
3129
3130
3131 locinput = PL_reginput;
3132 nextchr = UCHARAT(locinput);
3133 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3134 scan = ST.B;
3135 else
3136 scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3137 if (!has_cutgroup) {
3138 FREETMPS;
3139 LEAVE;
3140 } else {
3141 ST.accepted--;
3142 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3143 }
3144
3145 continue;
3146 }
3147
3148 if ( !ST.accepted-- ) {
3149 DEBUG_EXECUTE_r({
3150 PerlIO_printf( Perl_debug_log,
3151 "%*s %sTRIE failed...%s\n",
3152 REPORT_CODE_OFF+depth*2, "",
3153 PL_colors[4],
3154 PL_colors[5] );
3155 });
3156 FREETMPS;
3157 LEAVE;
3158 sayNO_SILENT;
3159
3160 }
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174 {
3175 U32 best = 0;
3176 U32 cur;
3177 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3178 DEBUG_TRIE_EXECUTE_r(
3179 PerlIO_printf( Perl_debug_log,
3180 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3181 REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3182 (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3183 ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3184 );
3185
3186 if (ST.accept_buff[cur].wordnum <
3187 ST.accept_buff[best].wordnum)
3188 best = cur;
3189 }
3190
3191 DEBUG_EXECUTE_r({
3192 AV *const trie_words
3193 = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3194 SV ** const tmp = av_fetch( trie_words,
3195 ST.accept_buff[ best ].wordnum - 1, 0 );
3196 regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3197 ST.B :
3198 ST.me + ST.jump[ST.accept_buff[best].wordnum];
3199 SV *sv= tmp ? sv_newmortal() : NULL;
3200
3201 PerlIO_printf( Perl_debug_log,
3202 "%*s %strying alternation #%d <%s> at node #%d %s\n",
3203 REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3204 ST.accept_buff[best].wordnum,
3205 tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3206 PL_colors[0], PL_colors[1],
3207 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3208 ) : "not compiled under -Dr",
3209 REG_NODE_NUM(nextop),
3210 PL_colors[5] );
3211 });
3212
3213 if ( best<ST.accepted ) {
3214 reg_trie_accepted tmp = ST.accept_buff[ best ];
3215 ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3216 ST.accept_buff[ ST.accepted ] = tmp;
3217 best = ST.accepted;
3218 }
3219 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3220 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3221 scan = ST.B;
3222 } else {
3223 scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3224 }
3225 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3226
3227 }
3228
3229 case TRIE_next:
3230
3231 if (oreplsv != GvSV(PL_replgv))
3232 sv_setsv(oreplsv, GvSV(PL_replgv));
3233 FREETMPS;
3234 LEAVE;
3235 sayYES;
3236#undef ST
3237
3238 case EXACT: {
3239 char *s = STRING(scan);
3240 ln = STR_LEN(scan);
3241 if (do_utf8 != UTF) {
3242
3243 char *l = locinput;
3244 const char * const e = s + ln;
3245
3246 if (do_utf8) {
3247
3248 while (s < e) {
3249 STRLEN ulen;
3250 if (l >= PL_regeol)
3251 sayNO;
3252 if (NATIVE_TO_UNI(*(U8*)s) !=
3253 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3254 uniflags))
3255 sayNO;
3256 l += ulen;
3257 s ++;
3258 }
3259 }
3260 else {
3261
3262 while (s < e) {
3263 STRLEN ulen;
3264 if (l >= PL_regeol)
3265 sayNO;
3266 if (NATIVE_TO_UNI(*((U8*)l)) !=
3267 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3268 uniflags))
3269 sayNO;
3270 s += ulen;
3271 l ++;
3272 }
3273 }
3274 locinput = l;
3275 nextchr = UCHARAT(locinput);
3276 break;
3277 }
3278
3279
3280 if (UCHARAT(s) != nextchr)
3281 sayNO;
3282 if (PL_regeol - locinput < ln)
3283 sayNO;
3284 if (ln > 1 && memNE(s, locinput, ln))
3285 sayNO;
3286 locinput += ln;
3287 nextchr = UCHARAT(locinput);
3288 break;
3289 }
3290 case EXACTFL:
3291 PL_reg_flags |= RF_tainted;
3292
3293 case EXACTF: {
3294 char * const s = STRING(scan);
3295 ln = STR_LEN(scan);
3296
3297 if (do_utf8 || UTF) {
3298
3299 const char * const l = locinput;
3300 char *e = PL_regeol;
3301
3302 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
3303 l, &e, 0, do_utf8)) {
3304
3305
3306
3307
3308
3309 if (!(do_utf8 &&
3310 toLOWER(s[0]) == 's' &&
3311 ln >= 2 &&
3312 toLOWER(s[1]) == 's' &&
3313 (U8)l[0] == 0xC3 &&
3314 e - l >= 2 &&
3315 (U8)l[1] == 0x9F))
3316 sayNO;
3317 }
3318 locinput = e;
3319 nextchr = UCHARAT(locinput);
3320 break;
3321 }
3322
3323
3324
3325
3326 if (UCHARAT(s) != nextchr &&
3327 UCHARAT(s) != ((OP(scan) == EXACTF)
3328 ? PL_fold : PL_fold_locale)[nextchr])
3329 sayNO;
3330 if (PL_regeol - locinput < ln)
3331 sayNO;
3332 if (ln > 1 && (OP(scan) == EXACTF
3333 ? ibcmp(s, locinput, ln)
3334 : ibcmp_locale(s, locinput, ln)))
3335 sayNO;
3336 locinput += ln;
3337 nextchr = UCHARAT(locinput);
3338 break;
3339 }
3340 case ANYOF:
3341 if (do_utf8) {
3342 STRLEN inclasslen = PL_regeol - locinput;
3343
3344 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3345 goto anyof_fail;
3346 if (locinput >= PL_regeol)
3347 sayNO;
3348 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3349 nextchr = UCHARAT(locinput);
3350 break;
3351 }
3352 else {
3353 if (nextchr < 0)
3354 nextchr = UCHARAT(locinput);
3355 if (!REGINCLASS(rex, scan, (U8*)locinput))
3356 goto anyof_fail;
3357 if (!nextchr && locinput >= PL_regeol)
3358 sayNO;
3359 nextchr = UCHARAT(++locinput);
3360 break;
3361 }
3362 anyof_fail:
3363
3364
3365
3366 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3367 locinput += SHARP_S_SKIP;
3368 nextchr = UCHARAT(locinput);
3369 }
3370 else
3371 sayNO;
3372 break;
3373 case ALNUML:
3374 PL_reg_flags |= RF_tainted;
3375
3376 case ALNUM:
3377 if (!nextchr)
3378 sayNO;
3379 if (do_utf8) {
3380 LOAD_UTF8_CHARCLASS_ALNUM();
3381 if (!(OP(scan) == ALNUM
3382 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3383 : isALNUM_LC_utf8((U8*)locinput)))
3384 {
3385 sayNO;
3386 }
3387 locinput += PL_utf8skip[nextchr];
3388 nextchr = UCHARAT(locinput);
3389 break;
3390 }
3391 if (!(OP(scan) == ALNUM
3392 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3393 sayNO;
3394 nextchr = UCHARAT(++locinput);
3395 break;
3396 case NALNUML:
3397 PL_reg_flags |= RF_tainted;
3398
3399 case NALNUM:
3400 if (!nextchr && locinput >= PL_regeol)
3401 sayNO;
3402 if (do_utf8) {
3403 LOAD_UTF8_CHARCLASS_ALNUM();
3404 if (OP(scan) == NALNUM
3405 ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3406 : isALNUM_LC_utf8((U8*)locinput))
3407 {
3408 sayNO;
3409 }
3410 locinput += PL_utf8skip[nextchr];
3411 nextchr = UCHARAT(locinput);
3412 break;
3413 }
3414 if (OP(scan) == NALNUM
3415 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3416 sayNO;
3417 nextchr = UCHARAT(++locinput);
3418 break;
3419 case BOUNDL:
3420 case NBOUNDL:
3421 PL_reg_flags |= RF_tainted;
3422
3423 case BOUND:
3424 case NBOUND:
3425
3426 if (do_utf8) {
3427 if (locinput == PL_bostr)
3428 ln = '\n';
3429 else {
3430 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3431
3432 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3433 }
3434 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3435 ln = isALNUM_uni(ln);
3436 LOAD_UTF8_CHARCLASS_ALNUM();
3437 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3438 }
3439 else {
3440 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3441 n = isALNUM_LC_utf8((U8*)locinput);
3442 }
3443 }
3444 else {
3445 ln = (locinput != PL_bostr) ?
3446 UCHARAT(locinput - 1) : '\n';
3447 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3448 ln = isALNUM(ln);
3449 n = isALNUM(nextchr);
3450 }
3451 else {
3452 ln = isALNUM_LC(ln);
3453 n = isALNUM_LC(nextchr);
3454 }
3455 }
3456 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3457 OP(scan) == BOUNDL))
3458 sayNO;
3459 break;
3460 case SPACEL:
3461 PL_reg_flags |= RF_tainted;
3462
3463 case SPACE:
3464 if (!nextchr)
3465 sayNO;
3466 if (do_utf8) {
3467 if (UTF8_IS_CONTINUED(nextchr)) {
3468 LOAD_UTF8_CHARCLASS_SPACE();
3469 if (!(OP(scan) == SPACE
3470 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3471 : isSPACE_LC_utf8((U8*)locinput)))
3472 {
3473 sayNO;
3474 }
3475 locinput += PL_utf8skip[nextchr];
3476 nextchr = UCHARAT(locinput);
3477 break;
3478 }
3479 if (!(OP(scan) == SPACE
3480 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3481 sayNO;
3482 nextchr = UCHARAT(++locinput);
3483 }
3484 else {
3485 if (!(OP(scan) == SPACE
3486 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3487 sayNO;
3488 nextchr = UCHARAT(++locinput);
3489 }
3490 break;
3491 case NSPACEL:
3492 PL_reg_flags |= RF_tainted;
3493
3494 case NSPACE:
3495 if (!nextchr && locinput >= PL_regeol)
3496 sayNO;
3497 if (do_utf8) {
3498 LOAD_UTF8_CHARCLASS_SPACE();
3499 if (OP(scan) == NSPACE
3500 ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3501 : isSPACE_LC_utf8((U8*)locinput))
3502 {
3503 sayNO;
3504 }
3505 locinput += PL_utf8skip[nextchr];
3506 nextchr = UCHARAT(locinput);
3507 break;
3508 }
3509 if (OP(scan) == NSPACE
3510 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3511 sayNO;
3512 nextchr = UCHARAT(++locinput);
3513 break;
3514 case DIGITL:
3515 PL_reg_flags |= RF_tainted;
3516
3517 case DIGIT:
3518 if (!nextchr)
3519 sayNO;
3520 if (do_utf8) {
3521 LOAD_UTF8_CHARCLASS_DIGIT();
3522 if (!(OP(scan) == DIGIT
3523 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3524 : isDIGIT_LC_utf8((U8*)locinput)))
3525 {
3526 sayNO;
3527 }
3528 locinput += PL_utf8skip[nextchr];
3529 nextchr = UCHARAT(locinput);
3530 break;
3531 }
3532 if (!(OP(scan) == DIGIT
3533 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3534 sayNO;
3535 nextchr = UCHARAT(++locinput);
3536 break;
3537 case NDIGITL:
3538 PL_reg_flags |= RF_tainted;
3539
3540 case NDIGIT:
3541 if (!nextchr && locinput >= PL_regeol)
3542 sayNO;
3543 if (do_utf8) {
3544 LOAD_UTF8_CHARCLASS_DIGIT();
3545 if (OP(scan) == NDIGIT
3546 ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3547 : isDIGIT_LC_utf8((U8*)locinput))
3548 {
3549 sayNO;
3550 }
3551 locinput += PL_utf8skip[nextchr];
3552 nextchr = UCHARAT(locinput);
3553 break;
3554 }
3555 if (OP(scan) == NDIGIT
3556 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3557 sayNO;
3558 nextchr = UCHARAT(++locinput);
3559 break;
3560 case CLUMP:
3561 if (locinput >= PL_regeol)
3562 sayNO;
3563 if (do_utf8) {
3564 LOAD_UTF8_CHARCLASS_MARK();
3565 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3566 sayNO;
3567 locinput += PL_utf8skip[nextchr];
3568 while (locinput < PL_regeol &&
3569 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3570 locinput += UTF8SKIP(locinput);
3571 if (locinput > PL_regeol)
3572 sayNO;
3573 }
3574 else
3575 locinput++;
3576 nextchr = UCHARAT(locinput);
3577 break;
3578
3579 case NREFFL:
3580 {
3581 char *s;
3582 char type;
3583 PL_reg_flags |= RF_tainted;
3584
3585 case NREF:
3586 case NREFF:
3587 type = OP(scan);
3588 n = reg_check_named_buff_matched(rex,scan);
3589
3590 if ( n ) {
3591 type = REF + ( type - NREF );
3592 goto do_ref;
3593 } else {
3594 sayNO;
3595 }
3596
3597 case REFFL:
3598 PL_reg_flags |= RF_tainted;
3599
3600 case REF:
3601 case REFF:
3602 n = ARG(scan);
3603 type = OP(scan);
3604 do_ref:
3605 ln = PL_regoffs[n].start;
3606 PL_reg_leftiter = PL_reg_maxiter;
3607 if (*PL_reglastparen < n || ln == -1)
3608 sayNO;
3609 if (ln == PL_regoffs[n].end)
3610 break;
3611
3612 s = PL_bostr + ln;
3613 if (do_utf8 && type != REF) {
3614 char *l = locinput;
3615 const char *e = PL_bostr + PL_regoffs[n].end;
3616
3617
3618
3619
3620
3621 if (type == REFF) {
3622 while (s < e) {
3623 STRLEN ulen1, ulen2;
3624 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3625 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3626
3627 if (l >= PL_regeol)
3628 sayNO;
3629 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3630 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3631 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3632 sayNO;
3633 s += ulen1;
3634 l += ulen2;
3635 }
3636 }
3637 locinput = l;
3638 nextchr = UCHARAT(locinput);
3639 break;
3640 }
3641
3642
3643 if (UCHARAT(s) != nextchr &&
3644 (type == REF ||
3645 (UCHARAT(s) != (type == REFF
3646 ? PL_fold : PL_fold_locale)[nextchr])))
3647 sayNO;
3648 ln = PL_regoffs[n].end - ln;
3649 if (locinput + ln > PL_regeol)
3650 sayNO;
3651 if (ln > 1 && (type == REF
3652 ? memNE(s, locinput, ln)
3653 : (type == REFF
3654 ? ibcmp(s, locinput, ln)
3655 : ibcmp_locale(s, locinput, ln))))
3656 sayNO;
3657 locinput += ln;
3658 nextchr = UCHARAT(locinput);
3659 break;
3660 }
3661 case NOTHING:
3662 case TAIL:
3663 break;
3664 case BACK:
3665 break;
3666
3667#undef ST
3668#define ST st->u.eval
3669 {
3670 SV *ret;
3671 regexp *re;
3672 regexp_internal *rei;
3673 regnode *startpoint;
3674
3675 case GOSTART:
3676 case GOSUB:
3677 if (cur_eval && cur_eval->locinput==locinput) {
3678 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3679 Perl_croak(aTHX_ "Infinite recursion in regex");
3680 if ( ++nochange_depth > max_nochange_depth )
3681 Perl_croak(aTHX_
3682 "Pattern subroutine nesting without pos change"
3683 " exceeded limit in regex");
3684 } else {
3685 nochange_depth = 0;
3686 }
3687 re = rex;
3688 rei = rexi;
3689 (void)ReREFCNT_inc(rex);
3690 if (OP(scan)==GOSUB) {
3691 startpoint = scan + ARG2L(scan);
3692 ST.close_paren = ARG(scan);
3693 } else {
3694 startpoint = rei->program+1;
3695 ST.close_paren = 0;
3696 }
3697 goto eval_recurse_doit;
3698
3699 case EVAL:
3700 if (cur_eval && cur_eval->locinput==locinput) {
3701 if ( ++nochange_depth > max_nochange_depth )
3702 Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3703 } else {
3704 nochange_depth = 0;
3705 }
3706 {
3707
3708 dSP;
3709 SV ** const before = SP;
3710 OP_4tree * const oop = PL_op;
3711 COP * const ocurcop = PL_curcop;
3712 PAD *old_comppad;
3713
3714 n = ARG(scan);
3715 PL_op = (OP_4tree*)rexi->data->data[n];
3716 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3717 " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3718 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3719 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3720
3721 if (sv_yes_mark) {
3722 SV *sv_mrk = get_sv("REGMARK", 1);
3723 sv_setsv(sv_mrk, sv_yes_mark);
3724 }
3725
3726 CALLRUNOPS(aTHX);
3727 SPAGAIN;
3728 if (SP == before)
3729 ret = &PL_sv_undef;
3730 else {
3731 ret = POPs;
3732 PUTBACK;
3733 }
3734
3735 PL_op = oop;
3736 PAD_RESTORE_LOCAL(old_comppad);
3737 PL_curcop = ocurcop;
3738 if (!logical) {
3739
3740 sv_setsv(save_scalar(PL_replgv), ret);
3741 break;
3742 }
3743 }
3744 if (logical == 2) {
3745 logical = 0;
3746 {
3747
3748
3749
3750 MAGIC *mg = NULL;
3751 const SV *sv;
3752 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3753 mg = mg_find(sv, PERL_MAGIC_qr);
3754 else if (SvSMAGICAL(ret)) {
3755 if (SvGMAGICAL(ret))
3756 sv_unmagic(ret, PERL_MAGIC_qr);
3757 else
3758 mg = mg_find(ret, PERL_MAGIC_qr);
3759 }
3760
3761 if (mg) {
3762 re = reg_temp_copy((regexp *)mg->mg_obj);
3763 }
3764 else {
3765 U32 pm_flags = 0;
3766 const I32 osize = PL_regsize;
3767
3768 if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
3769 re = CALLREGCOMP(ret, pm_flags);
3770 if (!(SvFLAGS(ret)
3771 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3772 | SVs_GMG)))
3773 sv_magic(ret,MUTABLE_SV(ReREFCNT_inc(re)),
3774 PERL_MAGIC_qr,0,0);
3775 PL_regsize = osize;
3776 }
3777 }
3778 RXp_MATCH_COPIED_off(re);
3779 re->subbeg = rex->subbeg;
3780 re->sublen = rex->sublen;
3781 rei = RXi_GET(re);
3782 DEBUG_EXECUTE_r(
3783 debug_start_match(re, do_utf8, locinput, PL_regeol,
3784 "Matching embedded");
3785 );
3786 startpoint = rei->program + 1;
3787 ST.close_paren = 0;
3788
3789 if (PL_reg_start_tmpl <= re->nparens) {
3790 PL_reg_start_tmpl = re->nparens*3/2 + 3;
3791 if(PL_reg_start_tmp)
3792 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3793 else
3794 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3795 }
3796
3797 eval_recurse_doit:
3798
3799 ST.cp = regcppush(0);
3800 REGCP_SET(ST.lastcp);
3801
3802 PL_regoffs = re->offs;
3803
3804
3805 PL_reglastparen = &re->lastparen;
3806 PL_reglastcloseparen = &re->lastcloseparen;
3807 re->lastparen = 0;
3808 re->lastcloseparen = 0;
3809
3810 PL_reginput = locinput;
3811 PL_regsize = 0;
3812
3813
3814 PL_reg_maxiter = 0;
3815
3816 ST.toggle_reg_flags = PL_reg_flags;
3817 if (RX_UTF8(re))
3818 PL_reg_flags |= RF_utf8;
3819 else
3820 PL_reg_flags &= ~RF_utf8;
3821 ST.toggle_reg_flags ^= PL_reg_flags;
3822
3823 ST.prev_rex = rex;
3824 ST.prev_curlyx = cur_curlyx;
3825 SETREX(rex,re);
3826 rexi = rei;
3827 cur_curlyx = NULL;
3828 ST.B = next;
3829 ST.prev_eval = cur_eval;
3830 cur_eval = st;
3831
3832 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3833
3834 }
3835
3836 sw = (bool)SvTRUE(ret);
3837 logical = 0;
3838 break;
3839 }
3840
3841 case EVAL_AB:
3842
3843 PL_reg_flags ^= ST.toggle_reg_flags;
3844 ReREFCNT_dec(rex);
3845 SETREX(rex,ST.prev_rex);
3846 rexi = RXi_GET(rex);
3847 regcpblow(ST.cp);
3848 cur_eval = ST.prev_eval;
3849 cur_curlyx = ST.prev_curlyx;
3850
3851
3852 PL_reglastparen = &rex->lastparen;
3853 PL_reglastcloseparen = &rex->lastcloseparen;
3854
3855 PL_regoffs = rex->offs;
3856
3857
3858 PL_reg_maxiter = 0;
3859 if ( nochange_depth )
3860 nochange_depth--;
3861 sayYES;
3862
3863
3864 case EVAL_AB_fail:
3865
3866 PL_reg_flags ^= ST.toggle_reg_flags;
3867 ReREFCNT_dec(rex);
3868 SETREX(rex,ST.prev_rex);
3869 rexi = RXi_GET(rex);
3870
3871 PL_reglastparen = &rex->lastparen;
3872 PL_reglastcloseparen = &rex->lastcloseparen;
3873
3874 PL_reginput = locinput;
3875 REGCP_UNWIND(ST.lastcp);
3876 regcppop(rex);
3877 cur_eval = ST.prev_eval;
3878 cur_curlyx = ST.prev_curlyx;
3879
3880 PL_reg_maxiter = 0;
3881 if ( nochange_depth )
3882 nochange_depth--;
3883 sayNO_SILENT;
3884#undef ST
3885
3886 case OPEN:
3887 n = ARG(scan);
3888 PL_reg_start_tmp[n] = locinput;
3889 if (n > PL_regsize)
3890 PL_regsize = n;
3891 lastopen = n;
3892 break;
3893 case CLOSE:
3894 n = ARG(scan);
3895 PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3896 PL_regoffs[n].end = locinput - PL_bostr;
3897
3898
3899 if (n > *PL_reglastparen)
3900 *PL_reglastparen = n;
3901 *PL_reglastcloseparen = n;
3902 if (cur_eval && cur_eval->u.eval.close_paren == n) {
3903 goto fake_end;
3904 }
3905 break;
3906 case ACCEPT:
3907 if (ARG(scan)){
3908 regnode *cursor;
3909 for (cursor=scan;
3910 cursor && OP(cursor)!=END;
3911 cursor=regnext(cursor))
3912 {
3913 if ( OP(cursor)==CLOSE ){
3914 n = ARG(cursor);
3915 if ( n <= lastopen ) {
3916 PL_regoffs[n].start
3917 = PL_reg_start_tmp[n] - PL_bostr;
3918 PL_regoffs[n].end = locinput - PL_bostr;
3919
3920
3921 if (n > *PL_reglastparen)
3922 *PL_reglastparen = n;
3923 *PL_reglastcloseparen = n;
3924 if ( n == ARG(scan) || (cur_eval &&
3925 cur_eval->u.eval.close_paren == n))
3926 break;
3927 }
3928 }
3929 }
3930 }
3931 goto fake_end;
3932
3933 case GROUPP:
3934 n = ARG(scan);
3935 sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3936 break;
3937 case NGROUPP:
3938
3939 sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3940 break;
3941 case INSUBP:
3942 n = ARG(scan);
3943 sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3944 break;
3945 case DEFINEP:
3946 sw = 0;
3947 break;
3948 case IFTHEN:
3949 PL_reg_leftiter = PL_reg_maxiter;
3950 if (sw)
3951 next = NEXTOPER(NEXTOPER(scan));
3952 else {
3953 next = scan + ARG(scan);
3954 if (OP(next) == IFTHEN)
3955 next = NEXTOPER(NEXTOPER(next));
3956 }
3957 break;
3958 case LOGICAL:
3959 logical = scan->flags;
3960 break;
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045#define ST st->u.curlyx
4046
4047 case CURLYX:
4048 {
4049
4050 I32 parenfloor = scan->flags;
4051
4052 assert(next);
4053 if (OP(PREVOPER(next)) == NOTHING)
4054 next += ARG(next);
4055
4056
4057
4058 if (parenfloor > (I32)*PL_reglastparen)
4059 parenfloor = *PL_reglastparen;
4060
4061 ST.prev_curlyx= cur_curlyx;
4062 cur_curlyx = st;
4063 ST.cp = PL_savestack_ix;
4064
4065
4066
4067 ST.parenfloor = parenfloor;
4068 ST.min = ARG1(scan);
4069 ST.max = ARG2(scan);
4070 ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4071 ST.B = next;
4072 ST.minmod = minmod;
4073 minmod = 0;
4074 ST.count = -1;
4075 ST.lastloc = NULL;
4076
4077 PL_reginput = locinput;
4078 PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4079
4080 }
4081
4082 case CURLYX_end:
4083 cur_curlyx = ST.prev_curlyx;
4084 sayYES;
4085
4086
4087 case CURLYX_end_fail:
4088 regcpblow(ST.cp);
4089 cur_curlyx = ST.prev_curlyx;
4090 sayNO;
4091
4092
4093
4094#undef ST
4095#define ST st->u.whilem
4096
4097 case WHILEM:
4098 {
4099
4100 I32 n;
4101 assert(cur_curlyx);
4102 n = ++cur_curlyx->u.curlyx.count;
4103 ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4104 ST.cache_offset = 0;
4105 ST.cache_mask = 0;
4106
4107 PL_reginput = locinput;
4108
4109 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4110 "%*s whilem: matched %ld out of %ld..%ld\n",
4111 REPORT_CODE_OFF+depth*2, "", (long)n,
4112 (long)cur_curlyx->u.curlyx.min,
4113 (long)cur_curlyx->u.curlyx.max)
4114 );
4115
4116
4117
4118 if (n < cur_curlyx->u.curlyx.min) {
4119 cur_curlyx->u.curlyx.lastloc = locinput;
4120 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4121
4122 }
4123
4124
4125
4126 if (locinput == cur_curlyx->u.curlyx.lastloc) {
4127 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4128 "%*s whilem: empty match detected, trying continuation...\n",
4129 REPORT_CODE_OFF+depth*2, "")
4130 );
4131 goto do_whilem_B_max;
4132 }
4133
4134
4135
4136 if (scan->flags) {
4137
4138 if (!PL_reg_maxiter) {
4139
4140
4141 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4142
4143 if (PL_reg_maxiter < 0)
4144 PL_reg_maxiter = I32_MAX;
4145 PL_reg_leftiter = PL_reg_maxiter;
4146 }
4147
4148 if (PL_reg_leftiter-- == 0) {
4149
4150 const I32 size = (PL_reg_maxiter + 7)/8;
4151 if (PL_reg_poscache) {
4152 if ((I32)PL_reg_poscache_size < size) {
4153 Renew(PL_reg_poscache, size, char);
4154 PL_reg_poscache_size = size;
4155 }
4156 Zero(PL_reg_poscache, size, char);
4157 }
4158 else {
4159 PL_reg_poscache_size = size;
4160 Newxz(PL_reg_poscache, size, char);
4161 }
4162 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4163 "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4164 PL_colors[4], PL_colors[5])
4165 );
4166 }
4167
4168 if (PL_reg_leftiter < 0) {
4169
4170 I32 offset, mask;
4171 offset = (scan->flags & 0xf) - 1
4172 + (locinput - PL_bostr) * (scan->flags>>4);
4173 mask = 1 << (offset % 8);
4174 offset /= 8;
4175 if (PL_reg_poscache[offset] & mask) {
4176 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4177 "%*s whilem: (cache) already tried at this position...\n",
4178 REPORT_CODE_OFF+depth*2, "")
4179 );
4180 sayNO;
4181 }
4182 ST.cache_offset = offset;
4183 ST.cache_mask = mask;
4184 }
4185 }
4186
4187
4188
4189 if (cur_curlyx->u.curlyx.minmod) {
4190 ST.save_curlyx = cur_curlyx;
4191 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4192 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4193 REGCP_SET(ST.lastcp);
4194 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4195
4196 }
4197
4198
4199
4200 if (n < cur_curlyx->u.curlyx.max) {
4201 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4202 cur_curlyx->u.curlyx.lastloc = locinput;
4203 REGCP_SET(ST.lastcp);
4204 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4205
4206 }
4207 goto do_whilem_B_max;
4208 }
4209
4210
4211 case WHILEM_B_min:
4212 case WHILEM_B_max:
4213 cur_curlyx = ST.save_curlyx;
4214 sayYES;
4215
4216
4217 case WHILEM_B_max_fail:
4218 cur_curlyx = ST.save_curlyx;
4219 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4220 cur_curlyx->u.curlyx.count--;
4221 CACHEsayNO;
4222
4223
4224 case WHILEM_A_min_fail:
4225 REGCP_UNWIND(ST.lastcp);
4226 regcppop(rex);
4227
4228 case WHILEM_A_pre_fail:
4229 cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4230 cur_curlyx->u.curlyx.count--;
4231 CACHEsayNO;
4232
4233
4234 case WHILEM_A_max_fail:
4235 REGCP_UNWIND(ST.lastcp);
4236 regcppop(rex);
4237 PL_reginput = locinput;
4238 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4239 "%*s whilem: failed, trying continuation...\n",
4240 REPORT_CODE_OFF+depth*2, "")
4241 );
4242 do_whilem_B_max:
4243 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4244 && ckWARN(WARN_REGEXP)
4245 && !(PL_reg_flags & RF_warned))
4246 {
4247 PL_reg_flags |= RF_warned;
4248 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4249 "Complex regular subexpression recursion",
4250 REG_INFTY - 1);
4251 }
4252
4253
4254 ST.save_curlyx = cur_curlyx;
4255 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4256 PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4257
4258
4259 case WHILEM_B_min_fail:
4260 cur_curlyx = ST.save_curlyx;
4261 REGCP_UNWIND(ST.lastcp);
4262 regcppop(rex);
4263
4264 if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4265
4266 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4267 && ckWARN(WARN_REGEXP)
4268 && !(PL_reg_flags & RF_warned))
4269 {
4270 PL_reg_flags |= RF_warned;
4271 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4272 "%s limit (%d) exceeded",
4273 "Complex regular subexpression recursion",
4274 REG_INFTY - 1);
4275 }
4276 cur_curlyx->u.curlyx.count--;
4277 CACHEsayNO;
4278 }
4279
4280 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4281 "%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4282 );
4283
4284 PL_reginput = locinput;
4285 cur_curlyx->u.curlyx.lastloc = locinput;
4286 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4287 REGCP_SET(ST.lastcp);
4288 PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4289
4290
4291#undef ST
4292#define ST st->u.branch
4293
4294 case BRANCHJ:
4295 next = scan + ARG(scan);
4296 if (next == scan)
4297 next = NULL;
4298 scan = NEXTOPER(scan);
4299
4300
4301 case BRANCH:
4302 scan = NEXTOPER(scan);
4303 ST.lastparen = *PL_reglastparen;
4304 ST.next_branch = next;
4305 REGCP_SET(ST.cp);
4306 PL_reginput = locinput;
4307
4308
4309 if (has_cutgroup) {
4310 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4311 } else {
4312 PUSH_STATE_GOTO(BRANCH_next, scan);
4313 }
4314
4315 case CUTGROUP:
4316 PL_reginput = locinput;
4317 sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4318 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4319 PUSH_STATE_GOTO(CUTGROUP_next,next);
4320
4321 case CUTGROUP_next_fail:
4322 do_cutgroup = 1;
4323 no_final = 1;
4324 if (st->u.mark.mark_name)
4325 sv_commit = st->u.mark.mark_name;
4326 sayNO;
4327
4328 case BRANCH_next:
4329 sayYES;
4330
4331 case BRANCH_next_fail:
4332 if (do_cutgroup) {
4333 do_cutgroup = 0;
4334 no_final = 0;
4335 }
4336 REGCP_UNWIND(ST.cp);
4337 for (n = *PL_reglastparen; n > ST.lastparen; n--)
4338 PL_regoffs[n].end = -1;
4339 *PL_reglastparen = n;
4340
4341 scan = ST.next_branch;
4342
4343 if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4344 DEBUG_EXECUTE_r({
4345 PerlIO_printf( Perl_debug_log,
4346 "%*s %sBRANCH failed...%s\n",
4347 REPORT_CODE_OFF+depth*2, "",
4348 PL_colors[4],
4349 PL_colors[5] );
4350 });
4351 sayNO_SILENT;
4352 }
4353 continue;
4354
4355
4356 case MINMOD:
4357 minmod = 1;
4358 break;
4359
4360#undef ST
4361#define ST st->u.curlym
4362
4363 case CURLYM:
4364
4365
4366
4367
4368
4369
4370
4371 ST.me = scan;
4372 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4373
4374
4375 if (ST.me->flags) {
4376 U32 paren = ST.me->flags;
4377 if (paren > PL_regsize)
4378 PL_regsize = paren;
4379 if (paren > *PL_reglastparen)
4380 *PL_reglastparen = paren;
4381 scan += NEXT_OFF(scan);
4382 }
4383 ST.A = scan;
4384 ST.B = next;
4385 ST.alen = 0;
4386 ST.count = 0;
4387 ST.minmod = minmod;
4388 minmod = 0;
4389 ST.c1 = CHRTEST_UNINIT;
4390 REGCP_SET(ST.cp);
4391
4392 if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me)))
4393 goto curlym_do_B;
4394
4395 curlym_do_A:
4396 PL_reginput = locinput;
4397 PUSH_YES_STATE_GOTO(CURLYM_A, ST.A);
4398
4399
4400 case CURLYM_A:
4401 locinput = st->locinput;
4402 nextchr = UCHARAT(locinput);
4403
4404 ST.count++;
4405
4406 if (ST.count == 1) {
4407 if (PL_reg_match_utf8) {
4408 char *s = locinput;
4409 while (s < PL_reginput) {
4410 ST.alen++;
4411 s += UTF8SKIP(s);
4412 }
4413 }
4414 else {
4415 ST.alen = PL_reginput - locinput;
4416 }
4417 if (ST.alen == 0)
4418 ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4419 }
4420 DEBUG_EXECUTE_r(
4421 PerlIO_printf(Perl_debug_log,
4422 "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4423 (int)(REPORT_CODE_OFF+(depth*2)), "",
4424 (IV) ST.count, (IV)ST.alen)
4425 );
4426
4427 locinput = PL_reginput;
4428
4429 if (cur_eval && cur_eval->u.eval.close_paren &&
4430 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4431 goto fake_end;
4432
4433 {
4434 I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4435 if ( max == REG_INFTY || ST.count < max )
4436 goto curlym_do_A;
4437 }
4438 goto curlym_do_B;
4439
4440 case CURLYM_A_fail:
4441 REGCP_UNWIND(ST.cp);
4442
4443 if (ST.minmod || ST.count < ARG1(ST.me)
4444 || (cur_eval && cur_eval->u.eval.close_paren &&
4445 cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4446 sayNO;
4447
4448 curlym_do_B:
4449 PL_reginput = locinput;
4450 if (ST.c1 == CHRTEST_UNINIT) {
4451
4452
4453 ST.c1 = ST.c2 = CHRTEST_VOID;
4454 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4455 regnode *text_node = ST.B;
4456 if (! HAS_TEXT(text_node))
4457 FIND_NEXT_IMPT(text_node);
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467 if (PL_regkind[OP(text_node)] == EXACT)
4468 {
4469
4470 ST.c1 = (U8)*STRING(text_node);
4471 ST.c2 =
4472 (IS_TEXTF(text_node))
4473 ? PL_fold[ST.c1]
4474 : (IS_TEXTFL(text_node))
4475 ? PL_fold_locale[ST.c1]
4476 : ST.c1;
4477 }
4478 }
4479 }
4480
4481 DEBUG_EXECUTE_r(
4482 PerlIO_printf(Perl_debug_log,
4483 "%*s CURLYM trying tail with matches=%"IVdf"...\n",
4484 (int)(REPORT_CODE_OFF+(depth*2)),
4485 "", (IV)ST.count)
4486 );
4487 if (ST.c1 != CHRTEST_VOID
4488 && UCHARAT(PL_reginput) != ST.c1
4489 && UCHARAT(PL_reginput) != ST.c2)
4490 {
4491
4492 DEBUG_OPTIMISE_r(
4493 PerlIO_printf(Perl_debug_log,
4494 "%*s CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4495 (int)(REPORT_CODE_OFF+(depth*2)),"",
4496 (IV)ST.c1,(IV)ST.c2
4497 ));
4498 state_num = CURLYM_B_fail;
4499 goto reenter_switch;
4500 }
4501
4502 if (ST.me->flags) {
4503
4504 I32 paren = ST.me->flags;
4505 if (ST.count) {
4506 PL_regoffs[paren].start
4507 = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4508 PL_regoffs[paren].end = PL_reginput - PL_bostr;
4509
4510 }
4511 else
4512 PL_regoffs[paren].end = -1;
4513 if (cur_eval && cur_eval->u.eval.close_paren &&
4514 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4515 {
4516 if (ST.count)
4517 goto fake_end;
4518 else
4519 sayNO;
4520 }
4521 }
4522
4523 PUSH_STATE_GOTO(CURLYM_B, ST.B);
4524
4525
4526 case CURLYM_B_fail:
4527 REGCP_UNWIND(ST.cp);
4528 if (ST.minmod) {
4529 I32 max = ARG2(ST.me);
4530 if (max != REG_INFTY && ST.count == max)
4531 sayNO;
4532 goto curlym_do_A;
4533 }
4534
4535 if (ST.count == ARG1(ST.me) )
4536 sayNO;
4537 ST.count--;
4538 locinput = HOPc(locinput, -ST.alen);
4539 goto curlym_do_B;
4540
4541#undef ST
4542#define ST st->u.curly
4543
4544#define CURLY_SETPAREN(paren, success) \
4545 if (paren) { \
4546 if (success) { \
4547 PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4548 PL_regoffs[paren].end = locinput - PL_bostr; \
4549 *PL_reglastcloseparen = paren; \
4550 } \
4551 else \
4552 PL_regoffs[paren].end = -1; \
4553 }
4554
4555 case STAR:
4556 ST.paren = 0;
4557 ST.min = 0;
4558 ST.max = REG_INFTY;
4559 scan = NEXTOPER(scan);
4560 goto repeat;
4561 case PLUS:
4562 ST.paren = 0;
4563 ST.min = 1;
4564 ST.max = REG_INFTY;
4565 scan = NEXTOPER(scan);
4566 goto repeat;
4567 case CURLYN:
4568 ST.paren = scan->flags;
4569 if (ST.paren > PL_regsize)
4570 PL_regsize = ST.paren;
4571 if (ST.paren > *PL_reglastparen)
4572 *PL_reglastparen = ST.paren;
4573 ST.min = ARG1(scan);
4574 ST.max = ARG2(scan);
4575 if (cur_eval && cur_eval->u.eval.close_paren &&
4576 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4577 ST.min=1;
4578 ST.max=1;
4579 }
4580 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4581 goto repeat;
4582 case CURLY:
4583 ST.paren = 0;
4584 ST.min = ARG1(scan);
4585 ST.max = ARG2(scan);
4586 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4587 repeat:
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597 if (ST.min > ST.max)
4598 sayNO;
4599 if (HAS_TEXT(next) || JUMPABLE(next)) {
4600 U8 *s;
4601 regnode *text_node = next;
4602
4603 if (! HAS_TEXT(text_node))
4604 FIND_NEXT_IMPT(text_node);
4605
4606 if (! HAS_TEXT(text_node))
4607 ST.c1 = ST.c2 = CHRTEST_VOID;
4608 else {
4609 if ( PL_regkind[OP(text_node)] != EXACT ) {
4610 ST.c1 = ST.c2 = CHRTEST_VOID;
4611 goto assume_ok_easy;
4612 }
4613 else
4614 s = (U8*)STRING(text_node);
4615
4616
4617
4618
4619
4620
4621
4622 if (!UTF) {
4623 ST.c2 = ST.c1 = *s;
4624 if (IS_TEXTF(text_node))
4625 ST.c2 = PL_fold[ST.c1];
4626 else if (IS_TEXTFL(text_node))
4627 ST.c2 = PL_fold_locale[ST.c1];
4628 }
4629 else {
4630 if (IS_TEXTF(text_node)) {
4631 STRLEN ulen1, ulen2;
4632 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4633 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4634
4635 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4636 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4637#ifdef EBCDIC
4638 ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4639 ckWARN(WARN_UTF8) ?
4640 0 : UTF8_ALLOW_ANY);
4641 ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4642 ckWARN(WARN_UTF8) ?
4643 0 : UTF8_ALLOW_ANY);
4644#else
4645 ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4646 uniflags);
4647 ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4648 uniflags);
4649#endif
4650 }
4651 else {
4652 ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4653 uniflags);
4654 }
4655 }
4656 }
4657 }
4658 else
4659 ST.c1 = ST.c2 = CHRTEST_VOID;
4660 assume_ok_easy:
4661
4662 ST.A = scan;
4663 ST.B = next;
4664 PL_reginput = locinput;
4665 if (minmod) {
4666 minmod = 0;
4667 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4668 sayNO;
4669 ST.count = ST.min;
4670 locinput = PL_reginput;
4671 REGCP_SET(ST.cp);
4672 if (ST.c1 == CHRTEST_VOID)
4673 goto curly_try_B_min;
4674
4675 ST.oldloc = locinput;
4676
4677
4678
4679 if (ST.max == REG_INFTY) {
4680 ST.maxpos = PL_regeol - 1;
4681 if (do_utf8)
4682 while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4683 ST.maxpos--;
4684 }
4685 else if (do_utf8) {
4686 int m = ST.max - ST.min;
4687 for (ST.maxpos = locinput;
4688 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4689 ST.maxpos += UTF8SKIP(ST.maxpos);
4690 }
4691 else {
4692 ST.maxpos = locinput + ST.max - ST.min;
4693 if (ST.maxpos >= PL_regeol)
4694 ST.maxpos = PL_regeol - 1;
4695 }
4696 goto curly_try_B_min_known;
4697
4698 }
4699 else {
4700 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4701 locinput = PL_reginput;
4702 if (ST.count < ST.min)
4703 sayNO;
4704 if ((ST.count > ST.min)
4705 && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4706 {
4707
4708
4709 ST.min = ST.count;
4710
4711
4712
4713 if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4714 ST.min--;
4715 }
4716 REGCP_SET(ST.cp);
4717 goto curly_try_B_max;
4718 }
4719
4720
4721
4722 case CURLY_B_min_known_fail:
4723
4724 if (ST.paren && ST.count)
4725 PL_regoffs[ST.paren].end = -1;
4726
4727 PL_reginput = locinput;
4728 REGCP_UNWIND(ST.cp);
4729
4730 ST.oldloc = locinput;
4731 if (do_utf8)
4732 locinput += UTF8SKIP(locinput);
4733 else
4734 locinput++;
4735 ST.count++;
4736 curly_try_B_min_known:
4737
4738 {
4739 int n;
4740 if (do_utf8) {
4741 n = (ST.oldloc == locinput) ? 0 : 1;
4742 if (ST.c1 == ST.c2) {
4743 STRLEN len;
4744
4745 while (locinput <= ST.maxpos &&
4746 utf8n_to_uvchr((U8*)locinput,
4747 UTF8_MAXBYTES, &len,
4748 uniflags) != (UV)ST.c1) {
4749 locinput += len;
4750 n++;
4751 }
4752 }
4753 else {
4754
4755 while (locinput <= ST.maxpos) {
4756 STRLEN len;
4757 const UV c = utf8n_to_uvchr((U8*)locinput,
4758 UTF8_MAXBYTES, &len,
4759 uniflags);
4760 if (c == (UV)ST.c1 || c == (UV)ST.c2)
4761 break;
4762 locinput += len;
4763 n++;
4764 }
4765 }
4766 }
4767 else {
4768 if (ST.c1 == ST.c2) {
4769 while (locinput <= ST.maxpos &&
4770 UCHARAT(locinput) != ST.c1)
4771 locinput++;
4772 }
4773 else {
4774 while (locinput <= ST.maxpos
4775 && UCHARAT(locinput) != ST.c1
4776 && UCHARAT(locinput) != ST.c2)
4777 locinput++;
4778 }
4779 n = locinput - ST.oldloc;
4780 }
4781 if (locinput > ST.maxpos)
4782 sayNO;
4783
4784 if (n) {
4785 ST.count += n;
4786 if (regrepeat(rex, ST.A, n, depth) < n)
4787 sayNO;
4788 }
4789 PL_reginput = locinput;
4790 CURLY_SETPAREN(ST.paren, ST.count);
4791 if (cur_eval && cur_eval->u.eval.close_paren &&
4792 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4793 goto fake_end;
4794 }
4795 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4796 }
4797
4798
4799
4800 case CURLY_B_min_fail:
4801
4802 if (ST.paren && ST.count)
4803 PL_regoffs[ST.paren].end = -1;
4804
4805 REGCP_UNWIND(ST.cp);
4806
4807 PL_reginput = locinput;
4808 if (regrepeat(rex, ST.A, 1, depth)) {
4809 ST.count++;
4810 locinput = PL_reginput;
4811 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4812 ST.count > 0))
4813 {
4814 curly_try_B_min:
4815 CURLY_SETPAREN(ST.paren, ST.count);
4816 if (cur_eval && cur_eval->u.eval.close_paren &&
4817 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4818 goto fake_end;
4819 }
4820 PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4821 }
4822 }
4823 sayNO;
4824
4825
4826
4827 curly_try_B_max:
4828
4829 if (cur_eval && cur_eval->u.eval.close_paren &&
4830 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4831 goto fake_end;
4832 }
4833 {
4834 UV c = 0;
4835 if (ST.c1 != CHRTEST_VOID)
4836 c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4837 UTF8_MAXBYTES, 0, uniflags)
4838 : (UV) UCHARAT(PL_reginput);
4839
4840 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4841 CURLY_SETPAREN(ST.paren, ST.count);
4842 PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4843
4844 }
4845 }
4846
4847 case CURLY_B_max_fail:
4848
4849 if (ST.paren && ST.count)
4850 PL_regoffs[ST.paren].end = -1;
4851
4852 REGCP_UNWIND(ST.cp);
4853
4854 if (--ST.count < ST.min)
4855 sayNO;
4856 PL_reginput = locinput = HOPc(locinput, -1);
4857 goto curly_try_B_max;
4858
4859#undef ST
4860
4861 case END:
4862 fake_end:
4863 if (cur_eval) {
4864
4865 I32 tmpix;
4866 st->u.eval.toggle_reg_flags
4867 = cur_eval->u.eval.toggle_reg_flags;
4868 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4869
4870 st->u.eval.prev_rex = rex;
4871 SETREX(rex,cur_eval->u.eval.prev_rex);
4872 rexi = RXi_GET(rex);
4873 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4874 ReREFCNT_inc(rex);
4875 st->u.eval.cp = regcppush(0);
4876
4877
4878 PL_reglastparen = &rex->lastparen;
4879 PL_reglastcloseparen = &rex->lastcloseparen;
4880
4881 REGCP_SET(st->u.eval.lastcp);
4882 PL_reginput = locinput;
4883
4884
4885
4886 tmpix = PL_savestack_ix;
4887 PL_savestack_ix = cur_eval->u.eval.lastcp;
4888 regcppop(rex);
4889 PL_savestack_ix = tmpix;
4890
4891 st->u.eval.prev_eval = cur_eval;
4892 cur_eval = cur_eval->u.eval.prev_eval;
4893 DEBUG_EXECUTE_r(
4894 PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
4895 REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4896 if ( nochange_depth )
4897 nochange_depth--;
4898
4899 PUSH_YES_STATE_GOTO(EVAL_AB,
4900 st->u.eval.prev_eval->u.eval.B);
4901 }
4902
4903 if (locinput < reginfo->till) {
4904 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4905 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4906 PL_colors[4],
4907 (long)(locinput - PL_reg_starttry),
4908 (long)(reginfo->till - PL_reg_starttry),
4909 PL_colors[5]));
4910
4911 sayNO_SILENT;
4912 }
4913 PL_reginput = locinput;
4914 sayYES;
4915
4916 case SUCCEED:
4917 DEBUG_EXECUTE_r(
4918 PerlIO_printf(Perl_debug_log,
4919 "%*s %ssubpattern success...%s\n",
4920 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4921 PL_reginput = locinput;
4922 sayYES;
4923
4924#undef ST
4925#define ST st->u.ifmatch
4926
4927 case SUSPEND:
4928 ST.wanted = 1;
4929 PL_reginput = locinput;
4930 goto do_ifmatch;
4931
4932 case UNLESSM:
4933 ST.wanted = 0;
4934 goto ifmatch_trivial_fail_test;
4935
4936 case IFMATCH:
4937 ST.wanted = 1;
4938 ifmatch_trivial_fail_test:
4939 if (scan->flags) {
4940 char * const s = HOPBACKc(locinput, scan->flags);
4941 if (!s) {
4942
4943 if (logical) {
4944 logical = 0;
4945 sw = 1 - (bool)ST.wanted;
4946 }
4947 else if (ST.wanted)
4948 sayNO;
4949 next = scan + ARG(scan);
4950 if (next == scan)
4951 next = NULL;
4952 break;
4953 }
4954 PL_reginput = s;
4955 }
4956 else
4957 PL_reginput = locinput;
4958
4959 do_ifmatch:
4960 ST.me = scan;
4961 ST.logical = logical;
4962 logical = 0;
4963
4964
4965 PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4966
4967
4968 case IFMATCH_A_fail:
4969 ST.wanted = !ST.wanted;
4970
4971
4972 case IFMATCH_A:
4973 if (ST.logical) {
4974 sw = (bool)ST.wanted;
4975 }
4976 else if (!ST.wanted)
4977 sayNO;
4978
4979 if (OP(ST.me) == SUSPEND)
4980 locinput = PL_reginput;
4981 else {
4982 locinput = PL_reginput = st->locinput;
4983 nextchr = UCHARAT(locinput);
4984 }
4985 scan = ST.me + ARG(ST.me);
4986 if (scan == ST.me)
4987 scan = NULL;
4988 continue;
4989
4990#undef ST
4991
4992 case LONGJMP:
4993 next = scan + ARG(scan);
4994 if (next == scan)
4995 next = NULL;
4996 break;
4997 case COMMIT:
4998 reginfo->cutpoint = PL_regeol;
4999
5000 case PRUNE:
5001 PL_reginput = locinput;
5002 if (!scan->flags)
5003 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5004 PUSH_STATE_GOTO(COMMIT_next,next);
5005
5006 case COMMIT_next_fail:
5007 no_final = 1;
5008
5009 case OPFAIL:
5010 sayNO;
5011
5012
5013#define ST st->u.mark
5014 case MARKPOINT:
5015 ST.prev_mark = mark_state;
5016 ST.mark_name = sv_commit = sv_yes_mark
5017 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5018 mark_state = st;
5019 ST.mark_loc = PL_reginput = locinput;
5020 PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5021
5022 case MARKPOINT_next:
5023 mark_state = ST.prev_mark;
5024 sayYES;
5025
5026 case MARKPOINT_next_fail:
5027 if (popmark && sv_eq(ST.mark_name,popmark))
5028 {
5029 if (ST.mark_loc > startpoint)
5030 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5031 popmark = NULL;
5032 sv_commit = ST.mark_name;
5033
5034 DEBUG_EXECUTE_r({
5035 PerlIO_printf(Perl_debug_log,
5036 "%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
5037 REPORT_CODE_OFF+depth*2, "",
5038 PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5039 });
5040 }
5041 mark_state = ST.prev_mark;
5042 sv_yes_mark = mark_state ?
5043 mark_state->u.mark.mark_name : NULL;
5044 sayNO;
5045
5046 case SKIP:
5047 PL_reginput = locinput;
5048 if (scan->flags) {
5049
5050 ST.mark_name = NULL;
5051 ST.mark_loc = locinput;
5052 PUSH_STATE_GOTO(SKIP_next,next);
5053 } else {
5054
5055
5056
5057 regmatch_state *cur = mark_state;
5058 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5059
5060 while (cur) {
5061 if ( sv_eq( cur->u.mark.mark_name,
5062 find ) )
5063 {
5064 ST.mark_name = find;
5065 PUSH_STATE_GOTO( SKIP_next, next );
5066 }
5067 cur = cur->u.mark.prev_mark;
5068 }
5069 }
5070
5071 break;
5072 case SKIP_next_fail:
5073 if (ST.mark_name) {
5074
5075
5076 popmark = ST.mark_name;
5077 } else {
5078
5079 if (ST.mark_loc > startpoint)
5080 reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5081
5082
5083
5084 if (mark_state)
5085 sv_commit=mark_state->u.mark.mark_name;
5086 }
5087 no_final = 1;
5088 sayNO;
5089
5090#undef ST
5091 case FOLDCHAR:
5092 n = ARG(scan);
5093 if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5094 locinput += ln;
5095 } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5096 sayNO;
5097 } else {
5098 U8 folded[UTF8_MAXBYTES_CASE+1];
5099 STRLEN foldlen;
5100 const char * const l = locinput;
5101 char *e = PL_regeol;
5102 to_uni_fold(n, folded, &foldlen);
5103
5104 if (ibcmp_utf8((const char*) folded, 0, foldlen, 1,
5105 l, &e, 0, do_utf8)) {
5106 sayNO;
5107 }
5108 locinput = e;
5109 }
5110 nextchr = UCHARAT(locinput);
5111 break;
5112 case LNBREAK:
5113 if ((n=is_LNBREAK(locinput,do_utf8))) {
5114 locinput += n;
5115 nextchr = UCHARAT(locinput);
5116 } else
5117 sayNO;
5118 break;
5119
5120#define CASE_CLASS(nAmE) \
5121 case nAmE: \
5122 if ((n=is_##nAmE(locinput,do_utf8))) { \
5123 locinput += n; \
5124 nextchr = UCHARAT(locinput); \
5125 } else \
5126 sayNO; \
5127 break; \
5128 case N##nAmE: \
5129 if ((n=is_##nAmE(locinput,do_utf8))) { \
5130 sayNO; \
5131 } else { \
5132 locinput += UTF8SKIP(locinput); \
5133 nextchr = UCHARAT(locinput); \
5134 } \
5135 break
5136
5137 CASE_CLASS(VERTWS);
5138 CASE_CLASS(HORIZWS);
5139#undef CASE_CLASS
5140
5141 default:
5142 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5143 PTR2UV(scan), OP(scan));
5144 Perl_croak(aTHX_ "regexp memory corruption");
5145
5146 }
5147
5148
5149 scan = next;
5150 continue;
5151
5152
5153 push_yes_state:
5154
5155 st->u.yes.prev_yes_state = yes_state;
5156 yes_state = st;
5157
5158 push_state:
5159
5160 {
5161 regmatch_state *newst;
5162
5163 DEBUG_STACK_r({
5164 regmatch_state *cur = st;
5165 regmatch_state *curyes = yes_state;
5166 int curd = depth;
5167 regmatch_slab *slab = PL_regmatch_slab;
5168 for (;curd > -1;cur--,curd--) {
5169 if (cur < SLAB_FIRST(slab)) {
5170 slab = slab->prev;
5171 cur = SLAB_LAST(slab);
5172 }
5173 PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5174 REPORT_CODE_OFF + 2 + depth * 2,"",
5175 curd, PL_reg_name[cur->resume_state],
5176 (curyes == cur) ? "yes" : ""
5177 );
5178 if (curyes == cur)
5179 curyes = cur->u.yes.prev_yes_state;
5180 }
5181 } else
5182 DEBUG_STATE_pp("push")
5183 );
5184 depth++;
5185 st->locinput = locinput;
5186 newst = st+1;
5187 if (newst > SLAB_LAST(PL_regmatch_slab))
5188 newst = S_push_slab(aTHX);
5189 PL_regmatch_state = newst;
5190
5191 locinput = PL_reginput;
5192 nextchr = UCHARAT(locinput);
5193 st = newst;
5194 continue;
5195
5196 }
5197 }
5198
5199
5200
5201
5202
5203 Perl_croak(aTHX_ "corrupted regexp pointers");
5204
5205 sayNO;
5206
5207yes:
5208 if (yes_state) {
5209
5210
5211 assert(st != yes_state);
5212#ifdef DEBUGGING
5213 while (st != yes_state) {
5214 st--;
5215 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5216 PL_regmatch_slab = PL_regmatch_slab->prev;
5217 st = SLAB_LAST(PL_regmatch_slab);
5218 }
5219 DEBUG_STATE_r({
5220 if (no_final) {
5221 DEBUG_STATE_pp("pop (no final)");
5222 } else {
5223 DEBUG_STATE_pp("pop (yes)");
5224 }
5225 });
5226 depth--;
5227 }
5228#else
5229 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5230 || yes_state > SLAB_LAST(PL_regmatch_slab))
5231 {
5232
5233 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5234 PL_regmatch_slab = PL_regmatch_slab->prev;
5235 st = SLAB_LAST(PL_regmatch_slab);
5236 }
5237 depth -= (st - yes_state);
5238#endif
5239 st = yes_state;
5240 yes_state = st->u.yes.prev_yes_state;
5241 PL_regmatch_state = st;
5242
5243 if (no_final) {
5244 locinput= st->locinput;
5245 nextchr = UCHARAT(locinput);
5246 }
5247 state_num = st->resume_state + no_final;
5248 goto reenter_switch;
5249 }
5250
5251 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5252 PL_colors[4], PL_colors[5]));
5253
5254 if (PL_reg_eval_set) {
5255
5256
5257
5258
5259
5260 if (oreplsv != GvSV(PL_replgv))
5261 sv_setsv(oreplsv, GvSV(PL_replgv));
5262 }
5263 result = 1;
5264 goto final_exit;
5265
5266no:
5267 DEBUG_EXECUTE_r(
5268 PerlIO_printf(Perl_debug_log,
5269 "%*s %sfailed...%s\n",
5270 REPORT_CODE_OFF+depth*2, "",
5271 PL_colors[4], PL_colors[5])
5272 );
5273
5274no_silent:
5275 if (no_final) {
5276 if (yes_state) {
5277 goto yes;
5278 } else {
5279 goto final_exit;
5280 }
5281 }
5282 if (depth) {
5283
5284 st--;
5285 if (st < SLAB_FIRST(PL_regmatch_slab)) {
5286 PL_regmatch_slab = PL_regmatch_slab->prev;
5287 st = SLAB_LAST(PL_regmatch_slab);
5288 }
5289 PL_regmatch_state = st;
5290 locinput= st->locinput;
5291 nextchr = UCHARAT(locinput);
5292
5293 DEBUG_STATE_pp("pop");
5294 depth--;
5295 if (yes_state == st)
5296 yes_state = st->u.yes.prev_yes_state;
5297
5298 state_num = st->resume_state + 1;
5299 goto reenter_switch;
5300 }
5301 result = 0;
5302
5303 final_exit:
5304 if (rex->intflags & PREGf_VERBARG_SEEN) {
5305 SV *sv_err = get_sv("REGERROR", 1);
5306 SV *sv_mrk = get_sv("REGMARK", 1);
5307 if (result) {
5308 sv_commit = &PL_sv_no;
5309 if (!sv_yes_mark)
5310 sv_yes_mark = &PL_sv_yes;
5311 } else {
5312 if (!sv_commit)
5313 sv_commit = &PL_sv_yes;
5314 sv_yes_mark = &PL_sv_no;
5315 }
5316 sv_setsv(sv_err, sv_commit);
5317 sv_setsv(sv_mrk, sv_yes_mark);
5318 }
5319
5320
5321 LEAVE_SCOPE(oldsave);
5322
5323 return result;
5324}
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334STATIC I32
5335S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5336{
5337 dVAR;
5338 register char *scan;
5339 register I32 c;
5340 register char *loceol = PL_regeol;
5341 register I32 hardcount = 0;
5342 register bool do_utf8 = PL_reg_match_utf8;
5343#ifndef DEBUGGING
5344 PERL_UNUSED_ARG(depth);
5345#endif
5346
5347 PERL_ARGS_ASSERT_REGREPEAT;
5348
5349 scan = PL_reginput;
5350 if (max == REG_INFTY)
5351 max = I32_MAX;
5352 else if (max < loceol - scan)
5353 loceol = scan + max;
5354 switch (OP(p)) {
5355 case REG_ANY:
5356 if (do_utf8) {
5357 loceol = PL_regeol;
5358 while (scan < loceol && hardcount < max && *scan != '\n') {
5359 scan += UTF8SKIP(scan);
5360 hardcount++;
5361 }
5362 } else {
5363 while (scan < loceol && *scan != '\n')
5364 scan++;
5365 }
5366 break;
5367 case SANY:
5368 if (do_utf8) {
5369 loceol = PL_regeol;
5370 while (scan < loceol && hardcount < max) {
5371 scan += UTF8SKIP(scan);
5372 hardcount++;
5373 }
5374 }
5375 else
5376 scan = loceol;
5377 break;
5378 case CANY:
5379 scan = loceol;
5380 break;
5381 case EXACT:
5382 c = (U8)*STRING(p);
5383 while (scan < loceol && UCHARAT(scan) == c)
5384 scan++;
5385 break;
5386 case EXACTF:
5387 c = (U8)*STRING(p);
5388 while (scan < loceol &&
5389 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5390 scan++;
5391 break;
5392 case EXACTFL:
5393 PL_reg_flags |= RF_tainted;
5394 c = (U8)*STRING(p);
5395 while (scan < loceol &&
5396 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5397 scan++;
5398 break;
5399 case ANYOF:
5400 if (do_utf8) {
5401 loceol = PL_regeol;
5402 while (hardcount < max && scan < loceol &&
5403 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5404 scan += UTF8SKIP(scan);
5405 hardcount++;
5406 }
5407 } else {
5408 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5409 scan++;
5410 }
5411 break;
5412 case ALNUM:
5413 if (do_utf8) {
5414 loceol = PL_regeol;
5415 LOAD_UTF8_CHARCLASS_ALNUM();
5416 while (hardcount < max && scan < loceol &&
5417 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5418 scan += UTF8SKIP(scan);
5419 hardcount++;
5420 }
5421 } else {
5422 while (scan < loceol && isALNUM(*scan))
5423 scan++;
5424 }
5425 break;
5426 case ALNUML:
5427 PL_reg_flags |= RF_tainted;
5428 if (do_utf8) {
5429 loceol = PL_regeol;
5430 while (hardcount < max && scan < loceol &&
5431 isALNUM_LC_utf8((U8*)scan)) {
5432 scan += UTF8SKIP(scan);
5433 hardcount++;
5434 }
5435 } else {
5436 while (scan < loceol && isALNUM_LC(*scan))
5437 scan++;
5438 }
5439 break;
5440 case NALNUM:
5441 if (do_utf8) {
5442 loceol = PL_regeol;
5443 LOAD_UTF8_CHARCLASS_ALNUM();
5444 while (hardcount < max && scan < loceol &&
5445 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5446 scan += UTF8SKIP(scan);
5447 hardcount++;
5448 }
5449 } else {
5450 while (scan < loceol && !isALNUM(*scan))
5451 scan++;
5452 }
5453 break;
5454 case NALNUML:
5455 PL_reg_flags |= RF_tainted;
5456 if (do_utf8) {
5457 loceol = PL_regeol;
5458 while (hardcount < max && scan < loceol &&
5459 !isALNUM_LC_utf8((U8*)scan)) {
5460 scan += UTF8SKIP(scan);
5461 hardcount++;
5462 }
5463 } else {
5464 while (scan < loceol && !isALNUM_LC(*scan))
5465 scan++;
5466 }
5467 break;
5468 case SPACE:
5469 if (do_utf8) {
5470 loceol = PL_regeol;
5471 LOAD_UTF8_CHARCLASS_SPACE();
5472 while (hardcount < max && scan < loceol &&
5473 (*scan == ' ' ||
5474 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5475 scan += UTF8SKIP(scan);
5476 hardcount++;
5477 }
5478 } else {
5479 while (scan < loceol && isSPACE(*scan))
5480 scan++;
5481 }
5482 break;
5483 case SPACEL:
5484 PL_reg_flags |= RF_tainted;
5485 if (do_utf8) {
5486 loceol = PL_regeol;
5487 while (hardcount < max && scan < loceol &&
5488 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5489 scan += UTF8SKIP(scan);
5490 hardcount++;
5491 }
5492 } else {
5493 while (scan < loceol && isSPACE_LC(*scan))
5494 scan++;
5495 }
5496 break;
5497 case NSPACE:
5498 if (do_utf8) {
5499 loceol = PL_regeol;
5500 LOAD_UTF8_CHARCLASS_SPACE();
5501 while (hardcount < max && scan < loceol &&
5502 !(*scan == ' ' ||
5503 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5504 scan += UTF8SKIP(scan);
5505 hardcount++;
5506 }
5507 } else {
5508 while (scan < loceol && !isSPACE(*scan))
5509 scan++;
5510 }
5511 break;
5512 case NSPACEL:
5513 PL_reg_flags |= RF_tainted;
5514 if (do_utf8) {
5515 loceol = PL_regeol;
5516 while (hardcount < max && scan < loceol &&
5517 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5518 scan += UTF8SKIP(scan);
5519 hardcount++;
5520 }
5521 } else {
5522 while (scan < loceol && !isSPACE_LC(*scan))
5523 scan++;
5524 }
5525 break;
5526 case DIGIT:
5527 if (do_utf8) {
5528 loceol = PL_regeol;
5529 LOAD_UTF8_CHARCLASS_DIGIT();
5530 while (hardcount < max && scan < loceol &&
5531 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5532 scan += UTF8SKIP(scan);
5533 hardcount++;
5534 }
5535 } else {
5536 while (scan < loceol && isDIGIT(*scan))
5537 scan++;
5538 }
5539 break;
5540 case NDIGIT:
5541 if (do_utf8) {
5542 loceol = PL_regeol;
5543 LOAD_UTF8_CHARCLASS_DIGIT();
5544 while (hardcount < max && scan < loceol &&
5545 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5546 scan += UTF8SKIP(scan);
5547 hardcount++;
5548 }
5549 } else {
5550 while (scan < loceol && !isDIGIT(*scan))
5551 scan++;
5552 }
5553 case LNBREAK:
5554 if (do_utf8) {
5555 loceol = PL_regeol;
5556 while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5557 scan += c;
5558 hardcount++;
5559 }
5560 } else {
5561
5562
5563
5564
5565
5566 while (scan < loceol && (c=is_LNBREAK_latin1(scan))) {
5567 scan+=c;
5568 hardcount++;
5569 }
5570 }
5571 break;
5572 case HORIZWS:
5573 if (do_utf8) {
5574 loceol = PL_regeol;
5575 while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5576 scan += c;
5577 hardcount++;
5578 }
5579 } else {
5580 while (scan < loceol && is_HORIZWS_latin1(scan))
5581 scan++;
5582 }
5583 break;
5584 case NHORIZWS:
5585 if (do_utf8) {
5586 loceol = PL_regeol;
5587 while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5588 scan += UTF8SKIP(scan);
5589 hardcount++;
5590 }
5591 } else {
5592 while (scan < loceol && !is_HORIZWS_latin1(scan))
5593 scan++;
5594
5595 }
5596 break;
5597 case VERTWS:
5598 if (do_utf8) {
5599 loceol = PL_regeol;
5600 while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5601 scan += c;
5602 hardcount++;
5603 }
5604 } else {
5605 while (scan < loceol && is_VERTWS_latin1(scan))
5606 scan++;
5607
5608 }
5609 break;
5610 case NVERTWS:
5611 if (do_utf8) {
5612 loceol = PL_regeol;
5613 while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5614 scan += UTF8SKIP(scan);
5615 hardcount++;
5616 }
5617 } else {
5618 while (scan < loceol && !is_VERTWS_latin1(scan))
5619 scan++;
5620
5621 }
5622 break;
5623
5624 default:
5625 break;
5626 }
5627
5628 if (hardcount)
5629 c = hardcount;
5630 else
5631 c = scan - PL_reginput;
5632 PL_reginput = scan;
5633
5634 DEBUG_r({
5635 GET_RE_DEBUG_FLAGS_DECL;
5636 DEBUG_EXECUTE_r({
5637 SV * const prop = sv_newmortal();
5638 regprop(prog, prop, p);
5639 PerlIO_printf(Perl_debug_log,
5640 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
5641 REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5642 });
5643 });
5644
5645 return(c);
5646}
5647
5648
5649#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5650
5651
5652
5653
5654SV *
5655Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5656{
5657 dVAR;
5658 SV *sw = NULL;
5659 SV *si = NULL;
5660 SV *alt = NULL;
5661 RXi_GET_DECL(prog,progi);
5662 const struct reg_data * const data = prog ? progi->data : NULL;
566