perl/regcomp.sym
<<
>>
Prefs
   1# regcomp.sym
   2#
   3# File has two sections, divided by a line of dashes '-'. 
   4#
   5# Empty rows and #-comment rows are removed from the input and ignored.
   6#
   7# First section is for regops, second section is for regmatch-states
   8#
   9# Note that the order in this file is important.
  10#
  11# Format for first section: 
  12# NAME \t TYPE, arg-description [num-args] [longjump-len] \t DESCRIPTION
  13#
  14#
  15# run perl regen.pl after editing this file
  16
  17
  18
  19#* Exit points (0,1)
  20
  21END             END,    no      End of program.
  22SUCCEED         END,    no      Return from a subroutine, basically.
  23
  24#* Anchors: (2..13)
  25
  26BOL             BOL,    no      Match "" at beginning of line.
  27MBOL            BOL,    no      Same, assuming multiline.
  28SBOL            BOL,    no      Same, assuming singleline.
  29EOS             EOL,    no      Match "" at end of string.
  30EOL             EOL,    no      Match "" at end of line.
  31MEOL            EOL,    no      Same, assuming multiline.
  32SEOL            EOL,    no      Same, assuming singleline.
  33BOUND           BOUND,  no      Match "" at any word boundary
  34BOUNDL          BOUND,  no      Match "" at any word boundary
  35NBOUND          NBOUND, no      Match "" at any word non-boundary
  36NBOUNDL         NBOUND, no      Match "" at any word non-boundary
  37GPOS            GPOS,   no      Matches where last m//g left off.
  38
  39#* [Special] alternatives: (14..30)
  40
  41REG_ANY         REG_ANY,    no  Match any one character (except newline).
  42SANY            REG_ANY,    no  Match any one character.
  43CANY            REG_ANY,    no  Match any one byte.
  44ANYOF           ANYOF,  sv      Match character in (or not in) this class.
  45ALNUM           ALNUM,  no      Match any alphanumeric character
  46ALNUML          ALNUM,  no      Match any alphanumeric char in locale
  47NALNUM          NALNUM, no      Match any non-alphanumeric character
  48NALNUML         NALNUM, no      Match any non-alphanumeric char in locale
  49SPACE           SPACE,  no      Match any whitespace character
  50SPACEL          SPACE,  no      Match any whitespace char in locale
  51NSPACE          NSPACE, no      Match any non-whitespace character
  52NSPACEL         NSPACE, no      Match any non-whitespace char in locale
  53DIGIT           DIGIT,  no      Match any numeric character
  54DIGITL          DIGIT,  no      Match any numeric character in locale
  55NDIGIT          NDIGIT, no      Match any non-numeric character
  56NDIGITL         NDIGIT, no      Match any non-numeric character in locale
  57CLUMP           CLUMP,  no      Match any combining character sequence
  58
  59#* Alternation (31)
  60
  61# BRANCH        The set of branches constituting a single choice are hooked
  62#               together with their "next" pointers, since precedence prevents
  63#               anything being concatenated to any individual branch.  The
  64#               "next" pointer of the last BRANCH in a choice points to the
  65#               thing following the whole choice.  This is also where the
  66#               final "next" pointer of each individual branch points; each
  67#               branch starts with the operand node of a BRANCH node.
  68#
  69BRANCH          BRANCH, node    Match this alternative, or the next...
  70
  71#*Back pointer (32)
  72
  73# BACK          Normal "next" pointers all implicitly point forward; BACK
  74#               exists to make loop structures possible.
  75# not used
  76BACK            BACK,   no      Match "", "next" ptr points backward.
  77
  78#*Literals (33..35)
  79
  80EXACT           EXACT,  str     Match this string (preceded by length).
  81EXACTF          EXACT,  str     Match this string, folded (prec. by length).
  82EXACTFL         EXACT,  str     Match this string, folded in locale (w/len).
  83
  84#*Do nothing types (36..37)
  85
  86NOTHING         NOTHING,no      Match empty string.
  87# A variant of above which delimits a group, thus stops optimizations
  88TAIL            NOTHING,no      Match empty string. Can jump here from outside.
  89
  90#*Loops (38..44)
  91
  92# STAR,PLUS     '?', and complex '*' and '+', are implemented as circular
  93#               BRANCH structures using BACK.  Simple cases (one character
  94#               per match) are implemented with STAR and PLUS for speed
  95#               and to minimize recursive plunges.
  96#
  97STAR            STAR,   node    Match this (simple) thing 0 or more times.
  98PLUS            PLUS,   node    Match this (simple) thing 1 or more times.
  99
 100CURLY           CURLY,  sv 2    Match this simple thing {n,m} times.
 101CURLYN          CURLY,  no 2    Capture next-after-this simple thing 
 102CURLYM          CURLY,  no 2    Capture this medium-complex thing {n,m} times. 
 103CURLYX          CURLY,  sv 2    Match this complex thing {n,m} times.
 104
 105# This terminator creates a loop structure for CURLYX
 106WHILEM          WHILEM, no      Do curly processing and see if rest matches.
 107
 108#*Buffer related (45..49)
 109
 110# OPEN,CLOSE,GROUPP     ...are numbered at compile time.
 111OPEN            OPEN,   num 1   Mark this point in input as start of #n.
 112CLOSE           CLOSE,  num 1   Analogous to OPEN.
 113
 114REF             REF,    num 1   Match some already matched string
 115REFF            REF,    num 1   Match already matched string, folded
 116REFFL           REF,    num 1   Match already matched string, folded in loc.
 117
 118#*Grouping assertions (50..54)
 119
 120IFMATCH         BRANCHJ,off 1 2 Succeeds if the following matches.
 121UNLESSM         BRANCHJ,off 1 2 Fails if the following matches.
 122SUSPEND         BRANCHJ,off 1 1 "Independent" sub-RE.
 123IFTHEN          BRANCHJ,off 1 1 Switch, should be preceeded by switcher .
 124GROUPP          GROUPP, num 1   Whether the group matched.
 125
 126#*Support for long RE (55..56)
 127
 128LONGJMP         LONGJMP,off 1 1 Jump far away.
 129BRANCHJ         BRANCHJ,off 1 1 BRANCH with long offset.
 130
  131#*The heavy worker (57)
 132
 133EVAL            EVAL,   evl 1   Execute some Perl code.
 134
  135#*Modifiers (58..59)
 136
 137MINMOD          MINMOD, no      Next operator is not greedy.
 138LOGICAL         LOGICAL,no      Next opcode should set the flag only.
 139
  140# This is not used yet (60)
 141RENUM           BRANCHJ,off 1 1 Group with independently numbered parens.
 142
  143#*Trie Related (61..64)
 144
 145# Behave the same as A|LIST|OF|WORDS would. The '..C' variants have  
 146# inline charclass data (ascii only), the 'C' store it in the structure.
  147# NOTE: the relative order of the TRIE-like regops is significant
 148
 149TRIE            TRIE,     trie 1        Match many EXACT(FL?)? at once. flags==type
 150TRIEC           TRIE,trie charclass     Same as TRIE, but with embedded charclass data
 151
 152# For start classes, contains an added fail table.
 153AHOCORASICK     TRIE,        trie 1     Aho Corasick stclass. flags==type
 154AHOCORASICKC    TRIE,trie charclass     Same as AHOCORASICK, but with embedded charclass data
 155
 156#*Regex Subroutines (65..66) 
 157GOSUB           GOSUB,     num/ofs 2L   recurse to paren arg1 at (signed) ofs arg2
 158GOSTART         GOSTART,   no           recurse to start of pattern
 159
 160#*Named references (67..69)
 161NREF            REF,       no-sv 1      Match some already matched string
 162NREFF           REF,       no-sv 1      Match already matched string, folded
 163NREFFL          REF,       no-sv 1      Match already matched string, folded in loc.
 164
 165
 166#*Special conditionals  (70..72)
 167NGROUPP         NGROUPP,   no-sv 1      Whether the group matched.            
 168INSUBP          INSUBP,    num 1        Whether we are in a specific recurse.  
 169DEFINEP         DEFINEP,   none 1       Never execute directly.               
 170
  171#*Backtracking Verbs
 172ENDLIKE         ENDLIKE,   none         Used only for the type field of verbs
 173OPFAIL          ENDLIKE,   none         Same as (?!)
 174ACCEPT          ENDLIKE,   parno 1      Accepts the current matched string.
 175
 176
 177#*Verbs With Arguments
 178VERB            VERB,      no-sv 1      Used only for the type field of verbs
 179PRUNE           VERB,      no-sv 1      Pattern fails at this startpoint if no-backtracking through this 
 180MARKPOINT       VERB,      no-sv 1      Push the current location for rollback by cut.
 181SKIP            VERB,      no-sv 1      On failure skip forward (to the mark) before retrying
 182COMMIT          VERB,      no-sv 1      Pattern fails outright if backtracking through this
 183CUTGROUP        VERB,      no-sv 1      On failure go to the next alternation in the group
 184
 185#*Control what to keep in $&.
 186KEEPS           KEEPS,  no      $& begins here.
 187
 188#*New charclass like patterns
 189LNBREAK         LNBREAK,   none         generic newline pattern
 190VERTWS          VERTWS,    none         vertical whitespace         (Perl 6)
 191NVERTWS         NVERTWS,   none         not vertical whitespace     (Perl 6)
 192HORIZWS         HORIZWS,   none         horizontal whitespace       (Perl 6)
 193NHORIZWS        NHORIZWS,  none         not horizontal whitespace   (Perl 6)
 194
 195FOLDCHAR        FOLDCHAR,  codepoint 1  codepoint with tricky case folding properties.
 196
 197# NEW STUFF ABOVE THIS LINE  
 198
 199################################################################################
 200
 201#*SPECIAL  REGOPS
 202
 203# This is not really a node, but an optimized away piece of a "long" node.
 204# To simplify debugging output, we mark it as if it were a node
 205OPTIMIZED       NOTHING,off     Placeholder for dump.
 206
 207# Special opcode with the property that no opcode in a compiled program
 208# will ever be of this type. Thus it can be used as a flag value that
 209# no other opcode has been seen. END is used similarly, in that an END
  210# node can't be optimized. So END implies "unoptimizable" and PSEUDO means
 211# "not seen anything to optimize yet".
 212PSEUDO          PSEUDO,off      Pseudo opcode for internal use.
 213
 214-------------------------------------------------------------------------------
 215# Format for second section:
 216# REGOP \t typelist [ \t typelist] [# Comment]
 217# typelist= namelist
 218#         = namelist:FAIL
 219#         = name:count
 220
 221# Anything below is a state
 222#
 223#
 224TRIE            next:FAIL       
 225EVAL            AB:FAIL 
 226CURLYX          end:FAIL        
 227WHILEM          A_pre,A_min,A_max,B_min,B_max:FAIL
 228BRANCH          next:FAIL       
 229CURLYM          A,B:FAIL        
 230IFMATCH         A:FAIL  
 231CURLY           B_min_known,B_min,B_max:FAIL    
 232COMMIT          next:FAIL
 233MARKPOINT       next:FAIL
 234SKIP            next:FAIL
 235CUTGROUP        next:FAIL
 236KEEPS           next:FAIL
 237
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.