libphobos/src/std/regex/internal/parser.d - gcc - Git at Google

 //Written in the D programming language
 /*
     Regular expression pattern parser.
 */
 module std.regex.internal.parser;

 import std.regex.internal.ir;
 import std.range.primitives, std.uni, std.meta,
     std.traits, std.typecons, std.exception;
 static import std.ascii;

 // package relevant info from parser into a regex object
 auto makeRegex(S, CG)(Parser!(S, CG) p)
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
     import std.regex.internal.thompson : ThompsonMatcher;
     import std.algorithm.searching : canFind;
     alias Char = BasicElementOf!S;
     Regex!Char re;
     auto g = p.g;
     with(re)
     {
         ir = g.ir;
         dict = g.dict;
         ngroup = g.ngroup;
         maxCounterDepth = g.counterDepth;
         flags = p.re_flags;
         charsets = g.charsets;
         matchers = g.matchers;
         backrefed = g.backrefed;
         re.pattern = p.origin.idup;
         re.postprocess();
         // check if we have backreferences, if so - use backtracking
         if (__ctfe) factory = null; // allows us to use the awful enum re = regex(...);
         else if (re.backrefed.canFind!"a != 0")
             factory =  new RuntimeFactory!(BacktrackingMatcher, Char);
         else
             factory = new RuntimeFactory!(ThompsonMatcher, Char);
         debug(std_regex_parser)
         {
             __ctfe || print();
         }
         //@@@BUG@@@ (not reduced)
         //somehow just using validate _collides_ with std.utf.validate (!)
         version (assert) re.validateRe();
     }
     return re;
 }

 // helper for unittest
 auto makeRegex(S)(S arg)
 if (isSomeString!S)
 {
     return makeRegex(Parser!(S, CodeGen)(arg, ""));
 }

 @system unittest
 {
     import std.algorithm.comparison : equal;
     auto re = makeRegex(`(?P<name>\w+) = (?P<var>\d+)`);
     auto nc = re.namedCaptures;
     static assert(isRandomAccessRange!(typeof(nc)));
     assert(!nc.empty);
     assert(nc.length == 2);
     assert(nc.equal(["name", "var"]));
     assert(nc[0] == "name");
     assert(nc[1..$].equal(["var"]));

     re = makeRegex(`(\w+) (?P<named>\w+) (\w+)`);
     nc = re.namedCaptures;
     assert(nc.length == 1);
     assert(nc[0] == "named");
     assert(nc.front == "named");
     assert(nc.back == "named");

     re = makeRegex(`(\w+) (\w+)`);
     nc = re.namedCaptures;
     assert(nc.empty);

     re = makeRegex(`(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})/`);
     nc = re.namedCaptures;
     auto cp = nc.save;
     assert(nc.equal(cp));
     nc.popFront();
     assert(nc.equal(cp[1..$]));
     nc.popBack();
     assert(nc.equal(cp[1 .. $ - 1]));
 }


 @trusted void reverseBytecode()(Bytecode[] code)
 {
     Bytecode[] rev = new Bytecode[code.length];
     uint revPc = cast(uint) rev.length;
     Stack!(Tuple!(uint, uint, uint)) stack;
     uint start = 0;
     uint end = cast(uint) code.length;
     for (;;)
     {
         for (uint pc = start; pc < end; )
         {
             immutable len = code[pc].length;
             if (code[pc].code == IR.GotoEndOr)
                 break; //pick next alternation branch
             if (code[pc].isAtom)
             {
                 rev[revPc - len .. revPc] = code[pc .. pc + len];
                 revPc -= len;
                 pc += len;
             }
             else if (code[pc].isStart || code[pc].isEnd)
             {
                 //skip over other embedded lookbehinds they are reversed
                 if (code[pc].code == IR.LookbehindStart
                     || code[pc].code == IR.NeglookbehindStart)
                 {
                     immutable blockLen = len + code[pc].data
                          + code[pc].pairedLength;
                     rev[revPc - blockLen .. revPc] = code[pc .. pc + blockLen];
                     pc += blockLen;
                     revPc -= blockLen;
                     continue;
                 }
                 immutable second = code[pc].indexOfPair(pc);
                 immutable secLen = code[second].length;
                 rev[revPc - secLen .. revPc] = code[second .. second + secLen];
                 revPc -= secLen;
                 if (code[pc].code == IR.OrStart)
                 {
                     //we pass len bytes forward, but secLen in reverse
                     immutable revStart = revPc - (second + len - secLen - pc);
                     uint r = revStart;
                     uint i = pc + IRL!(IR.OrStart);
                     while (code[i].code == IR.Option)
                     {
                         if (code[i - 1].code != IR.OrStart)
                         {
                             assert(code[i - 1].code == IR.GotoEndOr);
                             rev[r - 1] = code[i - 1];
                         }
                         rev[r] = code[i];
                         auto newStart = i + IRL!(IR.Option);
                         auto newEnd = newStart + code[i].data;
                         auto newRpc = r + code[i].data + IRL!(IR.Option);
                         if (code[newEnd].code != IR.OrEnd)
                         {
                             newRpc--;
                         }
                         stack.push(tuple(newStart, newEnd, newRpc));
                         r += code[i].data + IRL!(IR.Option);
                         i += code[i].data + IRL!(IR.Option);
                     }
                     pc = i;
                     revPc = revStart;
                     assert(code[pc].code == IR.OrEnd);
                 }
                 else
                     pc += len;
             }
         }
         if (stack.empty)
             break;
         start = stack.top[0];
         end = stack.top[1];
         revPc = stack.top[2];
         stack.pop();
     }
     code[] = rev[];
 }

 struct CodeGen
 {
     Bytecode[] ir;                 // resulting bytecode
     Stack!(uint) fixupStack;       // stack of opened start instructions
     NamedGroup[] dict;             // maps name -> user group number
     Stack!(uint) groupStack;       // stack of current number of group
     uint nesting = 0;              // group nesting level and repetitions step
     uint lookaroundNest = 0;       // nesting of lookaround
     uint counterDepth = 0;         // current depth of nested counted repetitions
     CodepointSet[] charsets;       // sets for char classes
     const(CharMatcher)[] matchers; // matchers for char classes
     uint[] backrefed;              // bitarray for groups refered by backref
     uint ngroup;                   // final number of groups (of all patterns)

     void start(uint length)
     {
         if (!__ctfe)
             ir.reserve((length*5+2)/4);
         fixupStack.push(0);
         groupStack.push(1);//0 - whole match
     }

     //mark referenced groups for latter processing
     void markBackref(uint n)
     {
         if (n/32 >= backrefed.length)
             backrefed.length = n/32 + 1;
         backrefed[n / 32] |= 1 << (n & 31);
     }

     bool isOpenGroup(uint n)
     {
         import std.algorithm.searching : canFind;
         // walk the fixup stack and see if there are groups labeled 'n'
         // fixup '0' is reserved for alternations
         return fixupStack.data[1..$].
             canFind!(fix => ir[fix].code == IR.GroupStart && ir[fix].data == n)();
     }

     void put(Bytecode code)
     {
         enforce(ir.length < maxCompiledLength,
             "maximum compiled pattern length is exceeded");
         ir ~= code;
     }

     void putRaw(uint number)
     {
         enforce(ir.length < maxCompiledLength,
             "maximum compiled pattern length is exceeded");
         ir ~= Bytecode.fromRaw(number);
     }

     //try to generate optimal IR code for this CodepointSet
     @trusted void charsetToIr(CodepointSet set)
     {//@@@BUG@@@ writeln is @system
         uint chars = cast(uint) set.length;
         if (chars < Bytecode.maxSequence)
         {
             switch (chars)
             {
                 case 1:
                     put(Bytecode(IR.Char, set.byCodepoint.front));
                     break;
                 case 0:
                     throw new RegexException("empty CodepointSet not allowed");
                 default:
                     foreach (ch; set.byCodepoint)
                         put(Bytecode(IR.OrChar, ch, chars));
             }
         }
         else
         {
             import std.algorithm.searching : countUntil;
             const ivals = set.byInterval;
             immutable n = charsets.countUntil(set);
             if (n >= 0)
             {
                 if (ivals.length*2 > maxCharsetUsed)
                     put(Bytecode(IR.Trie, cast(uint) n));
                 else
                     put(Bytecode(IR.CodepointSet, cast(uint) n));
                 return;
             }
             if (ivals.length*2 > maxCharsetUsed)
             {
                 auto t  = getMatcher(set);
                 put(Bytecode(IR.Trie, cast(uint) matchers.length));
                 matchers ~= t;
                 debug(std_regex_allocation) writeln("Trie generated");
             }
             else
             {
                 put(Bytecode(IR.CodepointSet, cast(uint) charsets.length));
                 matchers ~= CharMatcher.init;
             }
             charsets ~= set;
             assert(charsets.length == matchers.length);
         }
     }

     void genLogicGroup()
     {
         nesting++;
         pushFixup(length);
         put(Bytecode(IR.Nop, 0));
     }

     void genGroup()
     {
         nesting++;
         pushFixup(length);
         immutable nglob = groupStack.top++;
         enforce(groupStack.top <= maxGroupNumber, "limit on number of submatches is exceeded");
         put(Bytecode(IR.GroupStart, nglob));
     }

     void genNamedGroup(string name)
     {
         import std.array : insertInPlace;
         import std.range : assumeSorted;
         nesting++;
         pushFixup(length);
         immutable nglob = groupStack.top++;
         enforce(groupStack.top <= maxGroupNumber, "limit on submatches is exceeded");
         auto t = NamedGroup(name, nglob);
         auto d = assumeSorted!"a.name < b.name"(dict);
         immutable ind = d.lowerBound(t).length;
         insertInPlace(dict, ind, t);
         put(Bytecode(IR.GroupStart, nglob));
     }

         //generate code for start of lookaround: (?= (?! (?<= (?<!
     void genLookaround(IR opcode)
     {
         nesting++;
         pushFixup(length);
         put(Bytecode(opcode, 0));
         put(Bytecode.fromRaw(0));
         put(Bytecode.fromRaw(0));
         groupStack.push(0);
         lookaroundNest++;
         enforce(lookaroundNest <= maxLookaroundDepth,
             "maximum lookaround depth is exceeded");
     }

     void endPattern(uint num)
     {
         import std.algorithm.comparison : max;
         put(Bytecode(IR.End, num));
         ngroup = max(ngroup, groupStack.top);
         groupStack.top = 1; // reset group counter
     }

     //fixup lookaround with start at offset fix and append a proper *-End opcode
     void fixLookaround(uint fix)
     {
         lookaroundNest--;
         ir[fix] = Bytecode(ir[fix].code,
             cast(uint) ir.length - fix - IRL!(IR.LookaheadStart));
         auto g = groupStack.pop();
         assert(!groupStack.empty);
         ir[fix+1] = Bytecode.fromRaw(groupStack.top);
         //groups are cumulative across lookarounds
         ir[fix+2] = Bytecode.fromRaw(groupStack.top+g);
         groupStack.top += g;
         if (ir[fix].code == IR.LookbehindStart || ir[fix].code == IR.NeglookbehindStart)
         {
             reverseBytecode(ir[fix + IRL!(IR.LookbehindStart) .. $]);
         }
         put(ir[fix].paired);
     }

     // repetition of {1,1}
     void fixRepetition(uint offset)
     {
         import std.algorithm.mutation : copy;
         immutable replace = ir[offset].code == IR.Nop;
         if (replace)
         {
             copy(ir[offset + 1 .. $], ir[offset .. $ - 1]);
             ir.length -= 1;
         }
     }

     // repetition of {x,y}
     void fixRepetition(uint offset, uint min, uint max, bool greedy)
     {
         static import std.algorithm.comparison;
         import std.algorithm.mutation : copy;
         import std.array : insertInPlace;
         immutable replace = ir[offset].code == IR.Nop;
         immutable len = cast(uint) ir.length - offset - replace;
         if (max != infinite)
         {
             if (min != 1 || max != 1)
             {
                 Bytecode op = Bytecode(greedy ? IR.RepeatStart : IR.RepeatQStart, len);
                 if (replace)
                     ir[offset] = op;
                 else
                     insertInPlace(ir, offset, op);
                 put(Bytecode(greedy ? IR.RepeatEnd : IR.RepeatQEnd, len));
                 put(Bytecode.init); //hotspot
                 putRaw(1);
                 putRaw(min);
                 putRaw(max);
                 counterDepth = std.algorithm.comparison.max(counterDepth, nesting+1);
             }
         }
         else if (min) //&& max is infinite
         {
             if (min != 1)
             {
                 Bytecode op = Bytecode(greedy ? IR.RepeatStart : IR.RepeatQStart, len);
                 if (replace)
                     ir[offset] = op;
                 else
                     insertInPlace(ir, offset, op);
                 offset += 1;//so it still points to the repeated block
                 put(Bytecode(greedy ? IR.RepeatEnd : IR.RepeatQEnd, len));
                 put(Bytecode.init); //hotspot
                 putRaw(1);
                 putRaw(min);
                 putRaw(min);
                 counterDepth = std.algorithm.comparison.max(counterDepth, nesting+1);
             }
             else if (replace)
             {
                 copy(ir[offset+1 .. $], ir[offset .. $-1]);
                 ir.length -= 1;
             }
             put(Bytecode(greedy ? IR.InfiniteStart : IR.InfiniteQStart, len));
             enforce(ir.length + len < maxCompiledLength,  "maximum compiled pattern length is exceeded");
             ir ~= ir[offset .. offset+len];
             //IR.InfinteX is always a hotspot
             put(Bytecode(greedy ? IR.InfiniteEnd : IR.InfiniteQEnd, len));
             put(Bytecode.init); //merge index
         }
         else//vanila {0,inf}
         {
             Bytecode op = Bytecode(greedy ? IR.InfiniteStart : IR.InfiniteQStart, len);
             if (replace)
                 ir[offset] = op;
             else
                 insertInPlace(ir, offset, op);
             //IR.InfinteX is always a hotspot
             put(Bytecode(greedy ? IR.InfiniteEnd : IR.InfiniteQEnd, len));
             put(Bytecode.init); //merge index
         }
     }

     void fixAlternation()
     {
         import std.array : insertInPlace;
         uint fix = fixupStack.top;
         if (ir.length > fix && ir[fix].code == IR.Option)
         {
             ir[fix] = Bytecode(ir[fix].code, cast(uint) ir.length - fix);
             put(Bytecode(IR.GotoEndOr, 0));
             fixupStack.top = cast(uint) ir.length; //replace latest fixup for Option
             put(Bytecode(IR.Option, 0));
             return;
         }
         uint len, orStart;
         //start a new option
         if (fixupStack.length == 1)
         {//only root entry, effectively no fixup
             len = cast(uint) ir.length + IRL!(IR.GotoEndOr);
             orStart = 0;
         }
         else
         {//IR.lookahead, etc. fixups that have length > 1, thus check ir[x].length
             len = cast(uint) ir.length - fix - (ir[fix].length - 1);
             orStart = fix + ir[fix].length;
         }
         insertInPlace(ir, orStart, Bytecode(IR.OrStart, 0), Bytecode(IR.Option, len));
         assert(ir[orStart].code == IR.OrStart);
         put(Bytecode(IR.GotoEndOr, 0));
         fixupStack.push(orStart); //fixup for StartOR
         fixupStack.push(cast(uint) ir.length); //for second Option
         put(Bytecode(IR.Option, 0));
     }

     // finalizes IR.Option, fix points to the first option of sequence
     void finishAlternation(uint fix)
     {
         enforce(ir[fix].code == IR.Option, "no matching ')'");
         ir[fix] = Bytecode(ir[fix].code, cast(uint) ir.length - fix - IRL!(IR.OrStart));
         fix = fixupStack.pop();
         enforce(ir[fix].code == IR.OrStart, "no matching ')'");
         ir[fix] = Bytecode(IR.OrStart, cast(uint) ir.length - fix - IRL!(IR.OrStart));
         put(Bytecode(IR.OrEnd, cast(uint) ir.length - fix - IRL!(IR.OrStart)));
         uint pc = fix + IRL!(IR.OrStart);
         while (ir[pc].code == IR.Option)
         {
             pc = pc + ir[pc].data;
             if (ir[pc].code != IR.GotoEndOr)
                 break;
             ir[pc] = Bytecode(IR.GotoEndOr, cast(uint)(ir.length - pc - IRL!(IR.OrEnd)));
             pc += IRL!(IR.GotoEndOr);
         }
         put(Bytecode.fromRaw(0));
     }

     // returns: (flag - repetition possible?, fixup of the start of this "group")
     Tuple!(bool, uint) onClose()
     {
         nesting--;
         uint fix = popFixup();
         switch (ir[fix].code)
         {
         case IR.GroupStart:
             put(Bytecode(IR.GroupEnd, ir[fix].data));
             return tuple(true, fix);
         case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart:
             assert(lookaroundNest);
             fixLookaround(fix);
             return tuple(false, 0u);
         case IR.Option: //| xxx )
             //two fixups: last option + full OR
             finishAlternation(fix);
             fix = topFixup;
             switch (ir[fix].code)
             {
             case IR.GroupStart:
                 popFixup();
                 put(Bytecode(IR.GroupEnd, ir[fix].data));
                 return tuple(true, fix);
             case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart:
                 assert(lookaroundNest);
                 fix = popFixup();
                 fixLookaround(fix);
                 return tuple(false, 0u);
             default://(?:xxx)
                 popFixup();
                 return tuple(true, fix);
             }
         default://(?:xxx)
             return tuple(true, fix);
         }
     }

     uint popFixup(){ return fixupStack.pop(); }

     void pushFixup(uint val){ return fixupStack.push(val); }

     @property uint topFixup(){ return fixupStack.top; }

     @property size_t fixupLength(){ return fixupStack.data.length; }

     @property uint length(){ return cast(uint) ir.length; }
 }

 // safety limits
 enum maxGroupNumber = 2^^19;
 enum maxLookaroundDepth = 16;
 // *Bytecode.sizeof, i.e. 1Mb of bytecode alone
 enum maxCompiledLength = 2^^18;
 // amounts to up to 4 Mb of auxilary table for matching
 enum maxCumulativeRepetitionLength = 2^^20;
 // marker to indicate infinite repetition
 enum infinite = ~0u;

 struct Parser(R, Generator)
 if (isForwardRange!R && is(ElementType!R : dchar))
 {
     dchar front;
     bool empty;
     R pat, origin;       //keep full pattern for pretty printing error messages
     uint re_flags = 0;   //global flags e.g. multiline + internal ones
     Generator g;

     @trusted this(S)(R pattern, S flags)
         if (isSomeString!S)
     {
         pat = origin = pattern;
         //reserve slightly more then avg as sampled from unittests
         parseFlags(flags);
         front = ' ';//a safe default for freeform parsing
         popFront();
         g.start(cast(uint) pat.length);
         try
         {
             parseRegex();
         }
         catch (Exception e)
         {
             error(e.msg);//also adds pattern location
         }
         g.endPattern(1);
     }

     void _popFront()
     {
         if (pat.empty)
         {
             empty =  true;
         }
         else
         {
             front = pat.front;
             pat.popFront();
         }
     }

     void skipSpace()
     {
         while (!empty && isWhite(front)) _popFront();
     }

     void popFront()
     {
         _popFront();
         if (re_flags & RegexOption.freeform) skipSpace();
     }

     auto save(){ return this; }

     //parsing number with basic overflow check
     uint parseDecimal()
     {
         uint r = 0;
         while (std.ascii.isDigit(front))
         {
             if (r >= (uint.max/10))
                 error("Overflow in decimal number");
             r = 10*r + cast(uint)(front-'0');
             popFront();
             if (empty) break;
         }
         return r;
     }

     //
     @trusted void parseFlags(S)(S flags)
     {//@@@BUG@@@ text is @system
         import std.conv : text;
         foreach (ch; flags)//flags are ASCII anyway
         {
         L_FlagSwitch:
             switch (ch)
             {

                 foreach (i, op; __traits(allMembers, RegexOption))
                 {
                     case RegexOptionNames[i]:
                             if (re_flags & mixin("RegexOption."~op))
                                 throw new RegexException(text("redundant flag specified: ",ch));
                             re_flags |= mixin("RegexOption."~op);
                             break L_FlagSwitch;
                 }
                 default:
                     throw new RegexException(text("unknown regex flag '",ch,"'"));
             }
         }
     }

     //parse and store IR for regex pattern
     @trusted void parseRegex()
     {
         uint fix;//fixup pointer

         while (!empty)
         {
             debug(std_regex_parser)
                 __ctfe || writeln("*LR*\nSource: ", pat, "\nStack: ",fixupStack.data);
             switch (front)
             {
             case '(':
                 popFront();
                 if (front == '?')
                 {
                     popFront();
                     switch (front)
                     {
                     case '#':
                         for (;;)
                         {
                             popFront();
                             enforce(!empty, "Unexpected end of pattern");
                             if (front == ')')
                             {
                                 popFront();
                                 break;
                             }
                         }
                         break;
                     case ':':
                         g.genLogicGroup();
                         popFront();
                         break;
                     case '=':
                         g.genLookaround(IR.LookaheadStart);
                         popFront();
                         break;
                     case '!':
                         g.genLookaround(IR.NeglookaheadStart);
                         popFront();
                         break;
                     case 'P':
                         popFront();
                         enforce(front == '<', "Expected '<' in named group");
                         string name;
                         popFront();
                         if (empty || !(isAlpha(front) || front == '_'))
                             error("Expected alpha starting a named group");
                         name ~= front;
                         popFront();
                         while (!empty && (isAlpha(front) ||
                             front == '_' || std.ascii.isDigit(front)))
                         {
                             name ~= front;
                             popFront();
                         }
                         enforce(front == '>', "Expected '>' closing named group");
                         popFront();
                         g.genNamedGroup(name);
                         break;
                     case '<':
                         popFront();
                         if (front == '=')
                             g.genLookaround(IR.LookbehindStart);
                         else if (front == '!')
                             g.genLookaround(IR.NeglookbehindStart);
                         else
                             error("'!' or '=' expected after '<'");
                         popFront();
                         break;
                     default:
                         uint enableFlags, disableFlags;
                         bool enable = true;
                         do
                         {
                             switch (front)
                             {
                             case 's':
                                 if (enable)
                                     enableFlags |= RegexOption.singleline;
                                 else
                                     disableFlags |= RegexOption.singleline;
                                 break;
                             case 'x':
                                 if (enable)
                                     enableFlags |= RegexOption.freeform;
                                 else
                                     disableFlags |= RegexOption.freeform;
                                 break;
                             case 'i':
                                 if (enable)
                                     enableFlags |= RegexOption.casefold;
                                 else
                                     disableFlags |= RegexOption.casefold;
                                 break;
                             case 'm':
                                 if (enable)
                                     enableFlags |= RegexOption.multiline;
                                 else
                                     disableFlags |= RegexOption.multiline;
                                 break;
                             case '-':
                                 if (!enable)
                                     error(" unexpected second '-' in flags");
                                 enable = false;
                                 break;
                             default:
                                 error(" 's', 'x', 'i', 'm' or '-' expected after '(?' ");
                             }
                             popFront();
                         }while (front != ')');
                         popFront();
                         re_flags |= enableFlags;
                         re_flags &= ~disableFlags;
                     }
                 }
                 else
                 {
                     g.genGroup();
                 }
                 break;
             case ')':
                 enforce(g.nesting, "Unmatched ')'");
                 popFront();
                 auto pair = g.onClose();
                 if (pair[0])
                     parseQuantifier(pair[1]);
                 break;
             case '|':
                 popFront();
                 g.fixAlternation();
                 break;
             default://no groups or whatever
                 immutable start = g.length;
                 parseAtom();
                 parseQuantifier(start);
             }
         }

         if (g.fixupLength != 1)
         {
             fix = g.popFixup();
             g.finishAlternation(fix);
             enforce(g.fixupLength == 1, "no matching ')'");
         }
     }


     //parse and store IR for atom-quantifier pair
     @trusted void parseQuantifier(uint offset)
     {//copy is @system
         if (empty)
             return g.fixRepetition(offset);
         uint min, max;
         switch (front)
         {
         case '*':
             min = 0;
             max = infinite;
             break;
         case '?':
             min = 0;
             max = 1;
             break;
         case '+':
             min = 1;
             max = infinite;
             break;
         case '{':
             popFront();
             enforce(!empty, "Unexpected end of regex pattern");
             enforce(std.ascii.isDigit(front), "First number required in repetition");
             min = parseDecimal();
             if (front == '}')
                 max = min;
             else if (front == ',')
             {
                 popFront();
                 if (std.ascii.isDigit(front))
                     max = parseDecimal();
                 else if (front == '}')
                     max = infinite;
                 else
                     error("Unexpected symbol in regex pattern");
                 skipSpace();
                 enforce(front == '}', "Unmatched '{' in regex pattern");
             }
             else
                 error("Unexpected symbol in regex pattern");
             enforce(min <= max, "Illegal {n,m} quantifier");
             break;
         default:
             g.fixRepetition(offset);
             return;
         }
         bool greedy = true;
         //check only if we managed to get new symbol
         popFront();
         if (!empty && front == '?')
         {
             greedy = false;
             popFront();
         }
         g.fixRepetition(offset, min, max, greedy);
     }

     //parse and store IR for atom
     void parseAtom()
     {
         if (empty)
             return;
         switch (front)
         {
         case '*', '?', '+', '|', '{', '}':
             return error("'*', '+', '?', '{', '}' not allowed in atom");
         case '.':
             if (re_flags & RegexOption.singleline)
                 g.put(Bytecode(IR.Any, 0));
             else
             {
                 CodepointSet set;
                 g.charsetToIr(set.add('\n','\n'+1).add('\r', '\r'+1).inverted);
             }
             popFront();
             break;
         case '[':
             parseCharset();
             break;
         case '\\':
             _popFront();
             enforce(!empty, "Unfinished escape sequence");
             parseEscape();
             break;
         case '^':
             if (re_flags & RegexOption.multiline)
                 g.put(Bytecode(IR.Bol, 0));
             else
                 g.put(Bytecode(IR.Bof, 0));
             popFront();
             break;
         case '$':
             if (re_flags & RegexOption.multiline)
                 g.put(Bytecode(IR.Eol, 0));
             else
                 g.put(Bytecode(IR.Eof, 0));
             popFront();
             break;
         default:
             if (re_flags & RegexOption.casefold)
             {
                 auto range = simpleCaseFoldings(front);
                 assert(range.length <= 5);
                 if (range.length == 1)
                     g.put(Bytecode(IR.Char, range.front));
                 else
                     foreach (v; range)
                         g.put(Bytecode(IR.OrChar, v, cast(uint) range.length));
             }
             else
                 g.put(Bytecode(IR.Char, front));
             popFront();
         }
     }

     //parse and store IR for CodepointSet
     void parseCharset()
     {
         const save = re_flags;
         re_flags &= ~RegexOption.freeform; // stop ignoring whitespace if we did
         bool casefold = cast(bool)(re_flags & RegexOption.casefold);
         g.charsetToIr(unicode.parseSet(this, casefold));
         re_flags = save;
         // Last next() in parseCharset is executed w/o freeform flag
         if (re_flags & RegexOption.freeform) skipSpace();
     }

     //parse and generate IR for escape stand alone escape sequence
     @trusted void parseEscape()
     {//accesses array of appender
         import std.algorithm.iteration : sum;
         switch (front)
         {
         case 'f':   popFront(); g.put(Bytecode(IR.Char, '\f')); break;
         case 'n':   popFront(); g.put(Bytecode(IR.Char, '\n')); break;
         case 'r':   popFront(); g.put(Bytecode(IR.Char, '\r')); break;
         case 't':   popFront(); g.put(Bytecode(IR.Char, '\t')); break;
         case 'v':   popFront(); g.put(Bytecode(IR.Char, '\v')); break;

         case 'd':
             popFront();
             g.charsetToIr(unicode.Nd);
             break;
         case 'D':
             popFront();
             g.charsetToIr(unicode.Nd.inverted);
             break;
         case 'b':   popFront(); g.put(Bytecode(IR.Wordboundary, 0)); break;
         case 'B':   popFront(); g.put(Bytecode(IR.Notwordboundary, 0)); break;
         case 's':
             popFront();
             g.charsetToIr(unicode.White_Space);
             break;
         case 'S':
             popFront();
             g.charsetToIr(unicode.White_Space.inverted);
             break;
         case 'w':
             popFront();
             g.charsetToIr(wordCharacter);
             break;
         case 'W':
             popFront();
             g.charsetToIr(wordCharacter.inverted);
             break;
         case 'p': case 'P':
             bool casefold = cast(bool)(re_flags & RegexOption.casefold);
             auto set = unicode.parsePropertySpec(this, front == 'P', casefold);
             g.charsetToIr(set);
             break;
         case 'x':
             immutable code = parseUniHex(pat, 2);
             popFront();
             g.put(Bytecode(IR.Char,code));
             break;
         case 'u': case 'U':
             immutable code = parseUniHex(pat, front == 'u' ? 4 : 8);
             popFront();
             g.put(Bytecode(IR.Char, code));
             break;
         case 'c': //control codes
             Bytecode code = Bytecode(IR.Char, unicode.parseControlCode(this));
             popFront();
             g.put(code);
             break;
         case '0':
             popFront();
             g.put(Bytecode(IR.Char, 0));//NUL character
             break;
         case '1': .. case '9':
             uint nref = cast(uint) front - '0';
             immutable maxBackref = sum(g.groupStack.data);
             enforce(nref < maxBackref, "Backref to unseen group");
             //perl's disambiguation rule i.e.
             //get next digit only if there is such group number
             popFront();
             while (nref < maxBackref && !empty && std.ascii.isDigit(front))
             {
                 nref = nref * 10 + front - '0';
                 popFront();
             }
             if (nref >= maxBackref)
                 nref /= 10;
             enforce(!g.isOpenGroup(nref), "Backref to open group");
             uint localLimit = maxBackref - g.groupStack.top;
             if (nref >= localLimit)
             {
                 g.put(Bytecode(IR.Backref, nref-localLimit));
                 g.ir[$-1].setLocalRef();
             }
             else
                 g.put(Bytecode(IR.Backref, nref));
             g.markBackref(nref);
             break;
         default:
             if (front == '\\' && !pat.empty)
             {
                 if (pat.front >= privateUseStart && pat.front <= privateUseEnd)
                     enforce(false, "invalid escape sequence");
             }
             if (front >= privateUseStart && front <= privateUseEnd)
             {
                 g.endPattern(front - privateUseStart + 1);
                 break;
             }
             auto op = Bytecode(IR.Char, front);
             popFront();
             g.put(op);
         }
     }

     //
     @trusted void error(string msg)
     {
         import std.array : appender;
         import std.format.write : formattedWrite;
         auto app = appender!string();
         formattedWrite(app, "%s\nPattern with error: `%s` <--HERE-- `%s`",
                        msg, origin[0..$-pat.length], pat);
         throw new RegexException(app.data);
     }

     alias Char = BasicElementOf!R;

     @property program()
     {
         return makeRegex(this);
     }
 }

 /+
     Postproces the IR, then optimize.
 +/
 @trusted void postprocess(Char)(ref Regex!Char zis)
 {//@@@BUG@@@ write is @system
     with(zis)
     {
         struct FixedStack(T)
         {
             T[] arr;
             uint _top;
             //this(T[] storage){   arr = storage; _top = -1; }
             @property ref T top(){  assert(!empty); return arr[_top]; }
             void push(T x){  arr[++_top] = x; }
             T pop() { assert(!empty);   return arr[_top--]; }
             @property bool empty(){   return _top == -1; }
         }
         auto counterRange = FixedStack!uint(new uint[maxCounterDepth+1], -1);
         counterRange.push(1);
         ulong cumRange = 0;
         for (uint i = 0; i < ir.length; i += ir[i].length)
         {
             if (ir[i].hotspot)
             {
                 assert(i + 1 < ir.length,
                     "unexpected end of IR while looking for hotspot");
                 ir[i+1] = Bytecode.fromRaw(hotspotTableSize);
                 hotspotTableSize += counterRange.top;
             }
             switch (ir[i].code)
             {
             case IR.RepeatStart, IR.RepeatQStart:
                 uint repEnd = cast(uint)(i + ir[i].data + IRL!(IR.RepeatStart));
                 assert(ir[repEnd].code == ir[i].paired.code);
                 immutable max = ir[repEnd + 4].raw;
                 ir[repEnd+2].raw = counterRange.top;
                 ir[repEnd+3].raw *= counterRange.top;
                 ir[repEnd+4].raw *= counterRange.top;
                 ulong cntRange = cast(ulong)(max)*counterRange.top;
                 cumRange += cntRange;
                 enforce(cumRange < maxCumulativeRepetitionLength,
                     "repetition length limit is exceeded");
                 counterRange.push(cast(uint) cntRange + counterRange.top);
                 threadCount += counterRange.top;
                 break;
             case IR.RepeatEnd, IR.RepeatQEnd:
                 threadCount += counterRange.top;
                 counterRange.pop();
                 break;
             case IR.GroupStart:
                 if (isBackref(ir[i].data))
                     ir[i].setBackrefence();
                 threadCount += counterRange.top;
                 break;
             case IR.GroupEnd:
                 if (isBackref(ir[i].data))
                     ir[i].setBackrefence();
                 threadCount += counterRange.top;
                 break;
             default:
                 threadCount += counterRange.top;
             }
         }
         checkIfOneShot();
         if (!(flags & RegexInfo.oneShot))
             kickstart = Kickstart!Char(zis, new uint[](256));
         debug(std_regex_allocation) writefln("IR processed, max threads: %d", threadCount);
         optimize(zis);
     }
 }

 void fixupBytecode()(Bytecode[] ir)
 {
     Stack!uint fixups;

     with(IR) for (uint i=0; i<ir.length; i+= ir[i].length)
     {
         if (ir[i].isStart || ir[i].code == Option)
             fixups.push(i);
         else if (ir[i].code == OrEnd)
         {
             // Alternatives need more care
             auto j = fixups.pop(); // last Option
             ir[j].data = i -  j - ir[j].length;
             j = fixups.pop(); // OrStart
             ir[j].data = i - j - ir[j].length;
             ir[i].data = ir[j].data;

             // fixup all GotoEndOrs
             j = j + IRL!(OrStart);
             assert(ir[j].code == Option);
             for (;;)
             {
                 auto next = j + ir[j].data + IRL!(Option);
                 if (ir[next].code == IR.OrEnd)
                     break;
                 ir[next - IRL!(GotoEndOr)].data = i - next;
                 j = next;
             }
         }
         else if (ir[i].code == GotoEndOr)
         {
             auto j = fixups.pop(); // Option
             ir[j].data = i - j + IRL!(GotoEndOr)- IRL!(Option); // to the next option
         }
         else if (ir[i].isEnd)
         {
             auto j = fixups.pop();
             ir[i].data = i - j - ir[j].length;
             ir[j].data = ir[i].data;
         }
     }
     assert(fixups.empty);
 }

 void optimize(Char)(ref Regex!Char zis)
 {
     import std.array : insertInPlace;
     CodepointSet nextSet(uint idx)
     {
         CodepointSet set;
         with(zis) with(IR)
     Outer:
         for (uint i = idx; i < ir.length; i += ir[i].length)
         {
             switch (ir[i].code)
             {
                 case Char:
                     set.add(ir[i].data, ir[i].data+1);
                     goto default;
                 //TODO: OrChar
                 case Trie, CodepointSet:
                     set = zis.charsets[ir[i].data];
                     goto default;
                 case GroupStart,GroupEnd:
                     break;
                 default:
                     break Outer;
             }
         }
         return set;
     }

     with(zis) with(IR) for (uint i = 0; i < ir.length; i += ir[i].length)
     {
         if (ir[i].code == InfiniteEnd)
         {
             auto set = nextSet(i+IRL!(InfiniteEnd));
             if (!set.empty && set.length < 10_000)
             {
                 ir[i] = Bytecode(InfiniteBloomEnd, ir[i].data);
                 ir[i - ir[i].data - IRL!(InfiniteStart)] =
                     Bytecode(InfiniteBloomStart, ir[i].data);
                 ir.insertInPlace(i+IRL!(InfiniteEnd),
                     Bytecode.fromRaw(cast(uint) zis.filters.length));
                 zis.filters ~= BitTable(set);
                 fixupBytecode(ir);
             }
         }
     }
 }

 //IR code validator - proper nesting, illegal instructions, etc.
 @trusted void validateRe(Char)(ref Regex!Char zis)
 {//@@@BUG@@@ text is @system
     import std.conv : text;
     with(zis)
     {
         for (uint pc = 0; pc < ir.length; pc += ir[pc].length)
         {
             if (ir[pc].isStart || ir[pc].isEnd)
             {
                 immutable dest = ir[pc].indexOfPair(pc);
                 assert(dest < ir.length, text("Wrong length in opcode at pc=",
                     pc, " ", dest, " vs ", ir.length));
                 assert(ir[dest].paired ==  ir[pc],
                     text("Wrong pairing of opcodes at pc=", pc, "and pc=", dest));
             }
             else if (ir[pc].isAtom)
             {

             }
             else
                assert(0, text("Unknown type of instruction at pc=", pc));
         }
     }
 }