libphobos/src/std/uni.d - gcc - Git at Google

 // Written in the D programming language.

 /++
     $(P The $(D std.uni) module provides an implementation
     of fundamental Unicode algorithms and data structures.
     This doesn't include UTF encoding and decoding primitives,
     see $(REF decode, std,_utf) and $(REF encode, std,_utf) in $(MREF std, utf)
     for this functionality. )

 $(SCRIPT inhibitQuickIndex = 1;)
 $(BOOKTABLE,
 $(TR $(TH Category) $(TH Functions))
 $(TR $(TD Decode) $(TD
     $(LREF byCodePoint)
     $(LREF byGrapheme)
     $(LREF decodeGrapheme)
     $(LREF graphemeStride)
 ))
 $(TR $(TD Comparison) $(TD
     $(LREF icmp)
     $(LREF sicmp)
 ))
 $(TR $(TD Classification) $(TD
     $(LREF isAlpha)
     $(LREF isAlphaNum)
     $(LREF isCodepointSet)
     $(LREF isControl)
     $(LREF isFormat)
     $(LREF isGraphical)
     $(LREF isIntegralPair)
     $(LREF isMark)
     $(LREF isNonCharacter)
     $(LREF isNumber)
     $(LREF isPrivateUse)
     $(LREF isPunctuation)
     $(LREF isSpace)
     $(LREF isSurrogate)
     $(LREF isSurrogateHi)
     $(LREF isSurrogateLo)
     $(LREF isSymbol)
     $(LREF isWhite)
 ))
 $(TR $(TD Normalization) $(TD
     $(LREF NFC)
     $(LREF NFD)
     $(LREF NFKC)
     $(LREF NFKD)
     $(LREF NormalizationForm)
     $(LREF normalize)
 ))
 $(TR $(TD Decompose) $(TD
     $(LREF decompose)
     $(LREF decomposeHangul)
     $(LREF UnicodeDecomposition)
 ))
 $(TR $(TD Compose) $(TD
     $(LREF compose)
     $(LREF composeJamo)
 ))
 $(TR $(TD Sets) $(TD
     $(LREF CodepointInterval)
     $(LREF CodepointSet)
     $(LREF InversionList)
     $(LREF unicode)
 ))
 $(TR $(TD Trie) $(TD
     $(LREF codepointSetTrie)
     $(LREF CodepointSetTrie)
     $(LREF codepointTrie)
     $(LREF CodepointTrie)
     $(LREF toTrie)
     $(LREF toDelegate)
 ))
 $(TR $(TD Casing) $(TD
     $(LREF asCapitalized)
     $(LREF asLowerCase)
     $(LREF asUpperCase)
     $(LREF isLower)
     $(LREF isUpper)
     $(LREF toLower)
     $(LREF toLowerInPlace)
     $(LREF toUpper)
     $(LREF toUpperInPlace)
 ))
 $(TR $(TD Utf8Matcher) $(TD
     $(LREF isUtfMatcher)
     $(LREF MatcherConcept)
     $(LREF utfMatcher)
 ))
 $(TR $(TD Separators) $(TD
     $(LREF lineSep)
     $(LREF nelSep)
     $(LREF paraSep)
 ))
 $(TR $(TD Building blocks) $(TD
     $(LREF allowedIn)
     $(LREF combiningClass)
     $(LREF Grapheme)
 ))
 )

     $(P All primitives listed operate on Unicode characters and
         sets of characters. For functions which operate on ASCII characters
         and ignore Unicode $(CHARACTERS), see $(MREF std, ascii).
         For definitions of Unicode $(CHARACTER), $(CODEPOINT) and other terms
         used throughout this module see the $(S_LINK Terminology, terminology) section
         below.
     )
     $(P The focus of this module is the core needs of developing Unicode-aware
         applications. To that effect it provides the following optimized primitives:
     )
     $(UL
         $(LI Character classification by category and common properties:
             $(LREF isAlpha), $(LREF isWhite) and others.
         )
         $(LI
             Case-insensitive string comparison ($(LREF sicmp), $(LREF icmp)).
         )
         $(LI
             Converting text to any of the four normalization forms via $(LREF normalize).
         )
         $(LI
             Decoding ($(LREF decodeGrapheme))  and iteration ($(LREF byGrapheme), $(LREF graphemeStride))
             by user-perceived characters, that is by $(LREF Grapheme) clusters.
         )
         $(LI
             Decomposing and composing of individual character(s) according to canonical
             or compatibility rules, see $(LREF compose) and $(LREF decompose),
             including the specific version for Hangul syllables $(LREF composeJamo)
             and $(LREF decomposeHangul).
         )
     )
     $(P It's recognized that an application may need further enhancements
         and extensions, such as less commonly known algorithms,
         or tailoring existing ones for region specific needs. To help users
         with building any extra functionality beyond the core primitives,
         the module provides:
     )
     $(UL
         $(LI
             $(LREF CodepointSet), a type for easy manipulation of sets of characters.
             Besides the typical set algebra it provides an unusual feature:
             a D source code generator for detection of $(CODEPOINTS) in this set.
             This is a boon for meta-programming parser frameworks,
             and is used internally to power classification in small
             sets like $(LREF isWhite).
         )
         $(LI
             A way to construct optimal packed multi-stage tables also known as a
             special case of $(LINK2 https://en.wikipedia.org/wiki/Trie, Trie).
             The functions $(LREF codepointTrie), $(LREF codepointSetTrie)
             construct custom tries that map dchar to value.
             The end result is a fast and predictable $(BIGOH 1) lookup that powers
             functions like $(LREF isAlpha) and $(LREF combiningClass),
             but for user-defined data sets.
         )
         $(LI
             A useful technique for Unicode-aware parsers that perform
             character classification of encoded $(CODEPOINTS)
             is to avoid unnecessary decoding at all costs.
             $(LREF utfMatcher) provides an improvement over the usual workflow
             of decode-classify-process, combining the decoding and classification
             steps. By extracting necessary bits directly from encoded
             $(S_LINK Code unit, code units) matchers achieve
             significant performance improvements. See $(LREF MatcherConcept) for
             the common interface of UTF matchers.
         )
         $(LI
             Generally useful building blocks for customized normalization:
             $(LREF combiningClass) for querying combining class
             and $(LREF allowedIn) for testing the Quick_Check
             property of a given normalization form.
         )
         $(LI
             Access to a large selection of commonly used sets of $(CODEPOINTS).
             $(S_LINK Unicode properties, Supported sets) include Script,
             Block and General Category. The exact contents of a set can be
             observed in the CLDR utility, on the
             $(HTTP www.unicode.org/cldr/utility/properties.jsp, property index) page
             of the Unicode website.
             See $(LREF unicode) for easy and (optionally) compile-time checked set
             queries.
         )
     )
     $(SECTION Synopsis)
     ---
     import std.uni;
     void main()
     {
         // initialize code point sets using script/block or property name
         // now 'set' contains code points from both scripts.
         auto set = unicode("Cyrillic") | unicode("Armenian");
         // same thing but simpler and checked at compile-time
         auto ascii = unicode.ASCII;
         auto currency = unicode.Currency_Symbol;

         // easy set ops
         auto a = set & ascii;
         assert(a.empty); // as it has no intersection with ascii
         a = set | ascii;
         auto b = currency - a; // subtract all ASCII, Cyrillic and Armenian

         // some properties of code point sets
         assert(b.length > 45); // 46 items in Unicode 6.1, even more in 6.2
         // testing presence of a code point in a set
         // is just fine, it is O(logN)
         assert(!b['$']);
         assert(!b['\u058F']); // Armenian dram sign
         assert(b['¥']);

         // building fast lookup tables, these guarantee O(1) complexity
         // 1-level Trie lookup table essentially a huge bit-set ~262Kb
         auto oneTrie = toTrie!1(b);
         // 2-level far more compact but typically slightly slower
         auto twoTrie = toTrie!2(b);
         // 3-level even smaller, and a bit slower yet
         auto threeTrie = toTrie!3(b);
         assert(oneTrie['£']);
         assert(twoTrie['£']);
         assert(threeTrie['£']);

         // build the trie with the most sensible trie level
         // and bind it as a functor
         auto cyrillicOrArmenian = toDelegate(set);
         auto balance = find!(cyrillicOrArmenian)("Hello ընկեր!");
         assert(balance == "ընկեր!");
         // compatible with bool delegate(dchar)
         bool delegate(dchar) bindIt = cyrillicOrArmenian;

         // Normalization
         string s = "Plain ascii (and not only), is always normalized!";
         assert(s is normalize(s));// is the same string

         string nonS = "A\u0308ffin"; // 'A' followed by a combining diaeresis (U+0308)
         auto nS = normalize(nonS); // to NFC, the W3C endorsed standard
         assert(nS == "Äffin");
         assert(nS != nonS);
         string composed = "Äffin";

         assert(normalize!NFD(composed) == "A\u0308ffin");
         // to NFKD, compatibility decomposition useful for fuzzy matching/searching
         assert(normalize!NFKD("2¹⁰") == "210");
     }
     ---
     $(SECTION Terminology
     )
     $(P The following is a list of important Unicode notions
     and definitions. Any conventions used specifically in this
     module alone are marked as such. The descriptions are based on the formal
     definition as found in $(HTTP www.unicode.org/versions/Unicode6.2.0/ch03.pdf,
     chapter three of The Unicode Standard Core Specification.)
     )
     $(P $(DEF Abstract character) A unit of information used for the organization,
         control, or representation of textual data.
         Note that:
         $(UL
             $(LI When representing data, the nature of that data
                 is generally symbolic as opposed to some other
                 kind of data (for example, visual).
             )
              $(LI An abstract character has no concrete form
                 and should not be confused with a $(S_LINK Glyph, glyph).
             )
             $(LI An abstract character does not necessarily
                 correspond to what a user thinks of as a “character”
                 and should not be confused with a $(LREF Grapheme).
             )
             $(LI The abstract characters encoded (see Encoded character)
                 are known as Unicode abstract characters.
             )
             $(LI Abstract characters not directly
                 encoded by the Unicode Standard can often be
                 represented by the use of combining character sequences.
             )
         )
     )
     $(P $(DEF Canonical decomposition)
         The decomposition of a character or character sequence
         that results from recursively applying the canonical
         mappings found in the Unicode Character Database
         and those described in Conjoining Jamo Behavior
         (section 12 of
         $(HTTP www.unicode.org/uni2book/ch03.pdf, Unicode Conformance)).
     )
     $(P $(DEF Canonical composition)
         The precise definition of the Canonical composition
         is the algorithm as specified in $(HTTP www.unicode.org/uni2book/ch03.pdf,
         Unicode Conformance) section 11.
         Informally it's the process that does the reverse of the canonical
         decomposition with the addition of certain rules
         that e.g. prevent legacy characters from appearing in the composed result.
     )
     $(P $(DEF Canonical equivalent)
         Two character sequences are said to be canonical equivalents if
         their full canonical decompositions are identical.
     )
     $(P $(DEF Character) Typically differs by context.
         For the purpose of this documentation the term $(I character)
         implies $(I encoded character), that is, a code point having
         an assigned abstract character (a symbolic meaning).
     )
     $(P $(DEF Code point) Any value in the Unicode codespace;
         that is, the range of integers from 0 to 10FFFF (hex).
         Not all code points are assigned to encoded characters.
     )
     $(P $(DEF Code unit) The minimal bit combination that can represent
         a unit of encoded text for processing or interchange.
         Depending on the encoding this could be:
         8-bit code units in the UTF-8 ($(D char)),
         16-bit code units in the UTF-16 ($(D wchar)),
         and 32-bit code units in the UTF-32 ($(D dchar)).
         $(I Note that in UTF-32, a code unit is a code point
         and is represented by the D $(D dchar) type.)
     )
     $(P $(DEF Combining character) A character with the General Category
         of Combining Mark (M).
         $(UL
             $(LI All characters with non-zero canonical combining class
             are combining characters, but the reverse is not the case:
             there are combining characters with a zero combining class.
             )
             $(LI These characters are not normally used in isolation
             unless they are being described. They include such characters
             as accents, diacritics, Hebrew points, Arabic vowel signs,
             and Indic matras.
             )
         )
     )
     $(P $(DEF Combining class)
         A numerical value used by the Unicode Canonical Ordering Algorithm
         to determine which sequences of combining marks are to be
         considered canonically equivalent and  which are not.
     )
     $(P $(DEF Compatibility decomposition)
         The decomposition of a character or character sequence that results
         from recursively applying both the compatibility mappings and
         the canonical mappings found in the Unicode Character Database, and those
         described in Conjoining Jamo Behavior, until no characters
         can be further decomposed.
     )
     $(P $(DEF Compatibility equivalent)
         Two character sequences are said to be compatibility
         equivalents if their full compatibility decompositions are identical.
     )
     $(P $(DEF Encoded character) An association (or mapping)
         between an abstract character and a code point.
     )
     $(P $(DEF Glyph) The actual, concrete image of a glyph representation
         having been rasterized or otherwise imaged onto some display surface.
     )
     $(P $(DEF Grapheme base) A character with the property
         Grapheme_Base, or any standard Korean syllable block.
     )
     $(P $(DEF Grapheme cluster) Defined as the text between
         grapheme boundaries  as specified by Unicode Standard Annex #29,
         $(HTTP www.unicode.org/reports/tr29/, Unicode text segmentation).
         Important general properties of a grapheme:
         $(UL
             $(LI The grapheme cluster represents a horizontally segmentable
             unit of text, consisting of some grapheme base (which may
             consist of a Korean syllable) together with any number of
             nonspacing marks applied to it.
             )
             $(LI  A grapheme cluster typically starts with a grapheme base
             and then extends across any subsequent sequence of nonspacing marks.
             A grapheme cluster is most directly relevant to text rendering and
             processes such as cursor placement and text selection in editing,
             but may also be relevant to comparison and searching.
             )
             $(LI For many processes, a grapheme cluster behaves as if it was a
             single character with the same properties as its grapheme base.
             Effectively, nonspacing marks apply $(I graphically) to the base,
             but do not change its properties.
             )
         )
         $(P This module defines a number of primitives that work with graphemes:
         $(LREF Grapheme), $(LREF decodeGrapheme) and $(LREF graphemeStride).
         All of them are using $(I extended grapheme) boundaries
         as defined in the aforementioned standard annex.
         )
     )
     $(P $(DEF Nonspacing mark) A combining character with the
         General Category of Nonspacing Mark (Mn) or Enclosing Mark (Me).
     )
     $(P $(DEF Spacing mark) A combining character that is not a nonspacing mark.
     )
     $(SECTION Normalization
     )
     $(P The concepts of $(S_LINK Canonical equivalent, canonical equivalent)
         or $(S_LINK Compatibility equivalent, compatibility equivalent)
         characters in the Unicode Standard make it necessary to have a full, formal
         definition of equivalence for Unicode strings.
         String equivalence is determined by a process called normalization,
         whereby strings are converted into forms which are compared
         directly for identity. This is the primary goal of the normalization process,
         see the function $(LREF normalize) to convert into any of
         the four defined forms.
     )
     $(P A very important attribute of the Unicode Normalization Forms
         is that they must remain stable between versions of the Unicode Standard.
         A Unicode string normalized to a particular Unicode Normalization Form
         in one version of the standard is guaranteed to remain in that Normalization
         Form for implementations of future versions of the standard.
     )
     $(P The Unicode Standard specifies four normalization forms.
         Informally, two of these forms are defined by maximal decomposition
         of equivalent sequences, and two of these forms are defined
         by maximal $(I composition) of equivalent sequences.
             $(UL
             $(LI Normalization Form D (NFD): The $(S_LINK Canonical decomposition,
                 canonical decomposition) of a character sequence.)
             $(LI Normalization Form KD (NFKD): The $(S_LINK Compatibility decomposition,
                 compatibility decomposition) of a character sequence.)
             $(LI Normalization Form C (NFC): The canonical composition of the
                 $(S_LINK Canonical decomposition, canonical decomposition)
                 of a coded character sequence.)
             $(LI Normalization Form KC (NFKC): The canonical composition
             of the $(S_LINK Compatibility decomposition,
                 compatibility decomposition) of a character sequence.)
             )
     )
     $(P The choice of the normalization form depends on the particular use case.
         NFC is the best form for general text, since it's more compatible with
         strings converted from legacy encodings. NFKC is the preferred form for
         identifiers, especially where there are security concerns. NFD and NFKD
         are the most useful for internal processing.
     )
     $(SECTION Construction of lookup tables
     )
     $(P The Unicode standard describes a set of algorithms that
         depend on having the ability to quickly look up various properties
         of a code point. Given the codespace of about 1 million $(CODEPOINTS),
         it is not a trivial task to provide a space-efficient solution for
         the multitude of properties.
     )
     $(P Common approaches such as hash-tables or binary search over
         sorted code point intervals (as in $(LREF InversionList)) are insufficient.
         Hash-tables have an enormous memory footprint and binary search
         over intervals is not fast enough for some heavy-duty algorithms.
     )
     $(P The recommended solution (see Unicode Implementation Guidelines)
         is using multi-stage tables that are an implementation of the
         $(HTTP en.wikipedia.org/wiki/Trie, Trie) data structure with integer
         keys and a fixed number of stages. For the remainder of the section
         this will be called a fixed trie. The following describes a particular
         implementation that is aimed for the speed of access at the expense
         of ideal size savings.
     )
     $(P Taking a 2-level Trie as an example the principle of operation is as follows.
         Split the number of bits in a key (code point, 21 bits) into 2 components
         (e.g. 13 and 8).  The first is the number of bits in the index of the trie
         and the other is the number of bits in each page of the trie.
         The layout of the trie is then an array of size 2^^bits-of-index followed
         by an array of memory chunks of size 2^^bits-of-page/bits-per-element.
     )
     $(P The number of pages is variable (but not less than 1)
         unlike the number of entries in the index. The slots of the index
         all have to contain a number of a page that is present. The lookup is then
         just a couple of operations - slice the upper bits,
         lookup an index for these, take a page at this index and use
         the lower bits as an offset within this page.

         Assuming that pages are laid out consecutively
         in one array at $(D pages), the pseudo-code is:
     )
     ---
     auto elemsPerPage = (2 ^^ bits_per_page) / Value.sizeOfInBits;
     pages[index[n >> bits_per_page]][n & (elemsPerPage - 1)];
     ---
     $(P Where if $(D elemsPerPage) is a power of 2 the whole process is
         a handful of simple instructions and 2 array reads. Subsequent levels
         of the trie are introduced by recursing on this notion - the index array
         is treated as values. The number of bits in index is then again
         split into 2 parts, with pages over 'current-index' and the new 'upper-index'.
     )

     $(P For completeness a level 1 trie is simply an array.
         The current implementation takes advantage of bit-packing values
         when the range is known to be limited in advance (such as $(D bool)).
         See also $(LREF BitPacked) for enforcing it manually.
         The major size advantage however comes from the fact
         that multiple $(B identical pages on every level are merged) by construction.
     )
     $(P The process of constructing a trie is more involved and is hidden from
         the user in the form of the convenience functions $(LREF codepointTrie),
         $(LREF codepointSetTrie) and the even more convenient $(LREF toTrie).
         In general a set or built-in AA with $(D dchar) type
         can be turned into a trie. The trie object in this module
         is read-only (immutable); it's effectively frozen after construction.
     )
     $(SECTION Unicode properties
     )
     $(P This is a full list of Unicode properties accessible through $(LREF unicode)
         with specific helpers per category nested within. Consult the
         $(HTTP www.unicode.org/cldr/utility/properties.jsp, CLDR utility)
         when in doubt about the contents of a particular set.
     )
     $(P General category sets listed below are only accessible with the
         $(LREF unicode) shorthand accessor.)
         $(BOOKTABLE $(B General category ),
              $(TR $(TH Abb.) $(TH Long form)
                 $(TH Abb.) $(TH Long form)$(TH Abb.) $(TH Long form))
             $(TR $(TD L) $(TD Letter)
                 $(TD Cn) $(TD Unassigned)  $(TD Po) $(TD Other_Punctuation))
             $(TR $(TD Ll) $(TD Lowercase_Letter)
                 $(TD Co) $(TD Private_Use) $(TD Ps) $(TD Open_Punctuation))
             $(TR $(TD Lm) $(TD Modifier_Letter)
                 $(TD Cs) $(TD Surrogate)   $(TD S) $(TD Symbol))
             $(TR $(TD Lo) $(TD Other_Letter)
                 $(TD N) $(TD Number)  $(TD Sc) $(TD Currency_Symbol))
             $(TR $(TD Lt) $(TD Titlecase_Letter)
               $(TD Nd) $(TD Decimal_Number)  $(TD Sk) $(TD Modifier_Symbol))
             $(TR $(TD Lu) $(TD Uppercase_Letter)
               $(TD Nl) $(TD Letter_Number)   $(TD Sm) $(TD Math_Symbol))
             $(TR $(TD M) $(TD Mark)
               $(TD No) $(TD Other_Number)    $(TD So) $(TD Other_Symbol))
             $(TR $(TD Mc) $(TD Spacing_Mark)
               $(TD P) $(TD Punctuation) $(TD Z) $(TD Separator))
             $(TR $(TD Me) $(TD Enclosing_Mark)
               $(TD Pc) $(TD Connector_Punctuation)   $(TD Zl) $(TD Line_Separator))
             $(TR $(TD Mn) $(TD Nonspacing_Mark)
               $(TD Pd) $(TD Dash_Punctuation)    $(TD Zp) $(TD Paragraph_Separator))
             $(TR $(TD C) $(TD Other)
               $(TD Pe) $(TD Close_Punctuation) $(TD Zs) $(TD Space_Separator))
             $(TR $(TD Cc) $(TD Control) $(TD Pf)
               $(TD Final_Punctuation)   $(TD -) $(TD Any))
             $(TR $(TD Cf) $(TD Format)
               $(TD Pi) $(TD Initial_Punctuation) $(TD -) $(TD ASCII))
     )
     $(P Sets for other commonly useful properties that are
         accessible with $(LREF unicode):)
         $(BOOKTABLE $(B Common binary properties),
             $(TR $(TH Name) $(TH Name) $(TH Name))
             $(TR $(TD Alphabetic)  $(TD Ideographic) $(TD Other_Uppercase))
             $(TR $(TD ASCII_Hex_Digit) $(TD IDS_Binary_Operator) $(TD Pattern_Syntax))
             $(TR $(TD Bidi_Control)    $(TD ID_Start)    $(TD Pattern_White_Space))
             $(TR $(TD Cased)   $(TD IDS_Trinary_Operator)    $(TD Quotation_Mark))
             $(TR $(TD Case_Ignorable)  $(TD Join_Control)    $(TD Radical))
             $(TR $(TD Dash)    $(TD Logical_Order_Exception) $(TD Soft_Dotted))
             $(TR $(TD Default_Ignorable_Code_Point)    $(TD Lowercase)   $(TD STerm))
             $(TR $(TD Deprecated)  $(TD Math)    $(TD Terminal_Punctuation))
             $(TR $(TD Diacritic)   $(TD Noncharacter_Code_Point) $(TD Unified_Ideograph))
             $(TR $(TD Extender)    $(TD Other_Alphabetic)    $(TD Uppercase))
             $(TR $(TD Grapheme_Base)   $(TD Other_Default_Ignorable_Code_Point)  $(TD Variation_Selector))
             $(TR $(TD Grapheme_Extend) $(TD Other_Grapheme_Extend)   $(TD White_Space))
             $(TR $(TD Grapheme_Link)   $(TD Other_ID_Continue)   $(TD XID_Continue))
             $(TR $(TD Hex_Digit)   $(TD Other_ID_Start)  $(TD XID_Start))
             $(TR $(TD Hyphen)  $(TD Other_Lowercase) )
             $(TR $(TD ID_Continue) $(TD Other_Math)  )
     )
     $(P Below is the table with block names accepted by $(LREF unicode.block).
         Note that the shorthand version $(LREF unicode) requires "In"
         to be prepended to the names of blocks so as to disambiguate
         scripts and blocks.
     )
     $(BOOKTABLE $(B Blocks),
         $(TR $(TD Aegean Numbers)    $(TD Ethiopic Extended) $(TD Mongolian))
         $(TR $(TD Alchemical Symbols)    $(TD Ethiopic Extended-A)   $(TD Musical Symbols))
         $(TR $(TD Alphabetic Presentation Forms) $(TD Ethiopic Supplement)   $(TD Myanmar))
         $(TR $(TD Ancient Greek Musical Notation)    $(TD General Punctuation)   $(TD Myanmar Extended-A))
         $(TR $(TD Ancient Greek Numbers) $(TD Geometric Shapes)  $(TD New Tai Lue))
         $(TR $(TD Ancient Symbols)   $(TD Georgian)  $(TD NKo))
         $(TR $(TD Arabic)    $(TD Georgian Supplement)   $(TD Number Forms))
         $(TR $(TD Arabic Extended-A) $(TD Glagolitic)    $(TD Ogham))
         $(TR $(TD Arabic Mathematical Alphabetic Symbols)    $(TD Gothic)    $(TD Ol Chiki))
         $(TR $(TD Arabic Presentation Forms-A)   $(TD Greek and Coptic)  $(TD Old Italic))
         $(TR $(TD Arabic Presentation Forms-B)   $(TD Greek Extended)    $(TD Old Persian))
         $(TR $(TD Arabic Supplement) $(TD Gujarati)  $(TD Old South Arabian))
         $(TR $(TD Armenian)  $(TD Gurmukhi)  $(TD Old Turkic))
         $(TR $(TD Arrows)    $(TD Halfwidth and Fullwidth Forms) $(TD Optical Character Recognition))
         $(TR $(TD Avestan)   $(TD Hangul Compatibility Jamo) $(TD Oriya))
         $(TR $(TD Balinese)  $(TD Hangul Jamo)   $(TD Osmanya))
         $(TR $(TD Bamum) $(TD Hangul Jamo Extended-A)    $(TD Phags-pa))
         $(TR $(TD Bamum Supplement)  $(TD Hangul Jamo Extended-B)    $(TD Phaistos Disc))
         $(TR $(TD Basic Latin)   $(TD Hangul Syllables)  $(TD Phoenician))
         $(TR $(TD Batak) $(TD Hanunoo)   $(TD Phonetic Extensions))
         $(TR $(TD Bengali)   $(TD Hebrew)    $(TD Phonetic Extensions Supplement))
         $(TR $(TD Block Elements)    $(TD High Private Use Surrogates)   $(TD Playing Cards))
         $(TR $(TD Bopomofo)  $(TD High Surrogates)   $(TD Private Use Area))
         $(TR $(TD Bopomofo Extended) $(TD Hiragana)  $(TD Rejang))
         $(TR $(TD Box Drawing)   $(TD Ideographic Description Characters)    $(TD Rumi Numeral Symbols))
         $(TR $(TD Brahmi)    $(TD Imperial Aramaic)  $(TD Runic))
         $(TR $(TD Braille Patterns)  $(TD Inscriptional Pahlavi) $(TD Samaritan))
         $(TR $(TD Buginese)  $(TD Inscriptional Parthian)    $(TD Saurashtra))
         $(TR $(TD Buhid) $(TD IPA Extensions)    $(TD Sharada))
         $(TR $(TD Byzantine Musical Symbols) $(TD Javanese)  $(TD Shavian))
         $(TR $(TD Carian)    $(TD Kaithi)    $(TD Sinhala))
         $(TR $(TD Chakma)    $(TD Kana Supplement)   $(TD Small Form Variants))
         $(TR $(TD Cham)  $(TD Kanbun)    $(TD Sora Sompeng))
         $(TR $(TD Cherokee)  $(TD Kangxi Radicals)   $(TD Spacing Modifier Letters))
         $(TR $(TD CJK Compatibility) $(TD Kannada)   $(TD Specials))
         $(TR $(TD CJK Compatibility Forms)   $(TD Katakana)  $(TD Sundanese))
         $(TR $(TD CJK Compatibility Ideographs)  $(TD Katakana Phonetic Extensions)  $(TD Sundanese Supplement))
         $(TR $(TD CJK Compatibility Ideographs Supplement)   $(TD Kayah Li)  $(TD Superscripts and Subscripts))
         $(TR $(TD CJK Radicals Supplement)   $(TD Kharoshthi)    $(TD Supplemental Arrows-A))
         $(TR $(TD CJK Strokes)   $(TD Khmer) $(TD Supplemental Arrows-B))
         $(TR $(TD CJK Symbols and Punctuation)   $(TD Khmer Symbols) $(TD Supplemental Mathematical Operators))
         $(TR $(TD CJK Unified Ideographs)    $(TD Lao)   $(TD Supplemental Punctuation))
         $(TR $(TD CJK Unified Ideographs Extension A)    $(TD Latin-1 Supplement)    $(TD Supplementary Private Use Area-A))
         $(TR $(TD CJK Unified Ideographs Extension B)    $(TD Latin Extended-A)  $(TD Supplementary Private Use Area-B))
         $(TR $(TD CJK Unified Ideographs Extension C)    $(TD Latin Extended Additional) $(TD Syloti Nagri))
         $(TR $(TD CJK Unified Ideographs Extension D)    $(TD Latin Extended-B)  $(TD Syriac))
         $(TR $(TD Combining Diacritical Marks)   $(TD Latin Extended-C)  $(TD Tagalog))
         $(TR $(TD Combining Diacritical Marks for Symbols)   $(TD Latin Extended-D)  $(TD Tagbanwa))
         $(TR $(TD Combining Diacritical Marks Supplement)    $(TD Lepcha)    $(TD Tags))
         $(TR $(TD Combining Half Marks)  $(TD Letterlike Symbols)    $(TD Tai Le))
         $(TR $(TD Common Indic Number Forms) $(TD Limbu) $(TD Tai Tham))
         $(TR $(TD Control Pictures)  $(TD Linear B Ideograms)    $(TD Tai Viet))
         $(TR $(TD Coptic)    $(TD Linear B Syllabary)    $(TD Tai Xuan Jing Symbols))
         $(TR $(TD Counting Rod Numerals) $(TD Lisu)  $(TD Takri))
         $(TR $(TD Cuneiform) $(TD Low Surrogates)    $(TD Tamil))
         $(TR $(TD Cuneiform Numbers and Punctuation) $(TD Lycian)    $(TD Telugu))
         $(TR $(TD Currency Symbols)  $(TD Lydian)    $(TD Thaana))
         $(TR $(TD Cypriot Syllabary) $(TD Mahjong Tiles) $(TD Thai))
         $(TR $(TD Cyrillic)  $(TD Malayalam) $(TD Tibetan))
         $(TR $(TD Cyrillic Extended-A)   $(TD Mandaic)   $(TD Tifinagh))
         $(TR $(TD Cyrillic Extended-B)   $(TD Mathematical Alphanumeric Symbols) $(TD Transport And Map Symbols))
         $(TR $(TD Cyrillic Supplement)   $(TD Mathematical Operators)    $(TD Ugaritic))
         $(TR $(TD Deseret)   $(TD Meetei Mayek)  $(TD Unified Canadian Aboriginal Syllabics))
         $(TR $(TD Devanagari)    $(TD Meetei Mayek Extensions)   $(TD Unified Canadian Aboriginal Syllabics Extended))
         $(TR $(TD Devanagari Extended)   $(TD Meroitic Cursive)  $(TD Vai))
         $(TR $(TD Dingbats)  $(TD Meroitic Hieroglyphs)  $(TD Variation Selectors))
         $(TR $(TD Domino Tiles)  $(TD Miao)  $(TD Variation Selectors Supplement))
         $(TR $(TD Egyptian Hieroglyphs)  $(TD Miscellaneous Mathematical Symbols-A)  $(TD Vedic Extensions))
         $(TR $(TD Emoticons) $(TD Miscellaneous Mathematical Symbols-B)  $(TD Vertical Forms))
         $(TR $(TD Enclosed Alphanumerics)    $(TD Miscellaneous Symbols) $(TD Yijing Hexagram Symbols))
         $(TR $(TD Enclosed Alphanumeric Supplement)  $(TD Miscellaneous Symbols and Arrows)  $(TD Yi Radicals))
         $(TR $(TD Enclosed CJK Letters and Months)   $(TD Miscellaneous Symbols And Pictographs) $(TD Yi Syllables))
         $(TR $(TD Enclosed Ideographic Supplement)   $(TD Miscellaneous Technical)   )
         $(TR $(TD Ethiopic)  $(TD Modifier Tone Letters) )
     )
     $(P Below is the table with script names accepted by $(LREF unicode.script)
         and by the shorthand version $(LREF unicode):)
         $(BOOKTABLE $(B Scripts),
             $(TR $(TD Arabic)  $(TD Hanunoo) $(TD Old_Italic))
             $(TR $(TD Armenian)    $(TD Hebrew)  $(TD Old_Persian))
             $(TR $(TD Avestan) $(TD Hiragana)    $(TD Old_South_Arabian))
             $(TR $(TD Balinese)    $(TD Imperial_Aramaic)    $(TD Old_Turkic))
             $(TR $(TD Bamum)   $(TD Inherited)   $(TD Oriya))
             $(TR $(TD Batak)   $(TD Inscriptional_Pahlavi)   $(TD Osmanya))
             $(TR $(TD Bengali) $(TD Inscriptional_Parthian)  $(TD Phags_Pa))
             $(TR $(TD Bopomofo)    $(TD Javanese)    $(TD Phoenician))
             $(TR $(TD Brahmi)  $(TD Kaithi)  $(TD Rejang))
             $(TR $(TD Braille) $(TD Kannada) $(TD Runic))
             $(TR $(TD Buginese)    $(TD Katakana)    $(TD Samaritan))
             $(TR $(TD Buhid)   $(TD Kayah_Li)    $(TD Saurashtra))
             $(TR $(TD Canadian_Aboriginal) $(TD Kharoshthi)  $(TD Sharada))
             $(TR $(TD Carian)  $(TD Khmer)   $(TD Shavian))
             $(TR $(TD Chakma)  $(TD Lao) $(TD Sinhala))
             $(TR $(TD Cham)    $(TD Latin)   $(TD Sora_Sompeng))
             $(TR $(TD Cherokee)    $(TD Lepcha)  $(TD Sundanese))
             $(TR $(TD Common)  $(TD Limbu)   $(TD Syloti_Nagri))
             $(TR $(TD Coptic)  $(TD Linear_B)    $(TD Syriac))
             $(TR $(TD Cuneiform)   $(TD Lisu)    $(TD Tagalog))
             $(TR $(TD Cypriot) $(TD Lycian)  $(TD Tagbanwa))
             $(TR $(TD Cyrillic)    $(TD Lydian)  $(TD Tai_Le))
             $(TR $(TD Deseret) $(TD Malayalam)   $(TD Tai_Tham))
             $(TR $(TD Devanagari)  $(TD Mandaic) $(TD Tai_Viet))
             $(TR $(TD Egyptian_Hieroglyphs)    $(TD Meetei_Mayek)    $(TD Takri))
             $(TR $(TD Ethiopic)    $(TD Meroitic_Cursive)    $(TD Tamil))
             $(TR $(TD Georgian)    $(TD Meroitic_Hieroglyphs)    $(TD Telugu))
             $(TR $(TD Glagolitic)  $(TD Miao)    $(TD Thaana))
             $(TR $(TD Gothic)  $(TD Mongolian)   $(TD Thai))
             $(TR $(TD Greek)   $(TD Myanmar) $(TD Tibetan))
             $(TR $(TD Gujarati)    $(TD New_Tai_Lue) $(TD Tifinagh))
             $(TR $(TD Gurmukhi)    $(TD Nko) $(TD Ugaritic))
             $(TR $(TD Han) $(TD Ogham)   $(TD Vai))
             $(TR $(TD Hangul)  $(TD Ol_Chiki)    $(TD Yi))
     )
     $(P Below is the table of names accepted by $(LREF unicode.hangulSyllableType).)
         $(BOOKTABLE $(B Hangul syllable type),
             $(TR $(TH Abb.) $(TH Long form))
             $(TR $(TD L)   $(TD Leading_Jamo))
             $(TR $(TD LV)  $(TD LV_Syllable))
             $(TR $(TD LVT) $(TD LVT_Syllable) )
             $(TR $(TD T)   $(TD Trailing_Jamo))
             $(TR $(TD V)   $(TD Vowel_Jamo))
     )
     References:
         $(HTTP www.digitalmars.com/d/ascii-table.html, ASCII Table),
         $(HTTP en.wikipedia.org/wiki/Unicode, Wikipedia),
         $(HTTP www.unicode.org, The Unicode Consortium),
         $(HTTP www.unicode.org/reports/tr15/, Unicode normalization forms),
         $(HTTP www.unicode.org/reports/tr29/, Unicode text segmentation),
         $(HTTP www.unicode.org/uni2book/ch05.pdf,
             Unicode Implementation Guidelines),
         $(HTTP www.unicode.org/uni2book/ch03.pdf,
             Unicode Conformance)
     Trademarks:
         Unicode(tm) is a trademark of Unicode, Inc.

     Copyright: Copyright 2013 -
     License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
     Authors:   Dmitry Olshansky
     Source:    $(PHOBOSSRC std/_uni.d)
     Standards: $(HTTP www.unicode.org/versions/Unicode6.2.0/, Unicode v6.2)

 Macros:

 SECTION = <h3><a id="$1">$0</a></h3>
 DEF = <div><a id="$1"><i>$0</i></a></div>
 S_LINK = <a href="#$1">$+</a>
 CODEPOINT = $(S_LINK Code point, code point)
 CODEPOINTS = $(S_LINK Code point, code points)
 CHARACTER = $(S_LINK Character, character)
 CHARACTERS = $(S_LINK Character, characters)
 CLUSTER = $(S_LINK Grapheme cluster, grapheme cluster)
 +/
 module std.uni;

 import std.meta; // AliasSeq
 import std.range.primitives; // back, ElementEncodingType, ElementType, empty,
     // front, isForwardRange, isInputRange, isRandomAccessRange, popFront, put,
     // save
 import std.traits; // isConvertibleToString, isIntegral, isSomeChar,
     // isSomeString, Unqual
 import std.exception;// : enforce;
 import core.memory; //: pureMalloc, pureRealloc, pureFree;
 import core.exception; // : onOutOfMemoryError;
 static import std.ascii;
 // debug = std_uni;

 debug(std_uni) import std.stdio; // writefln, writeln

 private:

 version (unittest)
 {
 private:
     // String wrapper that converts to `string` only through `alias this`
     // and cannot be copied (postblit is disabled).
     struct TestAliasedString
     {
         string get() @safe @nogc pure nothrow { return _s; }
         alias get this;
         @disable this(this);
         string _s;
     }

     // Calls `func` once with a TestAliasedString wrapping `s` and once with
     // `s` itself (forwarding `args` both times) and reports whether the two
     // results agree.
     bool testAliasedString(alias func, Args...)(string s, Args args)
     {
         import std.algorithm.comparison : equal;
         auto viaAlias = func(TestAliasedString(s), args);
         auto viaString = func(s, args);
         static if (!is(typeof(equal(viaAlias, viaString))))
             return viaAlias == viaString;
         else
             return equal(viaAlias, viaString); // ranges: compare contents, not identity
     }
 }

 // Element-wise copy of src into dest that walks from the last index down
 // to the first. Both slices must have the same length.
 void copyBackwards(T,U)(T[] src, U[] dest)
 {
     assert(src.length == dest.length);
     foreach_reverse (idx; 0 .. src.length)
         dest[idx] = src[idx];
 }

 // Element-wise copy of src into dest that walks from the first index up
 // to the last. Both slices must have the same length.
 void copyForward(T,U)(T[] src, U[] dest)
 {
     assert(src.length == dest.length);
     foreach (idx; 0 .. src.length)
         dest[idx] = src[idx];
 }

 // Compile-time flag consulted by PackedPtrImpl when deciding whether packed
 // elements may be accessed through a narrower pointer type.
 // TODO: update to reflect all major CPUs supporting unaligned reads
 version (X86)
     enum hasUnalignedReads = true;
 else version (X86_64)
     enum hasUnalignedReads = true;
 else version (SystemZ)
     enum hasUnalignedReads = true;
 else
     enum hasUnalignedReads = false; // better be safe than sorry

 // Public named constants for three line-breaking code points.
 public enum dchar lineSep = '\u2028'; /// Constant $(CODEPOINT) (0x2028) - line separator.
 public enum dchar paraSep = '\u2029'; /// Constant $(CODEPOINT) (0x2029) - paragraph separator.
 public enum dchar nelSep  = '\u0085'; /// Constant $(CODEPOINT) (0x0085) - next line.

 // test the intro example
 @safe unittest
 {
     import std.algorithm.searching : find;
     // initialize code point sets using script/block or property name
     // set contains code points from both scripts.
     auto set = unicode("Cyrillic") | unicode("Armenian");
     // or simpler and statically-checked look
     auto ascii = unicode.ASCII;
     auto currency = unicode.Currency_Symbol;

     // easy set ops
     auto a = set & ascii;
     assert(a.empty); // as it has no intersection with ascii
     a = set | ascii;
     auto b = currency - a; // subtract all ASCII, Cyrillic and Armenian

     // some properties of code point sets
     assert(b.length > 45); // 46 items in Unicode 6.1, even more in 6.2
     // testing presence of a code point in a set
     // is just fine, it is O(logN)
     assert(!b['$']);
     assert(!b['\u058F']); // Armenian dram sign
     assert(b['¥']);

     // building fast lookup tables, these guarantee O(1) complexity
     // 1-level Trie lookup table essentially a huge bit-set ~262Kb
     auto oneTrie = toTrie!1(b);
     // 2-level far more compact but typically slightly slower
     auto twoTrie = toTrie!2(b);
     // 3-level even smaller, and a bit slower yet
     auto threeTrie = toTrie!3(b);
     assert(oneTrie['£']);
     assert(twoTrie['£']);
     assert(threeTrie['£']);

     // build the trie with the most sensible trie level
     // and bind it as a functor
     auto cyrillicOrArmenian = toDelegate(set);
     auto balance = find!(cyrillicOrArmenian)("Hello ընկեր!");
     assert(balance == "ընկեր!");
     // compatible with bool delegate(dchar)
     bool delegate(dchar) bindIt = cyrillicOrArmenian;

     // Normalization
     string s = "Plain ascii (and not only), is always normalized!";
     assert(s is normalize(s));// is the same string

     string nonS = "A\u0308ffin"; // 'A' followed by U+0308 (combining diaeresis)
     auto nS = normalize(nonS); // to NFC, the W3C endorsed standard
     assert(nS == "Äffin");
     assert(nS != nonS);
     string composed = "Äffin";

     assert(normalize!NFD(composed) == "A\u0308ffin");
     // to NFKD, compatibility decomposition useful for fuzzy matching/searching
     assert(normalize!NFKD("2¹⁰") == "210");
 }

 // Largest code point value used by this module (U+10FFFF).
 enum lastDchar = 0x10FFFF;

 // Checked conversion to an integral type: asserts that `from` lies within
 // [T.min, T.max] and then casts it to T.
 auto force(T, F)(F from)
 if (isIntegral!T && !is(T == F))
 {
     assert(from >= T.min && from <= T.max);
     return cast(T) from;
 }

 // Checked conversion to a BitPacked type: asserts that `from` fits into
 // bitSizeOf!T bits and wraps the value cast to the underlying type.
 auto force(T, F)(F from)
 if (isBitPacked!T && !is(T == F))
 {
     alias Raw = TypeOfBitPacked!T;
     assert(from <= 2^^bitSizeOf!T-1);
     return T(cast(Raw) from);
 }

 // Identity overload: source and target types already match.
 auto force(T, F)(F from)
 if (is(T == F))
 {
     return from;
 }

 // Repeat `times` times the bit-pattern in val, assuming its length is 'bits'.
 //
 // The copies are laid out back to back starting at bit 0, so the result
 // occupies the low times*bits bits of a size_t.
 size_t replicateBits(size_t times, size_t bits)(size_t val) @safe pure nothrow @nogc
 {
     static if (times == 1)
         return val;
     else static if (bits == 1)
     {
         // a single bit replicated is either all ones or all zeros
         static if (times == size_t.sizeof*8)
             return val ? size_t.max : 0;
         else
             // shift in size_t: an int-typed `1 << times` cannot express
             // patterns of 32 or more bits on 64-bit targets
             return val ? (size_t(1) << times)-1 : 0;
     }
     else static if (times % 2)
         // odd count: peel one copy off, replicate the remaining even count
         return (replicateBits!(times-1, bits)(val)<<bits) | val;
     else
         // even count: double the pattern and halve the number of copies
         return replicateBits!(times/2, bits*2)((val << bits) | val);
 }

 @safe pure nothrow @nogc unittest // for replicate
 {
     import std.algorithm.iteration : sum, map;
     import std.range : iota;
     size_t m = 0b111;
     size_t m2 = 0b01;
     foreach (i; AliasSeq!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
     {
         // i copies of 0b111 form 3*i consecutive ones, i.e. 2^^(3*i) - 1
         assert(replicateBits!(i, 3)(m)+1 == (1<<(3*i)));
         // i copies of 0b01 set every even bit below 2*i
         assert(replicateBits!(i, 2)(m2) == iota(0, i).map!"2^^(2*a)"().sum());
     }
 }

 // multiple arrays squashed into one memory block
 //
 // Level i stores sz[i] elements of type Types[i]; its data starts
 // offsets[i] words into `storage` (level 0 always starts at storage.ptr).
 // Elements are accessed through packed views, see slice!n and ptr!n.
 struct MultiArray(Types...)
 {
     import std.range.primitives : isOutputRange;
     // Allocates storage for sizes[i] elements on each level i;
     // exactly one size per level is expected.
     this(size_t[] sizes...) @safe pure nothrow
     {
         assert(dim == sizes.length);
         size_t full_size;
         foreach (i, v; Types)
         {
             full_size += spaceFor!(bitSizeOf!v)(sizes[i]);
             sz[i] = sizes[i];
             static if (i >= 1)
                 offsets[i] = offsets[i-1] +
                     spaceFor!(bitSizeOf!(Types[i-1]))(sizes[i-1]);
         }

         storage = new size_t[full_size];
     }

     // Wraps previously serialized offsets, sizes and data without copying
     // the data (see store for the matching output).
     this(const(size_t)[] raw_offsets,
         const(size_t)[] raw_sizes, const(size_t)[] data)const @safe pure nothrow @nogc
     {
         offsets[] = raw_offsets[];
         sz[] = raw_sizes[];
         storage = data;
     }

     // Bounds-aware packed view over all elements of level n.
     @property auto slice(size_t n)()inout pure nothrow @nogc
     {
         auto ptr = raw_ptr!n;
         return packedArrayView!(Types[n])(ptr, sz[n]);
     }

     // Packed pointer to the first element of level n.
     @property auto ptr(size_t n)()inout pure nothrow @nogc
     {
         auto ptr = raw_ptr!n;
         return inout(PackedPtr!(Types[n]))(ptr);
     }

     template length(size_t n)
     {
         // Number of elements on level n.
         @property size_t length()const @safe pure nothrow @nogc{ return sz[n]; }

         // Resizes level n to new_size elements, moving the data of all
         // following levels so that the levels stay contiguous.
         @property void length(size_t new_size)
         {
             if (new_size > sz[n])
             {// extend
                 size_t delta = (new_size - sz[n]);
                 sz[n] += delta;
                 delta = spaceFor!(bitSizeOf!(Types[n]))(delta);
                 storage.length +=  delta;// extend space at end
                 // raw_slice!x must follow resize as it could be moved!
                 // next stmts move all data past this array, last-one-goes-first
                 static if (n != dim-1)
                 {
                     auto start = raw_ptr!(n+1);
                     // len includes delta
                     size_t len = (storage.ptr+storage.length-start);

                     copyBackwards(start[0 .. len-delta], start[delta .. len]);

                     start[0 .. delta] = 0;
                     // offsets are used for raw_slice, ptr etc.
                     foreach (i; n+1 .. dim)
                         offsets[i] += delta;
                 }
             }
             else if (new_size < sz[n])
             {// shrink
                 // first word of level n; raw_ptr!0 ignores offsets[0]
                 static if (n == 0)
                     enum size_t base = 0;
                 else
                     immutable size_t base = offsets[n];
                 // words level n occupies right now
                 static if (n != dim-1)
                     immutable size_t owned = offsets[n+1] - base;
                 else
                     immutable size_t owned = storage.length - base;
                 sz[n] = new_size;
                 // Release only whole words that are no longer needed.
                 // Rounding the element delta up to words (as growth does)
                 // could give away a word that still holds live elements.
                 immutable size_t needed = spaceFor!(bitSizeOf!(Types[n]))(new_size);
                 assert(owned >= needed);
                 immutable size_t delta = owned - needed;
                 // move all data past this array towards the front,
                 // first-one-goes-first so overlapping words are read
                 // before they are overwritten
                 static if (n != dim-1)
                 {
                     immutable size_t next = offsets[n+1];
                     copyForward(storage[next .. $],
                         storage[next - delta .. $ - delta]);

                     // adjust offsets last, they affect raw_slice
                     foreach (i; n+1 .. dim)
                         offsets[i] -= delta;
                 }
                 storage.length -= delta;
             }
             // else - NOP
         }
     }

     // Size in bytes of the whole storage (default) or of level n alone.
     @property size_t bytes(size_t n=size_t.max)() const @safe
     {
         static if (n == size_t.max)
             return storage.length*size_t.sizeof;
         else static if (n != Types.length-1)
             return (raw_ptr!(n+1)-raw_ptr!n)*size_t.sizeof;
         else
             return (storage.ptr+storage.length - raw_ptr!n)*size_t.sizeof;
     }

     // Writes offsets, sizes and raw storage to sink as three
     // comma-separated bracketed lists of hexadecimal numbers.
     void store(OutRange)(scope OutRange sink) const
         if (isOutputRange!(OutRange, char))
     {
         import std.format : formattedWrite;
         formattedWrite(sink, "[%( 0x%x, %)]", offsets[]);
         formattedWrite(sink, ", [%( 0x%x, %)]", sz[]);
         formattedWrite(sink, ", [%( 0x%x, %)]", storage);
     }

 private:
     import std.meta : staticMap;
     // Pointer to the first storage word of level n.
     @property auto raw_ptr(size_t n)()inout pure nothrow @nogc
     {
         static if (n == 0)
             return storage.ptr;
         else
         {
             return storage.ptr+offsets[n];
         }
     }
     enum dim = Types.length;
     size_t[dim] offsets;// offset for level x
     size_t[dim] sz;// size of level x
     alias bitWidth = staticMap!(bitSizeOf, Types);
     size_t[] storage;
 }

 // Exercises MultiArray resizing on every level, both at compile time
 // (CTFE) and at run time, checking that data of the other levels survives.
 @system unittest
 {
     import std.conv : text;
     enum dg = (){
         // sizes are:
         // lvl0: 3, lvl1 : 2, lvl2: 1
         auto m = MultiArray!(int, ubyte, int)(3,2,1);

         // expects level k to hold 1, 2, ..., n
         static void check(size_t k, T)(ref T m, int n)
         {
             foreach (i; 0 .. n)
                 assert(m.slice!(k)[i] == i+1, text("level:",i," : ",m.slice!(k)[0 .. n]));
         }

         // expects level k to hold n, n-1, ..., 1
         static void checkB(size_t k, T)(ref T m, int n)
         {
             foreach (i; 0 .. n)
                 assert(m.slice!(k)[i] == n-i, text("level:",i," : ",m.slice!(k)[0 .. n]));
         }

         // writes 1, 2, ..., n into level k
         static void fill(size_t k, T)(ref T m, int n)
         {
             foreach (i; 0 .. n)
                 m.slice!(k)[i] = force!ubyte(i+1);
         }

         // writes n, n-1, ..., 1 into level k
         static void fillB(size_t k, T)(ref T m, int n)
         {
             foreach (i; 0 .. n)
                 m.slice!(k)[i] = force!ubyte(n-i);
         }

         m.length!1 = 100;
         fill!1(m, 100);
         check!1(m, 100);

         // growing level 0 must not disturb level 1
         m.length!0 = 220;
         fill!0(m, 220);
         check!1(m, 100);
         check!0(m, 220);

         m.length!2 = 17;
         fillB!2(m, 17);
         checkB!2(m, 17);
         check!0(m, 220);
         check!1(m, 100);

         // growing the last level keeps its existing prefix
         m.length!2 = 33;
         checkB!2(m, 17);
         fillB!2(m, 33);
         checkB!2(m, 33);
         check!0(m, 220);
         check!1(m, 100);

         m.length!1 = 195;
         fillB!1(m, 195);
         checkB!1(m, 195);
         checkB!2(m, 33);
         check!0(m, 220);

         // bit-packed levels: shrink level 0, grow level 1, then refill both
         auto marr = MultiArray!(BitPacked!(uint, 4), BitPacked!(uint, 6))(20, 10);
         marr.length!0 = 15;
         marr.length!1 = 30;
         fill!1(marr, 30);
         fill!0(marr, 15);
         check!1(marr, 30);
         check!0(marr, 15);
         return 0;
     };
     enum ct = dg();
     auto rt = dg();
 }

 @system unittest
 {// more bitpacking tests
     import std.conv : text;

     // five levels with 3-, 4-, 3-, 6-bit and bool elements
     alias Bitty =
       MultiArray!(BitPacked!(size_t, 3)
                 , BitPacked!(size_t, 4)
                 , BitPacked!(size_t, 3)
                 , BitPacked!(size_t, 6)
                 , bool);
     // bit-field extractors that split a 16-bit index into per-level indices
     alias fn1 = sliceBits!(13, 16);
     alias fn2 = sliceBits!( 9, 13);
     alias fn3 = sliceBits!( 6,  9);
     alias fn4 = sliceBits!( 0,  6);
     // expects every element of level lvl to equal its own index
     static void check(size_t lvl, MA)(ref MA arr){
         for (size_t i = 0; i< arr.length!lvl; i++)
             assert(arr.slice!(lvl)[i] == i, text("Mismatch on lvl ", lvl, " idx ", i, " value: ", arr.slice!(lvl)[i]));
     }

     // stores each index as the value at that index on level lvl
     static void fillIdx(size_t lvl, MA)(ref MA arr){
         for (size_t i = 0; i< arr.length!lvl; i++)
             arr.slice!(lvl)[i] = i;
     }
     Bitty m1;

     // grow from a default-initialized (empty) array, last level first
     m1.length!4 = 10;
     m1.length!3 = 2^^6;
     m1.length!2 = 2^^3;
     m1.length!1 = 2^^4;
     m1.length!0 = 2^^3;

     m1.length!4 = 2^^16;

     for (size_t i = 0; i< m1.length!4; i++)
         m1.slice!(4)[i] = i % 2;

     // fill levels one by one, then re-check all of them to catch
     // writes leaking into a neighbouring level
     fillIdx!1(m1);
     check!1(m1);
     fillIdx!2(m1);
     check!2(m1);
     fillIdx!3(m1);
     check!3(m1);
     fillIdx!0(m1);
     check!0(m1);
     check!3(m1);
     check!2(m1);
     check!1(m1);
     for (size_t i=0; i < 2^^16; i++)
     {
         m1.slice!(4)[i] = i % 2;
         m1.slice!(0)[fn1(i)] = fn1(i);
         m1.slice!(1)[fn2(i)] = fn2(i);
         m1.slice!(2)[fn3(i)] = fn3(i);
         m1.slice!(3)[fn4(i)] = fn4(i);
     }
     for (size_t i=0; i < 2^^16; i++)
     {
         assert(m1.slice!(4)[i] == i % 2);
         assert(m1.slice!(0)[fn1(i)] == fn1(i));
         assert(m1.slice!(1)[fn2(i)] == fn2(i));
         assert(m1.slice!(2)[fn3(i)] == fn3(i));
         assert(m1.slice!(3)[fn4(i)] == fn4(i));
     }
 }

 // Number of size_t words needed to hold new_len elements of _bits bits
 // each, with the element width first rounded up to a power of two
 // (the same rounding PackedArrayView applies).
 size_t spaceFor(size_t _bits)(size_t new_len) @safe pure nothrow @nogc
 {
     import std.math : nextPow2;
     enum wordBits = 8*size_t.sizeof;
     static if (_bits == 1)
         enum size_t bits = 1;
     else
         enum size_t bits = nextPow2(_bits - 1);// see PackedArrayView
     static if (bits <= wordBits)
     {
         // several elements share one word: divide rounding up
         enum perWord = wordBits/bits;
         return (new_len + perWord - 1)/perWord;
     }
     else
     {
         // one element spans a whole number of words
         static assert(bits % wordBits == 0);
         return new_len * bits/wordBits;
     }
 }

 // True for types that may be stored as packed elements: BitPacked
 // wrappers, integral types, bool and character types.
 enum isBitPackableType(T) =
     isBitPacked!T || isIntegral!T || is(T == bool) || isSomeChar!T;

 //============================================================================
 // Selects the PackedArrayViewImpl instance for T, with the element width
 // rounded up to a power of two.
 template PackedArrayView(T)
 if ((is(T dummy == BitPacked!(U, sz), U, size_t sz)
     && isBitPackableType!U) || isBitPackableType!T)
 {
     import std.math : nextPow2;
     private enum bits = bitSizeOf!T;
     static if (bits > 1)
         private enum size_t slotBits = nextPow2(bits - 1);
     else
         private enum size_t slotBits = 1;
     alias PackedArrayView = PackedArrayViewImpl!(T, slotBits);
 }

 // Unchecked, fast access to a chunk of RAM as if it contained packed
 // values of type T; the element width is rounded up to a power of two.
 template PackedPtr(T)
 if ((is(T dummy == BitPacked!(U, sz), U, size_t sz)
     && isBitPackableType!U) || isBitPackableType!T)
 {
     import std.math : nextPow2;
     private enum bits = bitSizeOf!T;
     static if (bits > 1)
         private enum size_t slotBits = nextPow2(bits - 1);
     else
         private enum size_t slotBits = 1;
     alias PackedPtr = PackedPtrImpl!(T, slotBits);
 }

 // Pointer-like accessor that treats a run of size_t words as an array of
 // `bits`-wide elements of type T. It performs no bounds checking and does
 // not own the memory it points to.
 struct PackedPtrImpl(T, size_t bits)
 {
 pure nothrow:
     static assert(isPow2OrZero(bits));

     this(inout(size_t)* ptr)inout @safe @nogc
     {
         origin = ptr;
     }

     // Generic read: locate the word holding element n, then shift and mask.
     private T simpleIndex(size_t n) inout
     {
         immutable q = n / factor;
         immutable r = n % factor;
         return cast(T)((origin[q] >> bits*r) & mask);
     }

     // Generic write: clear the element's bit field inside its word and
     // OR in the new value. For integral T the value must fit into `mask`.
     private void simpleWrite(TypeOfBitPacked!T val, size_t n)
     in
     {
         static if (isIntegral!T)
             assert(val <= mask);
     }
     body
     {
         immutable q = n / factor;
         immutable r = n % factor;
         immutable tgt_shift = bits*r;
         immutable word = origin[q];
         origin[q] = (word & ~(mask << tgt_shift))
             | (cast(size_t) val << tgt_shift);
     }

     // Fast path: the element width matches a native integer type U, so the
     // storage can be reinterpreted as U[] and indexed directly.
     static if (factor == bytesPerWord// can safely pack by byte
          || factor == 1 // a whole word at a time
          || ((factor == bytesPerWord/2 || factor == bytesPerWord/4)
                 && hasUnalignedReads)) // this needs unaligned reads
     {
         static if (factor == bytesPerWord)
             alias U = ubyte;
         else static if (factor == bytesPerWord/2)
             alias U = ushort;
         else static if (factor == bytesPerWord/4)
             alias U = uint;
         else static if (size_t.sizeof == 8 && factor == bytesPerWord/8)
             alias U = ulong;

         // Direct U-typed read on little-endian targets at run time;
         // CTFE and other targets use the shift-and-mask path.
         T opIndex(size_t idx) inout
         {
             T ret;
             version (LittleEndian)
                 ret = __ctfe ? simpleIndex(idx) :
                     cast(inout(T))(cast(U*) origin)[idx];
             else
                 ret = simpleIndex(idx);
             return ret;
         }

         static if (isBitPacked!T) // lack of user-defined implicit conversion
         {
             void opIndexAssign(T val, size_t idx)
             {
                 return opIndexAssign(cast(TypeOfBitPacked!T) val, idx);
             }
         }

         // Direct U-typed write on little-endian targets at run time;
         // CTFE and other targets use the read-modify-write path.
         void opIndexAssign(TypeOfBitPacked!T val, size_t idx)
         {
             version (LittleEndian)
             {
                 if (__ctfe)
                     simpleWrite(val, idx);
                 else
                     (cast(U*) origin)[idx] = cast(U) val;
             }
             else
                 simpleWrite(val, idx);
         }
     }
     else
     {
         // Generic path: always shift and mask within the containing word.
         T opIndex(size_t n) inout
         {
             return simpleIndex(n);
         }

         static if (isBitPacked!T) // lack of user-defined implicit conversion
         {
             void opIndexAssign(T val, size_t idx)
             {
                 return opIndexAssign(cast(TypeOfBitPacked!T) val, idx);
             }
         }

         void opIndexAssign(TypeOfBitPacked!T val, size_t n)
         {
             return simpleWrite(val, n);
         }
     }

 private:
     // factor - number of elements in one machine word
     // mask - the low `bits` bits set, selects one element after shifting
     enum factor = size_t.sizeof*8/bits, mask = 2^^bits-1;
     enum bytesPerWord =  size_t.sizeof;
     // first word of the packed data
     size_t* origin;
 }

 // data is packed only by power of two sized packs per word,
 // thus avoiding mul/div overhead at the cost of ultimate packing
 // this construct doesn't own memory, only provides access, see MultiArray for usage
 //
 // A view covers `limit` elements starting `ofs` elements past `ptr`;
 // all indices taken by the public methods are relative to the view.
 struct PackedArrayViewImpl(T, size_t bits)
 {
 pure nothrow:

     // View over `items` elements that begin `offset` elements past `origin`.
     this(inout(size_t)* origin, size_t offset, size_t items) inout @safe
     {
         ptr = inout(PackedPtr!(T))(origin);
         ofs = offset;
         limit = items;
     }

     // Returns true if every element in [s, e) is zero.
     bool zeros(size_t s, size_t e)
     in
     {
         assert(s <= e);
     }
     body
     {
         s += ofs;
         e += ofs;
         immutable pad_s = roundUp(s);
         // The range ends before the next word boundary: test it element by
         // element. Comparing s (not pad_s) with e here would let the
         // head loop below run up to pad_s and inspect elements past e.
         if (pad_s >= e)
         {
             foreach (i; s .. e)
                 if (ptr[i])
                     return false;
             return true;
         }
         immutable pad_e = roundDown(e);
         size_t i;
         // unaligned head
         for (i=s; i<pad_s; i++)
             if (ptr[i])
                 return false;
         // all in between is x*factor elements
         for (size_t j=i/factor; i<pad_e; i+=factor, j++)
             if (ptr.origin[j])
                 return false;
         // unaligned tail
         for (; i<e; i++)
             if (ptr[i])
                 return false;
         return true;
     }

     // Element idx of the view.
     T opIndex(size_t idx) inout
     in
     {
         assert(idx < limit);
     }
     body
     {
         return ptr[ofs + idx];
     }

     static if (isBitPacked!T) // lack of user-defined implicit conversion
     {
         void opIndexAssign(T val, size_t idx)
         {
             return opIndexAssign(cast(TypeOfBitPacked!T) val, idx);
         }
     }

     // Stores val into element idx of the view.
     void opIndexAssign(TypeOfBitPacked!T val, size_t idx)
     in
     {
         assert(idx < limit);
     }
     body
     {
         ptr[ofs + idx] = val;
     }

     static if (isBitPacked!T) // lack of user-defined implicit conversions
     {
         void opSliceAssign(T val, size_t start, size_t end)
         {
             opSliceAssign(cast(TypeOfBitPacked!T) val, start, end);
         }
     }

     // Fills elements [start, end) with val, writing whole words where the
     // range covers them completely.
     void opSliceAssign(TypeOfBitPacked!T val, size_t start, size_t end)
     in
     {
         assert(start <= end);
         assert(end <= limit);
     }
     body
     {
         // account for the view's offset
         start += ofs;
         end += ofs;
         // rounded to factor granularity
         immutable pad_start = roundUp(start);// rounded up
         if (pad_start >= end) //rounded up >= then end of slice
         {
             //nothing to gain, use per element assignment
             foreach (i; start .. end)
                 ptr[i] = val;
             return;
         }
         immutable pad_end = roundDown(end); // rounded down
         size_t i;
         for (i=start; i<pad_start; i++)
             ptr[i] = val;
         // all in between is x*factor elements
         if (pad_start != pad_end)
         {
             immutable repval = replicateBits!(factor, bits)(val);
             for (size_t j=i/factor; i<pad_end; i+=factor, j++)
                 ptr.origin[j] = repval;// so speed it up by factor
         }
         for (; i<end; i++)
             ptr[i] = val;
     }

     // Sub-view over elements [from, to) of this view.
     auto opSlice(size_t from, size_t to)inout
     in
     {
         assert(from <= to);
         // `to` is relative to this view, just like the index in opIndex,
         // so it is bounded by limit alone; adding ofs would reject valid
         // slices of any view that itself starts at a non-zero offset
         assert(to <= limit);
     }
     body
     {
         return typeof(this)(ptr.origin, ofs + from, to - from);
     }

     auto opSlice(){ return opSlice(0, length); }

     // Element-wise equality; compares whole words when both views are
     // word-aligned and span a whole number of words.
     bool opEquals(T)(auto ref T arr) const
     {
         if (limit != arr.limit)
            return false;
         size_t s1 = ofs, s2 = arr.ofs;
         size_t e1 = s1 + limit, e2 = s2 + limit;
         if (s1 % factor == 0 && s2 % factor == 0 && length % factor == 0)
         {
             return ptr.origin[s1/factor .. e1/factor]
                 == arr.ptr.origin[s2/factor .. e2/factor];
         }
         for (size_t i=0;i<limit; i++)
             if (this[i] != arr[i])
                 return false;
         return true;
     }

     @property size_t length()const{ return limit; }

 private:
     // round an element index up/down to a multiple of factor (word boundary)
     auto roundUp()(size_t val){ return (val+factor-1)/factor*factor; }
     auto roundDown()(size_t val){ return val/factor*factor; }
     // factor - number of elements in one machine word
     enum factor = size_t.sizeof*8/bits;
     PackedPtr!(T) ptr;
     size_t ofs, limit;
 }


 // Random-access range over the index window [from, to) of an indexable
 // object that is reached through a pointer; the object is neither owned
 // nor copied.
 private struct SliceOverIndexed(T)
 {
     enum assignableIndex = is(typeof((){ T.init[0] = Item.init; }));
     enum assignableSlice = is(typeof((){ T.init[0 .. 0] = Item.init; }));

     // Element idx, counted from the start of the window.
     auto opIndex(size_t idx)const
     in
     {
         assert(idx < to - from);
     }
     body
     {
         immutable pos = from + idx;
         return (*arr)[pos];
     }

     static if (assignableIndex)
     void opIndexAssign(Item val, size_t idx)
     in
     {
         assert(idx < to - from);
     }
     body
     {
         immutable pos = from + idx;
         (*arr)[pos] = val;
     }

     // Sub-window [a, b), both bounds relative to this window.
     auto opSlice(size_t a, size_t b)
     {
         immutable lo = from + a;
         immutable hi = from + b;
         return typeof(this)(lo, hi, arr);
     }

     // static if (assignableSlice)
     void opSliceAssign(V)(V val, size_t start, size_t end)
     {
         (*arr)[from+start .. from+end] = val;
     }

     // Copy of the whole window.
     auto opSlice()
     {
         return typeof(this)(from, to, arr);
     }

     @property size_t length()const { return to - from; }

     auto opDollar()const { return length; }

     @property bool empty()const { return to == from; }

     @property auto front()const { return (*arr)[from]; }

     static if (assignableIndex)
     @property void front(Item val) { (*arr)[from] = val; }

     @property auto back()const { return (*arr)[to - 1]; }

     static if (assignableIndex)
     @property void back(Item val) { (*arr)[to - 1] = val; }

     @property auto save() inout { return this; }

     void popFront() { ++from; }

     void popBack() { --to; }

     // Equal when lengths match and all elements compare equal pairwise.
     bool opEquals(R)(auto ref R arr) const
     {
         if (length != arr.length)
             return false;
         foreach (i; 0 .. length)
         {
             if (this[i] != arr[i])
                 return false;
         }
         return true;
     }
 private:
     alias Item = typeof(T.init[0]);
     size_t from, to;
     T* arr;
 }

 // Compile-time check that SliceOverIndexed models a random-access range.
 static assert(isRandomAccessRange!(SliceOverIndexed!(int[])));

 // Read-only window [a, b) over the indexable object that x points to.
 SliceOverIndexed!(const(T)) sliceOverIndexed(T)(size_t a, size_t b, const(T)* x)
 if (is(Unqual!T == T))
 {
     alias View = SliceOverIndexed!(const(T));
     return View(a, b, x);
 }

 // BUG? inout is out of reach
 //...SliceOverIndexed.arr only parameters or stack based variables can be inout
 // Mutable window [a, b) over the indexable object that x points to.
 SliceOverIndexed!T sliceOverIndexed(T)(size_t a, size_t b, T* x)
 if (is(Unqual!T == T))
 {
     alias View = SliceOverIndexed!T;
     return View(a, b, x);
 }

 // Walks SliceOverIndexed through its range primitives, slicing,
 // comparison and slice assignment.
 @system unittest
 {
     int[] idxArray = [2, 3, 5, 8, 13];
     auto sliced = sliceOverIndexed(0, idxArray.length, &idxArray);

     assert(!sliced.empty);
     assert(sliced.front == 2);
     sliced.front = 1;
     assert(sliced.front == 1);
     assert(sliced.back == 13);
     sliced.popFront();
     assert(sliced.front == 3);
     assert(sliced.back == 13);
     sliced.back = 11;
     assert(sliced.back == 11);
     sliced.popBack();

     assert(sliced.front == 3);
     assert(sliced[$-1] == 8);
     sliced = sliced[];
     assert(sliced[0] == 3);
     assert(sliced.back == 8);
     sliced = sliced[1..$];
     assert(sliced.front == 5);
     sliced = sliced[0..$-1];
     assert(sliced[$-1] == 5);

     int[] other = [2, 5];
     assert(sliced[] == sliceOverIndexed(1, 2, &other));
     sliceOverIndexed(0, 2, &idxArray)[0 .. 2] = -1;
     assert(idxArray[0 .. 2] == [-1, -1]);
     // an empty window over a null array must report empty; this used to
     // slice idxArray and left nullArr unused
     uint[] nullArr = null;
     auto nullSlice = sliceOverIndexed(0, 0, &nullArr);
     assert(nullSlice.empty);
 }

 // Builds a PackedArrayView!T over `items` elements starting at the first
 // element stored at ptr (view offset 0), keeping the inout qualifier of ptr.
 private auto packedArrayView(T)(inout(size_t)* ptr, size_t items) @trusted pure nothrow
 {
     return inout(PackedArrayView!T)(ptr, 0, items);
 }


 //============================================================================
 // Partially unrolled binary search using Shar's method
 //============================================================================

 // Generates the source of a fully unrolled uniform binary search meant to
 // be mixed into a function that has `range`, `needle`, `pred`, `idx` and
 // the current step `m` in scope (m is expected to be a power of two not
 // larger than size/2, or zero).
 //
 // The generated switch jumps to the case matching the current step and
 // falls through all smaller steps down to the final single-element probe.
 // `size` must be a non-zero power of two.
 string genUnrolledSwitchSearch(size_t size) @safe pure nothrow
 {
     import core.bitop : bsr;
     import std.array : replace;
     import std.conv : to;
     // bsr(0) is undefined, so zero is not an acceptable size here
     assert(size != 0 && isPow2OrZero(size));
     // m == 0 happens for one-element ranges; bsr(0) is undefined, so map
     // it to case 0 explicitly instead of relying on bsr's result
     string code = `
     import core.bitop : bsr;
     auto power = m ? bsr(m)+1 : 0;
     switch (power){`;
     size_t i = bsr(size);
     // one case per step 2^^val, from the largest step down to 1
     foreach_reverse (val; 0 .. bsr(size))
     {
         auto v = 2^^val;
         code ~= `
         case pow:
             if (pred(range[idx+m], needle))
                 idx +=  m;
             goto case;
         `.replace("m", to!string(v))
         .replace("pow", to!string(i));
         i--;
     }
     // final probe of the single remaining candidate
     code ~= `
         case 0:
             if (pred(range[idx], needle))
                 idx += 1;
             goto default;
         `;
     code ~= `
         default:
     }`;
     return code;
 }

 // True when sz has at most one bit set, i.e. is zero or a power of two.
 bool isPow2OrZero(size_t sz) @safe pure nothrow @nogc
 {
     // See also: std.math.isPowerOf2()
     // clearing the lowest set bit leaves nothing behind
     immutable withoutLowestBit = sz & (sz - 1);
     return withoutLowestBit == 0;
 }

 // Uniform binary search over a range whose length is a power of two.
 // Returns the number of leading elements for which pred(element, needle)
 // holds, assuming those elements form a prefix of the range.
 size_t uniformLowerBound(alias pred, Range, T)(Range range, T needle)
 if (is(T : ElementType!Range))
 {
     assert(isPow2OrZero(range.length));
     size_t idx = 0;
     // halve the step each round, advancing whenever the probe still matches
     for (size_t step = range.length/2; step != 0; step /= 2)
     {
         if (pred(range[idx+step], needle))
             idx += step;
     }
     // one last probe decides whether the element at idx itself counts
     return pred(range[idx], needle) ? idx + 1 : idx;
 }

 /*
     Same contract as uniformLowerBound, but the last halving steps are executed
     by switch-based code produced by genUnrolledSwitchSearch.

     The names `range`, `needle`, `pred`, `idx` and `m` are referenced by the
     mixed-in text and therefore must keep exactly these spellings.
 */
 size_t switchUniformLowerBound(alias pred, Range, T)(Range range, T needle)
 if (is(T : ElementType!Range))
 {
     assert(isPow2OrZero(range.length));
     enum unrollLimit = 1 << 10;
     size_t idx = 0;
     size_t m = range.length/2;
     // plain loop for steps too large for the unrolled part
     for (; m >= unrollLimit; m /= 2)
     {
         if (pred(range[idx+m], needle))
             idx += m;
     }
     // remaining steps (m < unrollLimit) plus the final test of range[idx]
     mixin(genUnrolledSwitchSearch(unrollLimit));
     return idx;
 }

 /*
     Lower-bound search for ranges of arbitrary length, built on a routine
     `uniLowerBound` that only handles power-of-two lengths.

     A length that is not a power of two is covered by two overlapping
     power-of-two windows: the first `n` elements and the last `k` elements.
     One probe at index n-1 decides which window contains the answer.

     Params:
         _pred  = binary predicate (string or callable), "a<b" by default
         range  = random-access range partitioned by `pred(e, needle)`
         needle = value to search for

     Returns: the number of leading elements `e` with `pred(e, needle)` true.
 */
 template sharMethod(alias uniLowerBound)
 {
     size_t sharMethod(alias _pred="a<b", Range, T)(Range range, T needle)
         if (is(T : ElementType!Range))
     {
         import std.functional : binaryFun;
         import std.math : nextPow2, truncPow2;
         alias pred = binaryFun!_pred;
         if (range.length == 0)
             return 0;
         // largest power of two not exceeding the length
         immutable size_t n = truncPow2(range.length);
         if (n == range.length)
             return uniLowerBound!pred(range, needle);
         if (pred(range[n-1], needle))
         {
             // The answer lies in the tail of range.length - n elements
             // (1 <= tail < n). nextPow2 is strictly greater than its argument,
             // so k covers the whole tail and still satisfies k <= n < range.length.
             // Passing tail + 1 instead would give k == 2*n > range.length for
             // lengths of the form 2^^j - 1 and slice out of bounds.
             immutable size_t k = nextPow2(range.length - n);
             return range.length - k + uniLowerBound!pred(range[$-k..$], needle);
         }
         else
             return uniLowerBound!pred(range[0 .. n], needle);
     }
 }

 // Arbitrary-length lower bound using the plain loop for power-of-two windows.
 alias sharLowerBound = sharMethod!uniformLowerBound;
 // Same search, with the power-of-two windows handled by the switch-unrolled variant.
 alias sharSwitchLowerBound = sharMethod!switchUniformLowerBound;

 // Cross-checks both Shar-method searches against std.range's lowerBound.
 @safe unittest
 {
     import std.array : array;
     import std.range : assumeSorted, iota;

     // reference answer computed by the standard library
     auto stdLowerBound(T)(T[] range, T needle)
     {
         return assumeSorted(range).lowerBound(needle).length;
     }

     void checkAgainstReference(int[] haystack, int needle)
     {
         immutable expected = stdLowerBound(haystack, needle);
         assert(expected == sharLowerBound(haystack, needle));
         assert(expected == sharSwitchLowerBound(haystack, needle));
     }

     immutable MAX = 5*1173;
     auto arr = array(iota(5, MAX, 5));
     assert(arr.length == MAX/5-1);
     // every needle from below the first element to above the last one
     foreach (i; 0 .. MAX+5)
         checkAgainstReference(arr, i);

     // empty haystack
     arr = [];
     checkAgainstReference(arr, 33);
 }
 //============================================================================

 @safe
 {
 // hope to see similar stuff in public interface... once Allocators are out
 //@@@BUG moveFront and friends? dunno, for now it's POD-only

 /*
     Replaces dest[from .. to] with the elements of `stuff`, resizing `dest`
     when the two lengths differ.

     Params:
         Policy = storage policy whose `realloc` resizes `dest`;
                  `void` means "assign to dest.length directly"
         dest   = container to modify in place
         from   = start of the replaced slice
         to     = end (exclusive) of the replaced slice
         stuff  = replacement elements; must provide `.length`

     Returns: index just past the inserted elements, i.e. from + stuff.length.
 */
 @trusted size_t genericReplace(Policy=void, T, Range)
     (ref T dest, size_t from, size_t to, Range stuff)
 {
     import std.algorithm.mutation : copy;
     immutable size_t removed = to - from;
     immutable size_t inserted = stuff.length;
     immutable size_t stuff_end = from + inserted;

     if (inserted == removed)
     {// same size: overwrite in place
         copy(stuff, dest[from .. to]);
         return stuff_end;
     }

     if (inserted > removed)
     {// replace increases length
         immutable size_t grow = inserted - removed;
         static if (is(Policy == void))
             dest.length = dest.length+grow;//@@@BUG lame @property
         else
             dest = Policy.realloc(dest, dest.length+grow);
         // move the old tail to its new position, then fill the gap
         copyBackwards(dest[to .. dest.length-grow],
             dest[to+grow .. dest.length]);
         copyForward(stuff, dest[from .. stuff_end]);
         return stuff_end;
     }

     // replace decreases length
     immutable size_t shrink = removed - inserted;
     copy(stuff, dest[from .. stuff_end]);
     // pull the old tail down before the storage is cut
     copyForward(dest[to .. dest.length],
         dest[stuff_end .. dest.length-shrink]);
     static if (is(Policy == void))
         dest.length = dest.length - shrink;//@@@BUG lame @property
     else
         dest = Policy.realloc(dest, dest.length-shrink);
     return stuff_end;
 }


 // Simple storage manipulation policy
 @trusted private struct GcPolicy
 {
     import std.traits : isDynamicArray;

     // Copy of `arr` made with the built-in .dup
     static T[] dup(T)(const T[] arr)
     {
         auto copy = arr.dup;
         return copy;
     }

     // New array of `size` elements
     static T[] alloc(T)(size_t size)
     {
         return new T[](size);
     }

     // Resizes through the built-in .length property and returns the array
     static T[] realloc(T)(T[] arr, size_t sz)
     {
         arr.length = sz;
         return arr;
     }

     // Replaces dest[from .. to] with `stuff`
     static void replaceImpl(T, Range)(ref T[] dest, size_t from, size_t to, Range stuff)
     {
         replaceInPlace(dest, from, to, stuff);
     }

     // Appends either a whole input range or a single value converted with force!T
     static void append(T, V)(ref T[] arr, V value)
     {
         static if (isInputRange!V)
             insertInPlace(arr, arr.length, value);
         else
             arr ~= force!T(value);
     }

     // Drops the reference; in debug builds mutable arrays are overwritten
     // with a recognizable pattern first
     static void destroy(T)(ref T arr)
         if (isDynamicArray!T)
     {
         static if (is(Unqual!T == T))
         {
             debug
             {
                 arr[] = cast(typeof(T.init[0]))(0xdead_beef);
             }
         }
         arr = null;
     }
 }

 // ditto
 // Storage policy that keeps arrays on the C heap (malloc/realloc/free).
 // Arrays obtained here must be released with ReallocPolicy.destroy.
 @trusted struct ReallocPolicy
 {
     import std.range.primitives : hasLength;

     // Returns a freshly allocated copy of `arr`.
     static T[] dup(T)(const T[] arr)
     {
         auto result = alloc!T(arr.length);
         result[] = arr[];
         return result;
     }

     // Allocates uninitialized room for `size` elements of T.
     // Returns null for size == 0; aborts on size overflow;
     // throws (via enforce) when malloc fails.
     static T[] alloc(T)(size_t size)
     {
         import core.stdc.stdlib : malloc;
         import std.exception : enforce;

         // malloc(0) is allowed to return null, which must not be reported as
         // "out of memory"; an empty array needs no storage at all.
         if (!size)
             return null;

         import core.checkedint : mulu;
         bool overflow;
         size_t nbytes = mulu(size, T.sizeof, overflow);
         if (overflow) assert(0);

         auto ptr = cast(T*) enforce(malloc(nbytes), "out of memory on C heap");
         return ptr[0 .. size];
     }

     // Resizes `arr` to `size` elements and returns the (possibly moved) array.
     // A size of 0 frees the storage and returns null.
     static T[] realloc(T)(T[] arr, size_t size)
     {
         import core.stdc.stdlib : realloc;
         import std.exception : enforce;
         if (!size)
         {
             destroy(arr);
             return null;
         }

         import core.checkedint : mulu;
         bool overflow;
         size_t nbytes = mulu(size, T.sizeof, overflow);
         if (overflow) assert(0);

         auto ptr = cast(T*) enforce(realloc(arr.ptr, nbytes), "out of memory on C heap");
         return ptr[0 .. size];
     }

     // Replaces dest[from .. to] with `stuff`, resizing through this policy.
     static void replaceImpl(T, Range)(ref T[] dest, size_t from, size_t to, Range stuff)
     {
         genericReplace!(ReallocPolicy)(dest, from, to, stuff);
     }

     // Appends a single value (converted with force!T).
     static void append(T, V)(ref T[] arr, V value)
         if (!isInputRange!V)
     {
         if (arr.length == size_t.max) assert(0);
         arr = realloc(arr, arr.length+1);
         arr[$-1] = force!T(value);
     }

     @safe unittest
     {
         int[] arr;
         scope(exit) ReallocPolicy.destroy(arr); // C-heap storage is not collected
         ReallocPolicy.append(arr, 3);

         import std.algorithm.comparison : equal;
         assert(equal(arr, [3]));
     }

     // Appends all elements of an input range with a known length.
     static void append(T, V)(ref T[] arr, V value)
         if (isInputRange!V && hasLength!V)
     {
         import core.checkedint : addu;
         bool overflow;
         size_t nelems = addu(arr.length, value.length, overflow);
         if (overflow) assert(0);

         arr = realloc(arr, nelems);

         import std.algorithm.mutation : copy;
         copy(value, arr[$-value.length..$]);
     }

     @safe unittest
     {
         int[] arr;
         scope(exit) ReallocPolicy.destroy(arr); // C-heap storage is not collected
         ReallocPolicy.append(arr, [1,2,3]);

         import std.algorithm.comparison : equal;
         assert(equal(arr, [1,2,3]));
     }

     // Frees the storage (if any) and resets `arr` to null.
     static void destroy(T)(ref T[] arr)
     {
         import core.stdc.stdlib : free;
         if (arr.ptr)
             free(arr.ptr);
         arr = null;
     }
 }

 //build hack: naming this alias instantiates CowArray with ReallocPolicy
 // (NOTE(review): the reason the build needs this is not visible here)
 alias _RealArray = CowArray!ReallocPolicy;

 // Exercises ReallocPolicy.replaceImpl for growing, shrinking and equal-size replacements.
 @safe unittest
 {
     import std.algorithm.comparison : equal;

     with(ReallocPolicy)
     {
         // Applies the replacement to `orig`, compares with `result`
         // and always releases the C-heap storage of `orig`.
         bool test(T, U, V)(T orig, size_t from, size_t to, U toReplace, V result,
                    string file = __FILE__, size_t line = __LINE__)
         {
             replaceImpl(orig, from, to, toReplace);
             scope(exit) destroy(orig);
             return equal(orig, result);
         }
         // C-heap copy of the arguments
         static T[] arr(T)(T[] args... )
         {
             return dup(args);
         }

         int[] nothing = cast(int[])[];
         int[] triple = [5, 6, 7];

         assert(test(arr([1, 2, 3, 4]), 0, 0, triple, [5, 6, 7, 1, 2, 3, 4]));
         assert(test(arr([1, 2, 3, 4]), 0, 2, nothing, [3, 4]));
         assert(test(arr([1, 2, 3, 4]), 0, 4, triple, [5, 6, 7]));
         assert(test(arr([1, 2, 3, 4]), 0, 2, triple, [5, 6, 7, 3, 4]));
         assert(test(arr([1, 2, 3, 4]), 2, 3, triple, [1, 2, 5, 6, 7, 4]));
     }
 }

 /**
     Tests if T is some kind of a set of code points. Intended for template constraints.
 */
 public template isCodepointSet(T)
 {
     // true exactly when T is some instantiation of InversionList
     enum isCodepointSet = is(T == InversionList!(Args), Args...);
 }

 /**
     Tests if $(D T) is a pair of integers that implicitly convert to $(D V).
     The following code must compile for any pair $(D T):
     ---
     (T x){ V a = x[0]; V b = x[1];}
     ---
     The following must not compile:
      ---
     (T x){ V c = x[2];}
     ---
 */
 public template isIntegralPair(T, V=uint)
 {
     // elements 0 and 1 exist and convert implicitly to V ...
     private enum hasTwoFields = is(typeof((T x){ V a = x[0]; V b = x[1];}));
     // ... while an element 2 is not accessible as a V
     private enum hasThirdField = is(typeof((T x){ V c = x[2]; }));

     enum isIntegralPair = hasTwoFields && !hasThirdField;
 }


 /**
     The recommended default type for set of $(CODEPOINTS).
     For details, see the current implementation: $(LREF InversionList).
 */
 public alias CodepointSet = InversionList!GcPolicy;


 //@@@BUG: std.typecons tuples depend on std.format to produce fields mixin
 // which relies on std.uni.isGraphical and this chain blows up with Forward reference error
 // hence below doesn't seem to work
 // public alias CodepointInterval = Tuple!(uint, "a", uint, "b");

 /**
     The recommended type of $(REF Tuple, std,_typecons)
     to represent [a, b$(RPAREN) intervals of $(CODEPOINTS). As used in $(LREF InversionList).
     Any interval type should pass $(LREF isIntegralPair) trait.
 */
 public struct CodepointInterval
 {
 pure:
     uint[2] _tuple;      // [0] is the lower bound `a`, [1] the upper bound `b`
     alias _tuple this;   // lets the interval be indexed like a two-element array

 @safe pure nothrow @nogc:

     this(uint low, uint high)
     {
         _tuple[1] = high;
         _tuple[0] = low;
     }
     // Compares element-wise with anything indexable by 0 and 1
     bool opEquals(T)(T val) const
     {
         if (_tuple[0] != val[0])
             return false;
         return _tuple[1] == val[1];
     }
     @property ref inout(uint) a() inout
     {
         return _tuple[0];
     }
     @property ref inout(uint) b() inout
     {
         return _tuple[1];
     }
 }

 /**
     $(P
     $(D InversionList) is a set of $(CODEPOINTS)
     represented as an array of open-right [a, b$(RPAREN)
     intervals (see $(LREF CodepointInterval) above).
     The name comes from the way the representation reads left to right.
     For instance a set of all values [10, 50$(RPAREN), [80, 90$(RPAREN),
     plus a singular value 60 looks like this:
     )
     ---
     10, 50, 60, 61, 80, 90
     ---
     $(P
     The way to read this is: start with negative meaning that all numbers
     smaller than the next one are not present in this set (and positive
     - the contrary). Then switch positive/negative after each
     number passed from left to right.
     )
     $(P This way negative spans until 10, then positive until 50,
     then negative until 60, then positive until 61, and so on.
     As seen this provides a space-efficient storage of highly redundant data
     that comes in long runs. A description which Unicode $(CHARACTER)
     properties fit nicely. The technique itself could be seen as a variation
     on $(LINK2 https://en.wikipedia.org/wiki/Run-length_encoding, RLE encoding).
     )

     $(P Sets are value types (just like $(D int) is) thus they
         are never aliased.
     )
         Example:
         ---
         auto a = CodepointSet('a', 'z'+1);
         auto b = CodepointSet('A', 'Z'+1);
         auto c = a;
         a = a | b;
         assert(a == CodepointSet('A', 'Z'+1, 'a', 'z'+1));
         assert(a != c);
         ---
     $(P See also $(LREF unicode) for simpler construction of sets
         from predefined ones.
     )

     $(P Memory usage is 8 bytes per each contiguous interval in a set.
     The value semantics are achieved by using the
     $(HTTP en.wikipedia.org/wiki/Copy-on-write, COW) technique
     and thus it's $(RED not) safe to cast this type to $(D_KEYWORD shared).
     )

     Note:
     $(P It's not recommended to rely on the template parameters
     or the exact type of a current $(CODEPOINT) set in $(D std.uni).
     The type and parameters may change when the standard
     allocators design is finalized.
     Use $(LREF isCodepointSet) with templates or just stick with the default
     alias $(LREF CodepointSet) throughout the whole code base.
     )
 */
 @trusted public struct InversionList(SP=GcPolicy)
 {
     import std.range : assumeSorted;

     /**
         Construct from another code point set of any type.
     */
     this(Set)(Set set) pure
         if (isCodepointSet!Set)
     {
         // The array is handed over to CowArray!(SP).reuse below, so it is
         // built through the storage policy (as the range constructor does)
         // rather than with the GC-backed `~=` operator.
         uint[] arr;
         foreach (v; set.byInterval)
         {
             SP.append(arr, v.a);
             SP.append(arr, v.b);
         }
         // intervals of an existing set are taken as they are
         data = CowArray!(SP).reuse(arr);
     }

     /**
         Construct a set from a forward range of code point intervals.
     */
     this(Range)(Range intervals) pure
         if (isForwardRange!Range && isIntegralPair!(ElementType!Range))
     {
         uint[] arr;
         foreach (v; intervals)
         {
             // isIntegralPair only promises v[0] and v[1] convertible to uint;
             // relying on `.a`/`.b` members would reject e.g. uint[2] elements
             // that the constraint lets through.
             uint low = v[0];
             uint high = v[1];
             SP.append(arr, low);
             SP.append(arr, high);
         }
         data = CowArray!(SP).reuse(arr);
         sanitize(); //enforce invariant: sort intervals etc.
     }

     //helper function that avoids sanity check to be CTFE-friendly
     private static fromIntervals(Range)(Range intervals) pure
     {
         import std.algorithm.iteration : map;
         import std.range : roundRobin;
         // interleave lower and upper bounds: a0, b0, a1, b1, ...
         auto lows = intervals.save.map!"a[0]"();
         auto highs = intervals.save.map!"a[1]"();
         InversionList result;
         result.data = CowArray!(SP)(roundRobin(lows, highs));
         return result;
     }
     //ditto until sort is CTFE-able
     private static fromIntervals()(uint[] intervals...) pure
     in
     {
         import std.conv : text;
         assert(intervals.length % 2 == 0, "Odd number of interval bounds [a, b)!");
         // walk the bounds pairwise: (0,1), (2,3), ...
         foreach (pair; 0 .. intervals.length / 2)
         {
             immutable a = intervals[2*pair], b = intervals[2*pair + 1];
             assert(a < b, text("illegal interval [a, b): ", a, " > ", b));
         }
     }
     body
     {
         // bounds are stored as given, no sorting or merging happens here
         InversionList result;
         result.data = CowArray!(SP)(intervals);
         return result;
     }

     /**
         Construct a set from plain values of code point intervals.
     */
     this()(uint[] intervals...)
     in
     {
         import std.conv : text;
         assert(intervals.length % 2 == 0, "Odd number of interval bounds [a, b)!");
         // walk the bounds pairwise: (0,1), (2,3), ...
         foreach (pair; 0 .. intervals.length / 2)
         {
             immutable a = intervals[2*pair], b = intervals[2*pair + 1];
             assert(a < b, text("illegal interval [a, b): ", a, " > ", b));
         }
     }
     body
     {
         data = CowArray!(SP)(intervals);
         // intervals may arrive in any order and may overlap
         sanitize(); //enforce invariant: sort intervals etc.
     }

     ///
     @safe unittest
     {
         import std.algorithm.comparison : equal;

         // two intervals given as [a, b) bounds
         auto set = CodepointSet('a', 'z'+1, 'а', 'я'+1);
         // Latin lowercase interval
         foreach (v; 'a'..'z'+1)
             assert(set[v]);
         // Cyrillic lowercase interval
         foreach (v; 'а'..'я'+1)
             assert(set[v]);
         //specific order is not required, intervals may intersect
         auto set2 = CodepointSet('а', 'я'+1, 'a', 'd', 'b', 'z'+1);
         //the same end result
         assert(set2.byInterval.equal(set.byInterval));
     }

     /**
         Get range that spans all of the $(CODEPOINT) intervals in this $(LREF InversionList).

         Example:
         -----------
         import std.algorithm.comparison : equal;
         import std.typecons : tuple;

         auto set = CodepointSet('A', 'D'+1, 'a', 'd'+1);

         assert(set.byInterval.equal([tuple('A','E'), tuple('a','e')]));
         -----------
     */
     @property auto byInterval()
     {
         // the range is parameterized on the type of the underlying storage
         alias Storage = typeof(data);
         return Intervals!Storage(data);
     }

     /**
         Tests the presence of code point $(D val) in this set.
     */
     bool opIndex(uint val) const
     {
         // Count the stored bounds that are <= val; the <= makes a lookup of
         // 'a' in [a, b) count 'a' itself. An odd count means val is inside an interval.
         // Equivalent to:
         // assumeSorted!((a,b) => a <= b)(data[]).lowerBound(val).length & 1
         immutable boundsPassed = sharSwitchLowerBound!"a <= b"(data[], val);
         return (boundsPassed & 1) != 0;
     }

     ///
     @safe unittest
     {
         // indexing a set with a code point tests membership
         auto gothic = unicode.Gothic;
         // Gothic letter ahsa
         assert(gothic['\U00010330']);
         // no ascii in Gothic obviously
         assert(!gothic['$']);
     }


     // Linear scan for $(D ch). Useful only for small sets.
     // TODO:
     // used internally in std.regex
     // should be properly exposed in a public API ?
     package auto scanFor()(dchar ch) const
     {
         // Find the first stored bound greater than ch; its index parity tells
         // whether ch lies inside an interval (odd) or in a gap (even).
         immutable count = data.length;
         size_t pos = 0;
         while (pos < count)
         {
             if (ch < data[pos])
                 return pos & 1;
             ++pos;
         }
         // ch is not below any bound
         return 0;
     }

     /// Number of $(CODEPOINTS) in this set
     @property size_t length()
     {
         // each interval [a, b) contributes b - a code points
         size_t total = 0;
         foreach (ival; byInterval)
             total += ival[1] - ival[0];
         return total;
     }

 // bootstrap full set operations from 4 primitives (suitable as a template mixin):
 // addInterval, skipUpTo, dropUpTo & byInterval iteration
 //============================================================================
 public:
     /**
         $(P Sets support natural syntax for set algebra, namely: )
         $(BOOKTABLE ,
             $(TR $(TH Operator) $(TH Math notation) $(TH Description) )
             $(TR $(TD &) $(TD a ∩ b) $(TD intersection) )
             $(TR $(TD |) $(TD a ∪ b) $(TD union) )
             $(TR $(TD -) $(TD a ∖ b) $(TD subtraction) )
             $(TR $(TD ~) $(TD a ~ b) $(TD symmetric set difference i.e. (a ∪ b) \ (a ∩ b)) )
         )
     */
     This opBinary(string op, U)(U rhs)
         if (isCodepointSet!U || is(U:dchar))
     {
         static if (op == "-") // anti-symmetric
         {
             auto result = this;
             result -= rhs;
             return result;
         }
         else static if (op == "&" || op == "|" || op == "~")
         {// symmetric ops thus can swap arguments to reuse r-value
             static if (!is(U:dchar) && is(Unqual!U == U))
             {
                 // rhs is an unqualified set passed by value:
                 // try hard to reuse it as the accumulator
                 mixin("rhs "~op~"= this;");
                 return rhs;
             }
             else
             {
                 // single code point or qualified set: work on a copy of this
                 auto result = this;
                 mixin("result "~op~"= rhs;");
                 return result;
             }
         }
         else
             static assert(0, "no operator "~op~" defined for Set");
     }

     ///
     @safe unittest
     {
         import std.algorithm.comparison : equal;
         import std.range : iota;

         auto lower = unicode.LowerCase;
         auto upper = unicode.UpperCase;
         auto ascii = unicode.ASCII;

         // '&' is intersection
         assert((lower & upper).empty); // no intersection
         auto lowerASCII = lower & ascii;
         assert(lowerASCII.byCodepoint.equal(iota('a', 'z'+1)));
         // '-' is subtraction:
         // throw away all of the lowercase ASCII
         assert((ascii - lower).length == 128 - 26);

         // '~' is symmetric difference
         auto onlyOneOf = lower ~ ascii;
         assert(!onlyOneOf['Δ']); // not ASCII and not lowercase
         assert(onlyOneOf['$']); // ASCII and not lowercase
         assert(!onlyOneOf['a']); // ASCII and lowercase
         assert(onlyOneOf['я']); // not ASCII but lowercase

         // '|' is union:
         // throw away all cased letters from ASCII
         auto noLetters = ascii - (lower | upper);
         assert(noLetters.length == 128 - 26*2);
     }

     /// The 'op=' versions of the above overloaded operators.
     ref This opOpAssign(string op, U)(U rhs)
         if (isCodepointSet!U || is(U:dchar))
     {
         static if (op == "&")        // intersection
             return this.intersect(rhs);// overloaded
         else static if (op == "-")   // set difference
             return this.sub(rhs);// overloaded
         else static if (op == "|")   // union
         {
             static if (is(U:dchar))
             {
                 // a single code point is the interval [rhs, rhs+1)
                 this.addInterval(rhs, rhs+1);
                 return this;
             }
             else
                 return this.add(rhs);
         }
         else static if (op == "~")   // symmetric set difference
         {
             // (this | rhs) - (this & rhs)
             auto common = this & rhs;
             this |= rhs;
             this -= common;
             return this;
         }
         else
             static assert(0, "no operator "~op~" defined for Set");
     }

     /**
         Tests the presence of codepoint $(D ch) in this set,
         the same as $(LREF opIndex).
     */
     bool opBinaryRight(string op: "in", U)(U ch) const
         if (is(U : dchar))
     {
         // `ch in set` is spelled exactly like `set[ch]`
         return opIndex(ch);
     }

     ///
     @safe unittest
     {
         // a Cyrillic letter is found, a Latin one is not
         assert('я' in unicode.Cyrillic);
         assert(!('z' in unicode.Cyrillic));
     }


     /**
      * Obtains a set that is the inversion of this set.
      *
      * See_Also: $(LREF inverted)
      */
     auto opUnary(string op: "!")()
     {
         // `!set` forwards to the `inverted` property
         auto flipped = this.inverted;
         return flipped;
     }

     /**
         A range that spans each $(CODEPOINT) in this set.
     */
     @property auto byCodepoint()
     {
         // Input range that walks the intervals of a set and yields every
         // code point of each interval [a, b) in turn.
         @trusted static struct CodepointRange
         {
             // Starts at the lower bound of the first interval (if there is one).
             this(This set)
             {
                 r = set.byInterval;
                 if (!r.empty)
                     cur = r.front.a;
             }

             // Current code point.
             @property dchar front() const
             {
                 return cast(dchar) cur;
             }

             // Exhausted once no interval is left.
             @property bool empty() const
             {
                 return r.empty;
             }

             // Advances by one code point; on reaching the upper bound of the
             // current interval moves on to the lower bound of the next one.
             void popFront()
             {
                 cur++;
                 while (cur >= r.front.b)
                 {
                     r.popFront();
                     if (r.empty)
                         break;
                     cur = r.front.a;
                 }
             }
         private:
             uint cur;                       // code point returned by front
             typeof(This.init.byInterval) r; // remaining intervals, current one first
         }

         return CodepointRange(this);
     }

     ///
     @safe unittest
     {
         import std.algorithm.comparison : equal;
         import std.range : iota;

         auto set = unicode.ASCII;
         // ASCII is exactly the code points 0 .. 0x7F
         // (the comparison result has to be asserted to check anything)
         assert(set.byCodepoint.equal(iota(0, 0x80)));
     }

     /**
         $(P Obtain textual representation of this set in form of
         open-right intervals and feed it to $(D sink).
         )
         $(P Used by various standard formatting facilities such as
          $(REF formattedWrite, std,_format), $(REF write, std,_stdio),
          $(REF writef, std,_stdio), $(REF to, std,_conv) and others.
         )
         Example:
         ---
         import std.conv;
         assert(unicode.ASCII.to!string == "[0..128$(RPAREN)");
         ---
     */

     private import std.format : FormatSpec;

     /***************************************
      * Obtain a textual representation of this InversionList
      * in form of open-right intervals.
      *
      * The formatting flag is applied individually to each value, for example:
      * $(LI $(B %s) and $(B %d) format the intervals as a [low .. high$(RPAREN) range of integrals)
      * $(LI $(B %x) formats the intervals as a [low .. high$(RPAREN) range of lowercase hex characters)
      * $(LI $(B %X) formats the intervals as a [low .. high$(RPAREN) range of uppercase hex characters)
      */
     void toString(Writer)(scope Writer sink,
                   FormatSpec!char fmt) /* const */
     {
         import std.format : formatValue;
         // an empty set produces no output at all
         for (auto ivals = byInterval; !ivals.empty; )
         {
             auto cur = ivals.front;
             ivals.popFront();

             // "[low..high)" with both bounds formatted according to fmt
             put(sink, "[");
             formatValue(sink, cur.a, fmt);
             put(sink, "..");
             formatValue(sink, cur.b, fmt);
             put(sink, ")");
             // single space between intervals, none after the last one
             if (!ivals.empty)
                 put(sink, " ");
         }
     }

     ///
     @safe unittest
     {
         import std.conv : to;
         import std.format : format;
         import std.uni : unicode;

         // default formatting prints decimal [low..high) intervals separated by spaces
         assert(unicode.Cyrillic.to!string ==
             "[1024..1157) [1159..1320) [7467..7468) [7544..7545) [11744..11776) [42560..42648) [42655..42656)");

         // The specs '%s' and '%d' are equivalent to the to!string call above.
         assert(format("%d", unicode.Cyrillic) == unicode.Cyrillic.to!string);

         // '%#x' prints every bound as prefixed lowercase hex
         assert(format("%#x", unicode.Cyrillic) ==
             "[0x400..0x485) [0x487..0x528) [0x1d2b..0x1d2c) [0x1d78..0x1d79) [0x2de0..0x2e00) "
             ~"[0xa640..0xa698) [0xa69f..0xa6a0)");

         // '%#X' prints every bound as prefixed uppercase hex
         assert(format("%#X", unicode.Cyrillic) ==
             "[0X400..0X485) [0X487..0X528) [0X1D2B..0X1D2C) [0X1D78..0X1D79) [0X2DE0..0X2E00) "
             ~"[0XA640..0XA698) [0XA69F..0XA6A0)");
     }

     @safe unittest
     {
         import std.exception : assertThrown;
         import std.format : format, FormatException;
         // the bounds are integers, so the floating-point spec '%a' is rejected
         assertThrown!FormatException(format("%a", unicode.ASCII));
     }


     /**
         Add an interval [a, b$(RPAREN) to this set.
     */
     ref add()(uint a, uint b)
     {
         // returns this set so that calls can be chained
         this.addInterval(a, b);
         return this;
     }

     ///
     @safe unittest
     {
         CodepointSet someSet;
         // add returns the set itself, so calls can be chained
         someSet.add('0', '5').add('A','Z'+1);
         // the upper bound is exclusive: '5' is only covered after this call
         someSet.add('5', '9'+1);
         assert(someSet['0']);
         assert(someSet['5']);
         assert(someSet['9']);
         assert(someSet['Z']);
     }

 private:

   package(std)  // used from: std.regex.internal.parser
     ref intersect(U)(U rhs)
         if (isCodepointSet!U)
     {
         // For every interval [a, b) of rhs: drop our content up to a, then
         // skip (keep) our content up to b. Exact Marker semantics are defined
         // by dropUpTo/skipUpTo elsewhere in this struct.
         Marker pos;
         foreach (keep; rhs.byInterval)
             pos = this.skipUpTo(keep.b, this.dropUpTo(keep.a, pos));
         // nothing after the last interval of rhs survives
         this.dropUpTo(uint.max, pos);
         return this;
     }

     ref intersect()(dchar ch)
     {
         // is ch inside any of our intervals?
         bool present = false;
         foreach (ival; byInterval)
         {
             if (ival.a <= ch && ch < ival.b)
             {
                 present = true;
                 break;
             }
         }
         // the result is either the single code point or the empty set
         if (present)
             this = This.init.add(ch, ch+1);
         else
             this = This.init;
         return this;
     }

     @safe unittest
     {
         // '-' is not a Cyrillic code point, so intersecting with it leaves nothing
         assert(unicode.Cyrillic.intersect('-').byInterval.empty);
     }

     ref sub()(dchar ch)
     {
         // removal of a single code point is delegated to subChar
         return this.subChar(ch);
     }

     // same as the above except that skip & drop parts are swapped
   package(std)  // used from: std.regex.internal.parser
     ref sub(U)(U rhs)
         if (isCodepointSet!U)
     {
         // For every interval [a, b) of rhs: skip (keep) our content up to a,
         // then drop our content up to b.
         Marker pos;
         foreach (cut; rhs.byInterval)
             pos = this.dropUpTo(cut.b, this.skipUpTo(cut.a, pos));
         return this;
     }

   package(std)  // used from: std.regex.internal.parse
     ref add(U)(U rhs)
         if (isCodepointSet!U)
     {
         // each insertion resumes from the marker returned by the previous one
         Marker hint;
         foreach (ival; rhs.byInterval)
             hint = addInterval(ival.a, ival.b, hint);
         return this;
     }

 // end of mixin-able part
 //============================================================================
 public:
     /**
         Obtains a set that is the inversion of this set.

         See the '!' $(LREF opUnary) for the same but using operators.
     */
     @property auto inverted()
     {
         InversionList result = this;
         // the inverse of the empty set is the whole code point range
         if (result.data.length == 0)
         {
             result.addInterval(0, lastDchar+1);
             return result;
         }

         // Toggle the boundary at 0: remove it when present, insert it otherwise.
         if (result.data[0] == 0)
             genericReplace(result.data, 0, 1, cast(uint[]) null);
         else
             genericReplace(result.data, 0, 0, [0]);

         // Same for the boundary at lastDchar+1, decided by this set's last bound.
         if (data[data.length-1] == lastDchar+1)
             genericReplace(result.data,
                 result.data.length-1, result.data.length, cast(uint[]) null);
         else
             genericReplace(result.data,
                 result.data.length, result.data.length, [lastDchar+1]);

         return result;
     }

     ///
     @safe unittest
     {
         auto set = unicode.ASCII;
         // union with the inverse gets all of the code points in the Unicode
         // (0x110000 of them)
         assert((set | set.inverted).length == 0x110000);
         // no intersection with the inverse
         assert((set & set.inverted).empty);
     }

     /**
         Generates string with D source code of unary function with name of
         $(D funcName) taking a single $(D dchar) argument. If $(D funcName) is empty
         the code is adjusted to be a lambda function.

         The function generated tests if the $(CODEPOINT) passed
         belongs to this set or not. The result is to be used with string mixin.
         The intended usage area is aggressive optimization via meta programming
         in parser generators and the like.

         Note: Use with care for relatively small or regular sets. It
         could end up being slower than just using multi-staged tables.

         Example:
         ---
         import std.stdio;

         // construct set directly from [a, b$(RPAREN) intervals
         auto set = CodepointSet(10, 12, 45, 65, 100, 200);
         writeln(set);
         writeln(set.toSourceCode("func"));
         ---

         The above outputs something along the lines of:
         ---
         bool func(dchar ch)  @safe pure nothrow @nogc
         {
             if (ch < 45)
             {
                 if (ch == 10 || ch == 11) return true;
                 return false;
             }
             else if (ch < 65) return true;
             else
             {
                 if (ch < 100) return false;
                 if (ch < 200) return true;
                 return false;
             }
         }
         ---
     */
     string toSourceCode(string funcName="")
     {
         import std.algorithm.searching : countUntil;
         import std.array : array;
         import std.format : format;
         // interval lists shorter than this are emitted as a linear if-chain
         enum maxBinary = 3;

         // Emits a braced block that tests `ch` against each interval in
         // order and ends with `return false;`.
         static string linearScope(R)(R ivals, string indent)
         {
             string result = indent~"{\n";
             string deeper = indent~"    ";
             foreach (ival; ivals)
             {
                 immutable span = ival[1] - ival[0];
                 assert(span != 0);
                 if (span == 1)
                 {
                     // single code point: one equality test
                     result ~= format("%sif (ch == %s) return true;\n", deeper, ival[0]);
                 }
                 else if (span == 2)
                 {
                     // two code points: two equality tests
                     result ~= format("%sif (ch == %s || ch == %s) return true;\n",
                         deeper, ival[0], ival[0]+1);
                 }
                 else
                 {
                     // longer run: below the lower bound means "not in the set"
                     // (earlier intervals were already ruled out), below the
                     // upper bound means "in the set"
                     if (ival[0] != 0) // dchar is unsigned and  < 0 is useless
                         result ~= format("%sif (ch < %s) return false;\n", deeper, ival[0]);
                     result ~= format("%sif (ch < %s) return true;\n", deeper, ival[1]);
                 }
             }
             result ~= format("%sreturn false;\n%s}\n", deeper, indent); // including empty range of intervals
             return result;
         }

         // Chooses between the linear if-chain (short lists) and a further
         // bisection around the middle interval.
         static string binaryScope(R)(R ivals, string indent)
         {
             // time to do unrolled comparisons?
             if (ivals.length < maxBinary)
                 return linearScope(ivals, indent);
             else
                 return bisect(ivals, ivals.length/2, indent);
         }

         // not used yet if/elsebinary search is far better with DMD  as of 2.061
         // and GDC is doing fine job either way
         // Emits a `switch (ch)` with one case (or case range) per interval.
         static string switchScope(R)(R ivals, string indent)
         {
             string result = indent~"switch (ch){\n";
             string deeper = indent~"    ";
             foreach (ival; ivals)
             {
                 if (ival[0]+1 == ival[1])
                 {
                     result ~= format("%scase %s: return true;\n",
                         deeper, ival[0]);
                 }
                 else
                 {
                     // case ranges are inclusive, hence the -1 on the upper bound
                     result ~= format("%scase %s: .. case %s: return true;\n",
                          deeper, ival[0], ival[1]-1);
                 }
             }
             result ~= deeper~"default: return false;\n"~indent~"}\n";
             return result;
         }

         // Emits a three-way split around range[idx]: intervals before it,
         // the interval itself, and intervals after it.
         static string bisect(R)(R range, size_t idx, string indent)
         {
             string deeper = indent ~ "    ";
             // bisect on one [a, b) interval at idx
             string result = indent~"{\n";
             // less branch, < a
             result ~= format("%sif (ch < %s)\n%s",
                 deeper, range[idx][0], binaryScope(range[0 .. idx], deeper));
             // middle point,  >= a && < b
             result ~= format("%selse if (ch < %s) return true;\n",
                 deeper, range[idx][1]);
             // greater or equal branch,  >= b
             result ~= format("%selse\n%s",
                 deeper, binaryScope(range[idx+1..$], deeper));
             return result~indent~"}\n";
         }

         // NOTE(review): with an empty funcName the header reads
         // `bool function(dchar ch) ...`; confirm that this is the intended
         // "lambda" form mentioned in the documentation.
         string code = format("bool %s(dchar ch) @safe pure nothrow @nogc\n",
             funcName.empty ? "function" : funcName);
         auto range = byInterval.array();
         // special case first bisection to be on ASCII vs beyond
         // (index of the first interval whose lower bound exceeds 0x80)
         auto tillAscii = countUntil!"a[0] > 0x80"(range);
         if (tillAscii <= 0) // everything is ASCII or nothing is ascii (-1 & 0)
             code ~= binaryScope(range, "");
         else
             code ~= bisect(range, tillAscii, "");
         return code;
     }

     /**
         Whether this set holds no $(CODEPOINTS) at all.
     */
     @property bool empty() const
     {
         // no stored interval boundaries means no code points
         return !data.length;
     }

     ///
     @safe unittest
     {
         // a default-initialized set reports zero length and is empty
         CodepointSet emptySet;
         assert(emptySet.length == 0);
         assert(emptySet.empty);
     }

 private:
     // shorthand for the type of the enclosing struct
     alias This = typeof(this);
     // a position in `data`; taken and returned by addInterval, dropUpTo and skipUpTo
     alias Marker = size_t;

     // A random-access range of integral pairs.
     //
     // Views a flat sequence a0, b0, a1, b1, ... held in `slice` as a sequence
     // of CodepointInterval(a, b). `start` and `end` are offsets into `slice`
     // (not interval indices) and delimit the part that is still in the range.
     static struct Intervals(Range)
     {
         // view over the whole of `sp`
         this(Range sp)
         {
             slice = sp;
             start = 0;
             end = sp.length;
         }

         // view over sp[s .. e]; s and e are offsets into `sp`
         this(Range sp, size_t s, size_t e)
         {
             slice = sp;
             start = s;
             end = e;
         }

         // first interval still in the range
         @property auto front()const
         {
             immutable a = slice[start];
             immutable b = slice[start+1];
             return CodepointInterval(a, b);
         }

         //may break sorted property - but we need std.sort to access it
         //hence package protection attribute
         package @property void front(CodepointInterval val)
         {
             slice[start] = val.a;
             slice[start+1] = val.b;
         }

         // last interval still in the range
         @property auto back()const
         {
             immutable a = slice[end-2];
             immutable b = slice[end-1];
             return CodepointInterval(a, b);
         }

         //may break sorted property, package-only for the same reason as front
         package @property void back(CodepointInterval val)
         {
             slice[end-2] = val.a;
             slice[end-1] = val.b;
         }

         // each interval occupies two slots, hence the step of 2
         void popFront()
         {
             start += 2;
         }

         void popBack()
         {
             end -= 2;
         }

         // idx-th interval counted from the current front
         auto opIndex(size_t idx) const
         {
             immutable a = slice[start+idx*2];
             immutable b = slice[start+idx*2+1];
             return CodepointInterval(a, b);
         }

         //may break sorted property, package-only for the same reason as front
         package void opIndexAssign(CodepointInterval val, size_t idx)
         {
             slice[start+idx*2] = val.a;
             slice[start+idx*2+1] = val.b;
         }

         // sub-range of intervals [s, e), counted from the current front
         auto opSlice(size_t s, size_t e)
         {
             return Intervals(slice, s*2+start, e*2+start);
         }

         // Number of intervals still in the range. Computed from start/end
         // rather than from the whole of `slice`, so that it stays correct
         // after popFront/popBack and for ranges produced by opSlice, and
         // agrees with `empty`.
         @property size_t length()const {  return (end - start)/2; }

         @property bool empty()const { return start == end; }

         @property auto save(){ return this; }
     private:
         size_t start, end;
         Range slice;
     }

     // called after construction from intervals
     // to make sure invariants hold
     //
     // Sorts the stored intervals by their start, folds every interval that
     // overlaps or touches the last kept one into it, and finally shrinks
     // `data` to the intervals that were kept.
     void sanitize()
     {
         import std.algorithm.comparison : max;
         import std.algorithm.mutation : SwapStrategy;
         import std.algorithm.sorting : sort;
         if (data.length == 0)
             return;
         alias Ival = CodepointInterval;
         //intervals wrapper for a _range_ over packed array
         auto ivals = Intervals!(typeof(data[]))(data[]);
         //@@@BUG@@@ can't use "a.a < b.a" see issue 12265
         sort!((a,b) => a.a < b.a, SwapStrategy.stable)(ivals);
         // what follows is a variation on stable remove
         // differences:
         // - predicate is binary, and is tested against
         //   the last kept element (at 'i').
         // - predicate mutates lhs (merges rhs into lhs)
         size_t len = ivals.length;
         size_t i = 0; // last kept interval
         size_t j = 1; // next interval to examine
         while (j < len)
         {
             if (ivals[i].b >= ivals[j].a)
             {
                 // j starts inside or right at the end of i: extend i to cover it
                 ivals[i] = Ival(ivals[i].a, max(ivals[i].b, ivals[j].b));
                 j++;
             }
             else //unmergable
             {
                 // check if there is a hole after merges
                 // (in the best case we do 0 writes to ivals)
                 if (j != i+1)
                     ivals[i+1] = ivals[j]; //copy over
                 i++;
                 j++;
             }
         }
         len = i + 1; // number of intervals kept
         // sanity check of the result; note that the loop bound means the last
         // kept interval is only checked as the right-hand neighbour
         for (size_t k=0; k + 1 < len; k++)
         {
             assert(ivals[k].a < ivals[k].b);
             assert(ivals[k].b < ivals[k+1].a);
         }
         // two stored values per interval
         data.length = len * 2;
     }

     // special case for normal InversionList
     //
     // Removes the single code point `ch` from this set (nothing happens when
     // `ch` is not in the set) and returns the set by reference.
     ref subChar(dchar ch)
     {
         // When ch is in the set, skipUpTo leaves `mark` on an interval that
         // starts exactly at ch: either that interval already started there,
         // or the interval holding ch has just been split at ch.
         auto mark = skipUpTo(ch);
         // Only data[mark] is inspected. Looking at data[mark-1] as well would
         // index out of bounds for mark == 0 and would miss the case where ch
         // is the first code point of an existing interval.
         if (mark != data.length && data[mark] == ch)
         {
             if (data[mark+1] == data[mark]+1)
             {
                 // the interval is exactly [ch, ch+1): drop it entirely rather
                 // than leave a zero-width interval in the set
                 genericReplace(data, mark, mark+2, cast(uint[])[]);
             }
             else
             {
                 // cut ch off the front of the interval
                 data[mark] = data[mark]+1;
             }
         }
         return this;
     }

     // Adds the interval [a, b) to the set, merging it with whatever stored
     // intervals it overlaps or touches.
     //
     // `hint` is a position in `data` from which the search for `a` starts.
     // Returns a Marker that can be passed as `hint` to a following call
     // (presumably for inserting further intervals in ascending order -
     // verify against callers).
     //
     // In the comments below "positive" means inside a stored interval and
     // "negative" means in a gap between stored intervals; an odd index into
     // `data` is a position inside an interval, an even one is in a gap.
     Marker addInterval(int a, int b, Marker hint=Marker.init)
     in
     {
         assert(a <= b);
     }
     body
     {
         import std.range : assumeSorted, SearchPolicy;
         auto range = assumeSorted(data[]);
         size_t pos;
         // index of the first stored boundary that is not below a
         size_t a_idx = hint + range[hint..$].lowerBound!(SearchPolicy.gallop)(a).length;
         if (a_idx == range.length)
         {
             //  [---+++----++++----++++++]
             //  [                         a  b]
             data.append(a, b);
             return data.length-1;
         }
         // same for b, searching only from a_idx on
         size_t b_idx = range[a_idx .. range.length].lowerBound!(SearchPolicy.gallop)(b).length+a_idx;
         // replacement boundaries; only the first to_insert entries are used
         uint[3] buf = void;
         uint to_insert;
         debug(std_uni)
         {
             writefln("a_idx=%d; b_idx=%d;", a_idx, b_idx);
         }
         if (b_idx == range.length)
         {
             //  [-------++++++++----++++++-]
             //  [      s     a                 b]
             if (a_idx & 1)// a in positive
             {
                 buf[0] = b;
                 to_insert = 1;
             }
             else// a in negative
             {
                 buf[0] = a;
                 buf[1] = b;
                 to_insert = 2;
             }
             pos = genericReplace(data, a_idx, b_idx, buf[0 .. to_insert]);
             return pos - 1;
         }

         // the stored boundary found for b
         uint top = data[b_idx];

         debug(std_uni)
         {
             writefln("a_idx=%d; b_idx=%d;", a_idx, b_idx);
             writefln("a=%s; b=%s; top=%s;", a, b, top);
         }
         if (a_idx & 1)
         {// a in positive
             if (b_idx & 1)// b in positive
             {
                 //  [-------++++++++----++++++-]
                 //  [       s    a        b    ]
                 buf[0] = top;
                 to_insert = 1;
             }
             else // b in negative
             {
                 //  [-------++++++++----++++++-]
                 //  [       s    a   b         ]
                 if (top == b)
                 {
                     // b touches the start of the next interval: merge into it
                     assert(b_idx+1 < data.length);
                     buf[0] = data[b_idx+1];
                     pos = genericReplace(data, a_idx, b_idx+2, buf[0 .. 1]);
                     return pos - 1;
                 }
                 buf[0] = b;
                 buf[1] = top;
                 to_insert = 2;
             }
         }
         else
         { // a in negative
             if (b_idx & 1) // b in positive
             {
                 //  [----------+++++----++++++-]
                 //  [     a     b              ]
                 buf[0] = a;
                 buf[1] = top;
                 to_insert = 2;
             }
             else// b in negative
             {
                 //  [----------+++++----++++++-]
                 //  [  a       s      b        ]
                 if (top == b)
                 {
                     // b touches the start of the next interval: merge into it
                     assert(b_idx+1 < data.length);
                     buf[0] = a;
                     buf[1] = data[b_idx+1];
                     pos = genericReplace(data, a_idx, b_idx+2, buf[0 .. 2]);
                     return pos - 1;
                 }
                 buf[0] = a;
                 buf[1] = b;
                 buf[2] = top;
                 to_insert = 3;
             }
         }
         pos = genericReplace(data, a_idx, b_idx+1, buf[0 .. to_insert]);
         debug(std_uni)
         {
             // one format specifier per argument: the previous two-specifier
             // string left data.length without a specifier
             writefln("marker idx: %d; value=%d; length=%d", pos, data[pos], data.length);
             writeln("inserting ", buf[0 .. to_insert]);
         }
         return pos - 1;
     }

     // Drops from the set what lies between position `pos` and the value `a`.
     // `pos` must be the start of an interval. Returns the Marker to continue
     // from (whatever genericReplace returns when everything after `pos` is
     // dropped, `pos` otherwise).
     Marker dropUpTo(uint a, Marker pos=Marker.init)
     in
     {
         assert(pos % 2 == 0); // at start of interval
     }
     body
     {
         auto tail = assumeSorted!"a <= b"(data[pos .. data.length]);
         if (tail.empty)
             return pos;
         // position just past the stored boundaries that lowerBound(a) selects
         immutable stop = pos + tail.lowerBound(a).length;

         debug(std_uni)
         {
             writeln("dropUpTo full length=", data.length);
             writeln(pos,"~~~", stop);
         }
         // boundaries written in place of data[pos .. stop]
         uint[] keep;
         if (stop == data.length)
             return genericReplace(data, pos, stop, keep);
         if (stop & 1)
         {
             // a in positive
             //[--+++----++++++----+++++++------...]
             //      |<---si       s  a  t
             // the interval holding a survives, now starting at a
             keep = [a];
         }
         // otherwise a in negative: nothing to put back
         //[--+++----++++++----+++++++-------+++...]
         //      |<---si              s  a  t
         genericReplace(data, pos, stop, keep);
         return pos;
     }

     // Moves past the stored boundaries that lowerBound(a) selects, starting the
     // search at `pos`, and returns a Marker that is always an even index
     // (the start of an interval, or data.length). If `a` falls strictly inside
     // an interval, that interval is split in two at `a` and the Marker points
     // at the second half.
     Marker skipUpTo(uint a, Marker pos=Marker.init)
     out(result)
     {
         assert(result % 2 == 0);// always start of interval
         //(may be  0-width after-split)
     }
     body
     {
         assert(data.length % 2 == 0);
         auto range = assumeSorted!"a <= b"(data[pos .. data.length]);
         size_t idx = pos+range.lowerBound(a).length;

         if (idx >= data.length) // could have Marker point to recently removed stuff
             return data.length;

         if (idx & 1)// inside of interval, check for split
         {

             immutable top = data[idx];
             // NOTE(review): with the "a <= b" predicate lowerBound appears to
             // stop only at a boundary greater than a, which would make this
             // branch unreachable - confirm.
             if (top == a)// no need to split, it's end
                 return idx+1;
             immutable start = data[idx-1];
             // a is exactly where this interval begins: point at its start
             if (a == start)
                 return idx-1;
             // split it up: [start, top) becomes [start, a) [a, top)
             genericReplace(data, idx, idx+1, [a, a, top]);
             return idx+1;        // avoid odd index
         }
         return idx;
     }

     // interval boundaries stored flat as a0, b0, a1, b1, ...
     CowArray!SP data;
 }

 @system unittest
 {
     import std.conv : to;
     // string form of a set lists its intervals as half-open ranges
     assert(unicode.ASCII.to!string() == "[0..128)");
 }

 // Byte-by-byte read of the idx-th 24-bit value stored at ptr.
 // This is the pedantic version, for CTFE and for architectures that only
 // allow aligned access.
 @system private uint safeRead24(scope const ubyte* ptr, size_t idx) pure nothrow @nogc
 {
     immutable base = idx * 3; // each value occupies three bytes
     immutable uint b0 = ptr[base];
     immutable uint b1 = ptr[base+1];
     immutable uint b2 = ptr[base+2];
     version (LittleEndian)
         return b0 | (b1 << 8) | (b2 << 16);
     else
         return (b0 << 16) | (b1 << 8) | b2;
 }

 // Byte-by-byte write of the low 24 bits of val as the idx-th 24-bit value
 // at ptr. Pedantic version, for CTFE and aligned-access only architectures.
 @system private void safeWrite24(scope ubyte* ptr, uint val, size_t idx) pure nothrow @nogc
 {
     immutable base = idx * 3; // each value occupies three bytes
     immutable ubyte low = val & 0xFF;
     immutable ubyte mid = (val >> 8) & 0xFF;
     immutable ubyte high = (val >> 16) & 0xFF;
     version (LittleEndian)
     {
         ptr[base] = low;
         ptr[base+1] = mid;
         ptr[base+2] = high;
     }
     else
     {
         ptr[base] = high;
         ptr[base+1] = mid;
         ptr[base+2] = low;
     }
 }

 // Reads the idx-th 24-bit value at ptr with a single, possibly unaligned,
 // 32-bit load (x86-like targets) and strips the extra byte.
 @system private uint unalignedRead24(scope const ubyte* ptr, size_t idx) pure nothrow @nogc
 {
     // note: the load covers four bytes, one more than the value itself
     immutable uint word = *cast(uint*)(ptr + idx*3);
     version (LittleEndian)
         return word & 0xFF_FFFF;
     else
         return word >> 8;
 }

 // Stores val as the idx-th 24-bit value at ptr through a single, possibly
 // unaligned, 32-bit read-modify-write (x86-like targets); the fourth byte
 // covered by the access keeps its previous content.
 @system private void unalignedWrite24(scope ubyte* ptr, uint val, size_t idx) pure nothrow @nogc
 {
     uint* word = cast(uint*)(ptr + idx*3);
     immutable uint old = *word;
     version (LittleEndian)
         *word = (old & 0xFF00_0000) | val;
     else
         *word = (old & 0xFF) | (val << 8);
 }

 // Reads the idx-th 24-bit value at ptr: the unaligned fast path is taken at
 // run time when hasUnalignedReads is set, the byte-wise version otherwise
 // (and always during CTFE).
 @system private uint read24(scope const ubyte* ptr, size_t idx) pure nothrow @nogc
 {
     static if (hasUnalignedReads)
     {
         if (!__ctfe)
             return unalignedRead24(ptr, idx);
     }
     return safeRead24(ptr, idx);
 }

 // Writes val as the idx-th 24-bit value at ptr: the unaligned fast path is
 // taken at run time when hasUnalignedReads is set, the byte-wise version
 // otherwise (and always during CTFE).
 @system private void write24(scope ubyte* ptr, uint val, size_t idx) pure nothrow @nogc
 {
     static if (hasUnalignedReads)
     {
         if (!__ctfe)
         {
             unalignedWrite24(ptr, val, idx);
             return;
         }
     }
     safeWrite24(ptr, val, idx);
 }

 // An array of uint shared between copies and duplicated on write.
 //
 // The storage policy SP supplies alloc/realloc/append/destroy. The backing
 // array `data` holds the elements followed by one extra slot that stores
 // the reference count, so `length` reports one less than data.length.
 // An empty array owns no memory and has no reference count.
 struct CowArray(SP=GcPolicy)
 {
     import std.range.primitives : hasLength;

   @safe:
     // Wraps an existing array, appending the reference-count slot to it
     // through SP.append.
     static auto reuse(uint[] arr)
     {
         CowArray cow;
         cow.data = arr;
         SP.append(cow.data, 1);
         assert(cow.refCount == 1);
         assert(cow.length == arr.length);
         return cow;
     }

     // construct from a range of known length
     this(Range)(Range range)
         if (isInputRange!Range && hasLength!Range)
     {
         import std.algorithm.mutation : copy;
         length = range.length;
         copy(range, data[0..$-1]);
     }

     // construct from a forward range without length: walk it once to size
     // the storage, then copy
     this(Range)(Range range)
         if (isForwardRange!Range && !hasLength!Range)
     {
         import std.algorithm.mutation : copy;
         import std.range.primitives : walkLength;
         immutable len = walkLength(range.save);
         length = len;
         copy(range, data[0..$-1]);
     }

     // a copy shares the storage: just bump the reference count
     this(this)
     {
         if (!empty)
         {
             refCount = refCount + 1;
         }
     }

     // release this reference; the last one destroys the storage
     ~this()
     {
         if (!empty)
         {
             immutable cnt = refCount;
             if (cnt == 1)
                 SP.destroy(data);
             else
                 refCount = cnt - 1;
         }
     }

     // no ref-count for empty U24 array
     @property bool empty() const { return data.length == 0; }

     // report one less than the actual size
     @property size_t length() const
     {
         return data.length ? data.length - 1 : 0;
     }

     // Resizes to `len` elements (+ an extra slot for ref-count).
     // Shared storage is never resized in place: this reference moves to a
     // fresh allocation instead.
     @property void length(size_t len)
     {
         import std.algorithm.comparison : min;
         import std.algorithm.mutation : copy;
         if (len == 0)
         {
             if (!empty)
                 freeThisReference();
             return;
         }
         immutable total = len + 1; // including ref-count
         if (empty)
         {
             data = SP.alloc!uint(total);
             refCount = 1;
             return;
         }
         immutable cur_cnt = refCount;
         if (cur_cnt != 1) // have more references to this memory
         {
             refCount = cur_cnt - 1;
             auto new_data = SP.alloc!uint(total);
             // take shrinking into account
             auto to_copy = min(total, data.length) - 1;
             copy(data[0 .. to_copy], new_data[0 .. to_copy]);
             data = new_data; // before setting refCount!
             refCount = 1;
         }
         else // 'this' is the only reference
         {
             // use the realloc (hopefully in-place operation)
             data = SP.realloc(data, total);
             refCount = 1; // setup a ref-count in the new end of the array
         }
     }

     alias opDollar = length;

     // plain read, never duplicates the storage
     uint opIndex()(size_t idx)const
     {
         return data[idx];
     }

     // write access: make the storage private to this reference first
     void opIndexAssign(uint val, size_t idx)
     {
         auto cnt = refCount;
         if (cnt != 1)
             dupThisReference(cnt);
         data[idx] = val;
     }

     // mutable slice: may be written through, so the storage is made private
     // to this reference first
     auto opSlice(size_t from, size_t to)
     {
         if (!empty)
         {
             auto cnt = refCount;
             if (cnt != 1)
                 dupThisReference(cnt);
         }
         return data[from .. to];

     }

     // const slice: read-only, no duplication
     auto opSlice(size_t from, size_t to) const
     {
         return data[from .. to];
     }

     // length slices before the ref count
     auto opSlice()
     {
         return opSlice(0, length);
     }

     // const overload: also stops before the ref count
     auto opSlice() const
     {
         return opSlice(0, length);
     }

     // append all elements of a range of known length
     void append(Range)(Range range)
         if (isInputRange!Range && hasLength!Range && is(ElementType!Range : uint))
     {
         // imported here because neither the struct nor this method otherwise
         // brings `copy` into scope
         import std.algorithm.mutation : copy;
         size_t nl = length + range.length;
         length = nl;
         copy(range, this[nl-range.length .. nl]);
     }

     // append the given values
     void append()(uint[] val...)
     {
         length = length + val.length;
         // the last slot is the ref-count, the new values go right before it
         data[$-val.length-1 .. $-1] = val[];
     }

     // element-wise comparison; the ref-count slot is left out
     bool opEquals()(auto const ref CowArray rhs)const
     {
         if (empty ^ rhs.empty)
             return false; // one is empty and the other isn't
         return empty || data[0..$-1] == rhs.data[0..$-1];
     }

 private:
     // ref-count is right after the data
     @property uint refCount() const
     {
         return data[$-1];
     }

     @property void refCount(uint cnt)
     {
         data[$-1] = cnt;
     }

     // give up this reference and become empty
     void freeThisReference()
     {
         immutable count = refCount;
         if (count != 1) // have more references to this memory
         {
             // dec shared ref-count
             refCount = count - 1;
             data = [];
         }
         else
             SP.destroy(data);
         assert(!data.ptr);
     }

     // detach from shared storage: copy the elements into a fresh allocation
     // owned by this reference alone
     void dupThisReference(uint count)
     in
     {
         assert(!empty && count != 1 && count == refCount);
     }
     body
     {
         import std.algorithm.mutation : copy;
         // dec shared ref-count
         refCount = count - 1;
         // copy to the new chunk of RAM
         auto new_data = SP.alloc!uint(data.length);
         // bit-blit old stuff except the counter
         copy(data[0..$-1], new_data[0..$-1]);
         data = new_data; // before setting refCount!
         refCount = 1; // so that this updates the right one
     }

     // elements followed by the ref-count slot
     uint[] data;
 }

 @safe unittest// Uint24 tests
 {
     import std.algorithm.comparison : equal;
     import std.algorithm.mutation : copy;
     import std.conv : text;
     import std.range : iota, chain;
     import std.range.primitives : isBidirectionalRange, isOutputRange;
     // resizes and rewrites the array passed by reference while a copy taken
     // beforehand (u24_c) must keep its old content
     void funcRef(T)(ref T u24)
     {
         u24.length = 2;
         u24[1] = 1024;
         T u24_c = u24;
         assert(u24[1] == 1024);
         u24.length = 0;
         assert(u24.empty);
         u24.append([1, 2]);
         assert(equal(u24[], [1, 2]));
         u24.append(111);
         assert(equal(u24[], [1, 2, 111]));
         assert(!u24_c.empty && u24_c[1] == 1024);
         u24.length = 3;
         copy(iota(0, 3), u24[]);
         assert(equal(u24[], iota(0, 3)));
         assert(u24_c[1] == 1024);
     }

     // exercises copy construction, assignment and opEquals on an array
     // passed by value
     void func2(T)(T u24)
     {
         T u24_2 = u24;
         T u24_3;
         u24_3 = u24_2;
         assert(u24_2 == u24_3);
         assert(equal(u24[], u24_2[]));
         assert(equal(u24_2[], u24_3[]));
         funcRef(u24_3);

         assert(equal(u24_3[], iota(0, 3)));
         assert(!equal(u24_2[], u24_3[]));
         assert(equal(u24_2[], u24[]));
         u24_2 = u24_3;
         assert(equal(u24_2[], iota(0, 3)));
         // to test that passed arg is intact outside
         // plus try out opEquals
         u24 = u24_3;
         u24 = T.init;
         u24_3 = T.init;
         assert(u24.empty);
         assert(u24 == u24_3);
         assert(u24 != u24_2);
     }

     foreach (Policy; AliasSeq!(GcPolicy, ReallocPolicy))
     {
         alias Range = typeof(CowArray!Policy.init[]);
         alias U24A = CowArray!Policy;
         // the slice of a CowArray must be a full-featured range
         static assert(isForwardRange!Range);
         static assert(isBidirectionalRange!Range);
         static assert(isOutputRange!(Range, uint));
         static assert(isRandomAccessRange!(Range));

         auto arr = U24A([42u, 36, 100]);
         assert(arr[0] == 42);
         assert(arr[1] == 36);
         arr[0] = 72;
         arr[1] = 0xFE_FEFE;
         assert(arr[0] == 72);
         assert(arr[1] == 0xFE_FEFE);
         assert(arr[2] == 100);
         U24A arr2 = arr;
         assert(arr2[0] == 72);
         arr2[0] = 11;
         // test COW-ness
         assert(arr[0] == 72);
         assert(arr2[0] == 11);
         // set this to about 100M to stress-test COW memory management
         foreach (v; 0 .. 10_000)
             func2(arr);
         assert(equal(arr[], [72, 0xFE_FEFE, 100]));

         // construction from a range, then overwriting a sub-slice in place
         auto r2 = U24A(iota(0, 100));
         assert(equal(r2[], iota(0, 100)), text(r2[]));
         copy(iota(10, 170, 2), r2[10 .. 90]);
         assert(equal(r2[], chain(iota(0, 10), iota(10, 170, 2), iota(90, 100)))
                , text(r2[]));
     }
 }

 version (unittest)
 {
     // every InversionList instantiation the unittests below iterate over
     private alias AllSets = AliasSeq!(InversionList!GcPolicy, InversionList!ReallocPolicy);
 }

 @safe unittest// core set primitives test
 {
     import std.conv : text;
     foreach (CodeList; AllSets)
     {
         CodeList a;
         //"plug a hole" test
         a.add(10, 20).add(25, 30).add(15, 27);
         assert(a == CodeList(10, 30), text(a));

         auto x = CodeList.init;
         x.add(10, 20).add(30, 40).add(50, 60);

         a = x;
         a.add(20, 49);//[10, 49) [50, 60)
         assert(a == CodeList(10, 49, 50 ,60));

         a = x;
         a.add(20, 50);
         assert(a == CodeList(10, 60), text(a));

         // simple unions, mostly edge effects
         x = CodeList.init;
         x.add(10, 20).add(40, 60);

         a = x;
         a.add(10, 25); //[10, 25) [40, 60)
         assert(a == CodeList(10, 25, 40, 60));

         a = x;
         a.add(5, 15); //[5, 20) [40, 60)
         assert(a == CodeList(5, 20, 40, 60));

         a = x;
         a.add(0, 10); // [0, 20) [40, 60)
         assert(a == CodeList(0, 20, 40, 60));

         a = x;
         a.add(0, 5); // prepend
         assert(a == CodeList(0, 5, 10, 20, 40, 60), text(a));

         a = x;
         a.add(5, 20);
         assert(a == CodeList(5, 20, 40, 60));

         a = x;
         a.add(3, 37);
         assert(a == CodeList(3, 37, 40, 60));

         a = x;
         a.add(37, 65);
         assert(a == CodeList(10, 20, 37, 65));

         // some tests on helpers for set intersection
         x = CodeList.init.add(10, 20).add(40, 60).add(100, 120);
         a = x;

         // skip to a boundary that is already stored, then drop up to 110
         auto m = a.skipUpTo(60);
         a.dropUpTo(110, m);
         assert(a == CodeList(10, 20, 40, 60, 110, 120), text(a.data[]));

         a = x;
         a.dropUpTo(100);
         assert(a == CodeList(100, 120), text(a.data[]));

         // skip into the middle of [40, 60), then drop everything after it
         a = x;
         m = a.skipUpTo(50);
         a.dropUpTo(140, m);
         assert(a == CodeList(10, 20, 40, 50), text(a.data[]));
         a = x;
         a.dropUpTo(60);
         assert(a == CodeList(100, 120), text(a.data[]));
     }
 }


 //test constructor to work with any order of intervals
 @safe unittest
 {
     import std.algorithm.comparison : equal;
     import std.conv : text, to;
     import std.range : chain, iota;
     import std.typecons : tuple;
     //ensure constructor handles bad ordering and overlap
     auto c1 = CodepointSet('а', 'я'+1, 'А','Я'+1);
     foreach (ch; chain(iota('а', 'я'+1), iota('А','Я'+1)))
         assert(ch in c1, to!string(ch));

     //contiguous
     assert(CodepointSet(1000, 1006, 1006, 1009)
         .byInterval.equal([tuple(1000, 1009)]));
     //contains
     assert(CodepointSet(900, 1200, 1000, 1100)
         .byInterval.equal([tuple(900, 1200)]));
     //intersect left
     assert(CodepointSet(900, 1100, 1000, 1200)
         .byInterval.equal([tuple(900, 1200)]));
     //intersect right
     assert(CodepointSet(1000, 1200, 900, 1100)
         .byInterval.equal([tuple(900, 1200)]));

     //ditto with extra items at end
     assert(CodepointSet(1000, 1200, 900, 1100, 800, 850)
         .byInterval.equal([tuple(800, 850), tuple(900, 1200)]));
     assert(CodepointSet(900, 1100, 1000, 1200, 800, 850)
         .byInterval.equal([tuple(800, 850), tuple(900, 1200)]));

     //"plug a hole" test
     auto c2 = CodepointSet(20, 40,
         60, 80, 100, 140, 150, 200,
         40, 60, 80, 100, 140, 150
     );
     assert(c2.byInterval.equal([tuple(20, 200)]));

     // unsorted input mixing overlapping and touching intervals
     auto c3 = CodepointSet(
         20, 40, 60, 80, 100, 140, 150, 200,
         0, 10, 15, 100, 10, 20, 200, 220);
     assert(c3.byInterval.equal([tuple(0, 140), tuple(150, 220)]));
 }


 @safe unittest
 {   // full set operations
     import std.conv : text;
     foreach (CodeList; AllSets)
     {
         CodeList a, b, c, d;

         // union ('|'), checked in both operand orders

         //"plug a hole"
         a.add(20, 40).add(60, 80).add(100, 140).add(150, 200);
         b.add(40, 60).add(80, 100).add(140, 150);
         c = a | b;
         d = b | a;
         assert(c == CodeList(20, 200), text(CodeList.stringof," ", c));
         assert(c == d, text(c," vs ", d));

         b = CodeList.init.add(25, 45).add(65, 85).add(95,110).add(150, 210);
         c = a | b; //[20,45) [60, 85) [95, 140) [150, 210)
         d = b | a;
         assert(c == CodeList(20, 45, 60, 85, 95, 140, 150, 210), text(c));
         assert(c == d, text(c," vs ", d));

         b = CodeList.init.add(10, 20).add(30,100).add(145,200);
         c = a | b;//[10, 140) [145, 200)
         d = b | a;
         assert(c == CodeList(10, 140, 145, 200));
         assert(c == d, text(c," vs ", d));

         b = CodeList.init.add(0, 10).add(15, 100).add(10, 20).add(200, 220);
         c = a | b;//[0, 140) [150, 220)
         d = b | a;
         assert(c == CodeList(0, 140, 150, 220));
         assert(c == d, text(c," vs ", d));


         // intersection ('&'), checked in both operand orders
         a = CodeList.init.add(20, 40).add(60, 80);
         b = CodeList.init.add(25, 35).add(65, 75);
         c = a & b;
         d = b & a;
         assert(c == CodeList(25, 35, 65, 75), text(c));
         assert(c == d, text(c," vs ", d));

         a = CodeList.init.add(20, 40).add(60, 80).add(100, 140).add(150, 200);
         b = CodeList.init.add(25, 35).add(65, 75).add(110, 130).add(160, 180);
         c = a & b;
         d = b & a;
         assert(c == CodeList(25, 35, 65, 75, 110, 130, 160, 180), text(c));
         assert(c == d, text(c," vs ", d));

         a = CodeList.init.add(20, 40).add(60, 80).add(100, 140).add(150, 200);
         b = CodeList.init.add(10, 30).add(60, 120).add(135, 160);
         c = a & b;//[20, 30)[60, 80) [100, 120) [135, 140) [150, 160)
         d = b & a;

         assert(c == CodeList(20, 30, 60, 80, 100, 120, 135, 140, 150, 160),text(c));
         assert(c == d, text(c, " vs ",d));
         assert((c & a) == c);
         assert((d & b) == d);
         assert((c & d) == d);

         b = CodeList.init.add(40, 60).add(80, 100).add(140, 200);
         c = a & b;
         d = b & a;
         assert(c == CodeList(150, 200), text(c));
         assert(c == d, text(c, " vs ",d));
         assert((c & a) == c);
         assert((d & b) == d);
         assert((c & d) == d);

         assert((a & a) == a);
         assert((b & b) == b);

         // difference ('-')
         a = CodeList.init.add(20, 40).add(60, 80).add(100, 140).add(150, 200);
         b = CodeList.init.add(30, 60).add(75, 120).add(190, 300);
         c = a - b;// [20, 30) [60, 75) [120, 140) [150, 190)
         d = b - a;// [40, 60) [80, 100) [200, 300)
         assert(c == CodeList(20, 30, 60, 75, 120, 140, 150, 190), text(c));
         assert(d == CodeList(40, 60, 80, 100, 200, 300), text(d));
         assert(c - d == c, text(c-d, " vs ", c));
         assert(d - c == d, text(d-c, " vs ", d));
         assert(c - c == CodeList.init);
         assert(d - d == CodeList.init);

         a = CodeList.init.add(20, 40).add( 60, 80).add(100, 140).add(150,            200);
         b = CodeList.init.add(10,  50).add(60,                           160).add(190, 300);
         c = a - b;// [160, 190)
         d = b - a;// [10, 20) [40, 50) [80, 100) [140, 150) [200, 300)
         assert(c == CodeList(160, 190), text(c));
         assert(d == CodeList(10, 20, 40, 50, 80, 100, 140, 150, 200, 300), text(d));
         assert(c - d == c, text(c-d, " vs ", c));
         assert(d - c == d, text(d-c, " vs ", d));
         assert(c - c == CodeList.init);
         assert(d - d == CodeList.init);

         // symmetric difference ('~'), checked in both operand orders
         a = CodeList.init.add(20,    40).add(60, 80).add(100,      140).add(150,  200);
         b = CodeList.init.add(10, 30).add(45,         100).add(130,             190);
         c = a ~ b; // [10, 20) [30, 40) [45, 60) [80, 130) [140, 150) [190, 200)
         d = b ~ a;
         assert(c == CodeList(10, 20, 30, 40, 45, 60, 80, 130, 140, 150, 190, 200),
                text(c));
         assert(c == d, text(c, " vs ", d));
     }
 }

 }

 @safe unittest// vs single dchar
 {
     import std.conv : text;
     CodepointSet a = CodepointSet(10, 100, 120, 200);
     // 'A' is 65: removing it splits [10, 100) in two
     assert(a - 'A' == CodepointSet(10, 65, 66, 100, 120, 200), text(a - 'A'));
     // 'B' is 66: intersecting leaves just that one code point
     assert((a & 'B') == CodepointSet(66, 67));
 }

 @safe unittest// iteration & opIndex
 {
     import std.algorithm.comparison : equal;
     import std.conv : text;
     import std.typecons : tuple, Tuple;

     foreach (CodeList; AliasSeq!(InversionList!(ReallocPolicy)))
     {
         auto arr = "ABCDEFGHIJKLMabcdefghijklm"d;
         auto a = CodeList('A','N','a', 'n');
         // byInterval yields the stored [a, b) pairs in order
         assert(equal(a.byInterval,
                 [tuple(cast(uint)'A', cast(uint)'N'), tuple(cast(uint)'a', cast(uint)'n')]
             ), text(a.byInterval));

         // same @@@BUG as in issue 8949 ?
         version (bug8949)
         {
             import std.range : retro;
             assert(equal(retro(a.byInterval),
                 [tuple(cast(uint)'a', cast(uint)'n'), tuple(cast(uint)'A', cast(uint)'N')]
             ), text(retro(a.byInterval)));
         }
         // byCodepoint yields every member, and opIndex agrees with it
         auto achr = a.byCodepoint;
         assert(equal(achr, arr), text(a.byCodepoint));
         foreach (ch; a.byCodepoint)
             assert(a[ch]);
         auto x = CodeList(100, 500, 600, 900, 1200, 1500);
         assert(equal(x.byInterval, [ tuple(100, 500), tuple(600, 900), tuple(1200, 1500)]), text(x.byInterval));
         foreach (ch; x.byCodepoint)
             assert(x[ch]);
         static if (is(CodeList == CodepointSet))
         {
             // round trip: a set built from byInterval has the same intervals
             auto y = CodeList(x.byInterval);
             assert(equal(x.byInterval, y.byInterval));
         }
         // an empty set iterates as empty both ways
         assert(equal(CodepointSet.init.byInterval, cast(Tuple!(uint, uint)[])[]));
         assert(equal(CodepointSet.init.byCodepoint, cast(dchar[])[]));
     }
 }

 //============================================================================
 // Generic Trie template and various ways to build it
 //============================================================================

 // debug helper: textual dump of an array; when it holds more than 32
 // elements only the first and the last 16 are shown, joined by "~...~"
 auto arrayRepr(T)(T x)
 {
     import std.conv : text;
     enum edge = 16; // elements kept at each end of a long array
     immutable count = x.length;
     if (count <= 2 * edge)
         return text(x);
     return text(x[0 .. edge], "~...~", x[count - edge .. count]);
 }

 /**
     Translates a $(D Key) into a flat integer index (a $(D size_t)) by
     evaluating every predicate of $(D Prefix) on the key and packing
     the results next to each other.

     Predicates are applied left to right: the first (leftmost) one supplies
     the most significant bits, and each following predicate is placed in the
     next lower $(D bitSize) bits.
  */
 template mapTrieIndex(Prefix...)
 {
     size_t mapTrieIndex(Key)(Key key)
         if (isValidPrefixForTrie!(Key, Prefix))
     {
         size_t packed;
         // seed with the most significant chunk
         packed |= Prefix[0](key);
         foreach (n, Unused; Prefix[1 .. $])
         {
             // open up room for the next chunk, then merge it in
             packed <<= Prefix[n+1].bitSize;
             packed |= Prefix[n+1](key);
         }
         return packed;
     }
 }

 /*
     $(D TrieBuilder) is a type used for incremental construction
     of $(LREF Trie)s.

     Values have to be supplied in increasing order of the index that
     $(LREF mapTrieIndex) computes for the key; every skipped slot is filled
     with the filler passed to the constructor, and $(D build) pads whatever
     remains of the index domain with that same filler.

     See $(LREF buildTrie) for generic helpers built on top of it.
 */
 @trusted private struct TrieBuilder(Value, Key, Args...)
 if (isBitPackableType!Value && isValidArgsForTrie!(Key, Args))
 {
     import std.exception : enforce;

 private:
     // last index is not stored in table, it is used as an offset to values in a block.
     static if (is(Value == bool))// always pack bool
         alias V = BitPacked!(Value, 1);
     else
         alias V = Value;
     // Size of the index domain implied by the predicates alone:
     // the product of 2^^bitSize over all of them.
     static auto deduceMaxIndex(Preds...)()
     {
         size_t idx = 1;
         foreach (v; Preds)
             idx *= 2^^v.bitSize;
         return idx;
     }

     static if (is(typeof(Args[0]) : Key)) // Args start with upper bound on Key
     {
         alias Prefix = Args[1..$];
         enum lastPageSize = 2^^Prefix[$-1].bitSize;
         enum translatedMaxIndex = mapTrieIndex!(Prefix)(Args[0]);
         // the upper bound rounded up to a whole number of last-level pages
         enum roughedMaxIndex =
             (translatedMaxIndex + lastPageSize-1)/lastPageSize*lastPageSize;
         // check wrap around - if wrapped, use the default deduction rule
         enum maxIndex = roughedMaxIndex < translatedMaxIndex ?
             deduceMaxIndex!(Prefix)() : roughedMaxIndex;
     }
     else
     {
         alias Prefix = Args;
         enum maxIndex = deduceMaxIndex!(Prefix)();
     }

     alias getIndex = mapTrieIndex!(Prefix);

     enum lastLevel = Prefix.length-1;
     // Per-level bookkeeping; size_t.max in a field means "no such page recorded"
     // (that is what the constructor stores). Within this struct only idx_zeros
     // is ever updated or consulted after construction.
     struct ConstructState
     {
         size_t idx_zeros, idx_ones;
     }
     // iteration over levels of Trie, each indexes its own level and thus a shortened domain
     size_t[Prefix.length] indices;
     // default filler value to use
     Value defValue;
     // this is a full-width index of next item
     size_t curIndex;
     // all-zeros page index, all-ones page index (+ indicator if there is such a page)
     ConstructState[Prefix.length] state;
     // the table being constructed
     MultiArray!(idxTypes!(Key, fullBitSize!(Prefix), Prefix[0..$]), V) table;

     @disable this();

     //shortcut for index variable at level 'level'
     @property ref idx(size_t level)(){ return indices[level]; }

     // Appends numVals copies of val at level 'level', starting at that level's
     // write position (idx!level); each time a page is completed it is handed
     // to spillToNextPage.
     // this function assumes no holes in the input so
     // indices are going one by one
     void addValue(size_t level, T)(T val, size_t numVals)
     {
         alias j = idx!level;
         enum pageSize = 1 << Prefix[level].bitSize;
         if (numVals == 0)
             return;
         auto ptr = table.slice!(level);
         if (numVals == 1)
         {
             static if (level == Prefix.length-1)
                 ptr[j] = val;
             else
             {// can incur narrowing conversion
                 assert(j < ptr.length);
                 ptr[j] = force!(typeof(ptr[j]))(val);
             }
             j++;
             // the single value may have been the last slot of the page
             if (j % pageSize == 0)
                 spillToNextPage!level(ptr);
             return;
         }
         // longer row of values
         // get to the next page boundary
         // (first multiple of pageSize strictly above j; pageSize is a power of two)
         immutable nextPB = (j + pageSize) & ~(pageSize-1);
         immutable n =  nextPB - j;// can fill right in this page
         // strictly less: the run stops short of the boundary, so nothing spills
         if (numVals < n) //fits in current page
         {
             ptr[j .. j+numVals]  = val;
             j += numVals;
             return;
         }
         static if (level != 0)//on the first level it always fits
         {
             numVals -= n;
             //write till the end of current page
             ptr[j .. j+n]  = val;
             j += n;
             //spill to the next page
             spillToNextPage!level(ptr);
             // page at once loop
             // Shortcut: if an all-zero page of this level is already known and val
             // is T.init, whole pages are not written here at all; the level above
             // just receives numVals/pageSize references to that known page.
             if (state[level].idx_zeros != size_t.max && val == T.init)
             {
                 alias NextIdx = typeof(table.slice!(level-1)[0]);
                 addValue!(level-1)(force!NextIdx(state[level].idx_zeros),
                     numVals/pageSize);
                 ptr = table.slice!level; //table structure might have changed
                 numVals %= pageSize;
             }
             else
             {
                 // otherwise materialize one full page at a time
                 while (numVals >= pageSize)
                 {
                     numVals -= pageSize;
                     ptr[j .. j+pageSize]  = val;
                     j += pageSize;
                     spillToNextPage!level(ptr);
                 }
             }
             if (numVals)
             {
                 // the leftovers, an incomplete page
                 ptr[j .. j+numVals]  = val;
                 j += numVals;
             }
         }
     }

     // Called when the page ending at idx!level has just been completed.
     void spillToNextPage(size_t level, Slice)(ref Slice ptr)
     {
         // last level (i.e. topmost) has 1 "page"
         // thus it need not to add a new page on upper level
         static if (level != 0)
             spillToNextPageImpl!(level)(ptr);
     }

     // this can re-use the current page if duplicate or allocate a new one
     // it also makes sure that previous levels point to the correct page in this level
     // On return ptr has been re-fetched from the table.
     void spillToNextPageImpl(size_t level, Slice)(ref Slice ptr)
     {
         alias NextIdx = typeof(table.slice!(level-1)[0]);
         NextIdx next_lvl_index;
         enum pageSize = 1 << Prefix[level].bitSize;
         assert(idx!level % pageSize == 0);
         // start offset of the page that has just been completed
         immutable last = idx!level-pageSize;
         const slice = ptr[idx!level - pageSize .. idx!level];
         size_t j;
         // compare the completed page against every earlier page of this level
         for (j=0; j<last; j+=pageSize)
         {
             if (ptr[j .. j+pageSize] == slice)
             {
                 // get index to it, reuse ptr space for the next block
                 next_lvl_index = force!NextIdx(j/pageSize);
                 version (none)
                 {
                 import std.stdio : writefln, writeln;
                 writefln("LEVEL(%s) page mapped idx: %s: 0..%s  ---> [%s..%s]"
                         ,level
                         ,indices[level-1], pageSize, j, j+pageSize);
                 writeln("LEVEL(", level
                         , ") mapped page is: ", slice, ": ", arrayRepr(ptr[j .. j+pageSize]));
                 writeln("LEVEL(", level
                         , ") src page is :", ptr, ": ", arrayRepr(slice[0 .. pageSize]));
                 }
                 idx!level -= pageSize; // reuse this page, it is duplicate
                 break;
             }
         }
         // j only reaches 'last' when the loop ran out without finding a duplicate
         if (j == last)
         {
     L_allocate_page:
             next_lvl_index = force!NextIdx(idx!level/pageSize - 1);
             // remember the first all-zero page so addValue can reference it directly
             if (state[level].idx_zeros == size_t.max && ptr.zeros(j, j+pageSize))
             {
                 state[level].idx_zeros = next_lvl_index;
             }
             // allocate next page
             version (none)
             {
             import std.stdio : writefln;
             writefln("LEVEL(%s) page allocated: %s"
                      , level, arrayRepr(slice[0 .. pageSize]));
             writefln("LEVEL(%s) index: %s ; page at this index %s"
                      , level
                      , next_lvl_index
                      , arrayRepr(
                          table.slice!(level)
                           [pageSize*next_lvl_index..(next_lvl_index+1)*pageSize]
                         ));
             }
             table.length!level = table.length!level + pageSize;
         }
     L_know_index:
         // for the previous level, values are indices to the pages in the current level
         addValue!(level-1)(next_lvl_index, 1);
         ptr = table.slice!level; //re-load the slice after moves
     }

     // idx - full-width index to fill with v (full-width index != key)
     // fills everything in the range of [curIndex, idx) with filler
     void putAt(size_t idx, Value v)
     {
         assert(idx >= curIndex);
         immutable numFillers = idx - curIndex;
         addValue!lastLevel(defValue, numFillers);
         addValue!lastLevel(v, 1);
         curIndex = idx + 1;
     }

     // ditto, but sets the range of [idxA, idxB) to v
     void putRangeAt(size_t idxA, size_t idxB, Value v)
     {
         assert(idxA >= curIndex);
         assert(idxB >= idxA);
         size_t numFillers = idxA - curIndex;
         addValue!lastLevel(defValue, numFillers);
         addValue!lastLevel(v, idxB - idxA);
         curIndex = idxB; // open-right
     }

     // message of the exception thrown by putRange/putValue on out-of-order input
     enum errMsg = "non-monotonic prefix function(s), an unsorted range or "~
         "duplicate key->value mapping";

 public:
     /**
         Construct a builder, where $(D filler) is a value
         to indicate empty slots (or "not found" condition).
     */
     this(Value filler)
     {
         curIndex = 0;
         defValue = filler;
         // zeros-page index, ones-page index
         foreach (ref v; state)
             v = ConstructState(size_t.max, size_t.max);
         table = typeof(table)(indices);
         // one page per level is a bootstrap minimum
         foreach (i, Pred; Prefix)
             table.length!i = (1 << Pred.bitSize);
     }

     /**
         Put a value $(D v) into interval as
         mapped by keys from $(D a) to $(D b).
         All slots prior to $(D a) are filled with
         the default filler.

         The interval is open-right: the slot mapped by $(D b) itself is not set.
         An exception is thrown if the mapped indices are out of order.
     */
     void putRange(Key a, Key b, Value v)
     {
         auto idxA = getIndex(a), idxB = getIndex(b);
         // indexes of key should always grow
         enforce(idxB >= idxA && idxA >= curIndex, errMsg);
         putRangeAt(idxA, idxB, v);
     }

     /**
         Put a value $(D v) into slot mapped by $(D key).
         All slots prior to $(D key) are filled with the
         default filler.

         An exception is thrown if $(D key) maps to a slot that was already passed.
     */
     void putValue(Key key, Value v)
     {
         import std.conv : text;
         auto idx = getIndex(key);
         enforce(idx >= curIndex, text(errMsg, " ", idx));
         putAt(idx, v);
     }

     /// Finishes construction of Trie, yielding an immutable Trie instance.
     auto build()
     {
         static if (maxIndex != 0) // doesn't cover full range of size_t
         {
             assert(curIndex <= maxIndex);
             // pad the remainder of the domain with the filler
             addValue!lastLevel(defValue, maxIndex - curIndex);
         }
         else
         {
             if (curIndex != 0 // couldn't wrap around
                 || (Prefix.length != 1 && indices[lastLevel] == 0)) // can be just empty
             {
                 // the remaining count cannot be expressed in one size_t,
                 // hence size_t.max - curIndex values plus one more
                 addValue!lastLevel(defValue, size_t.max - curIndex);
                 addValue!lastLevel(defValue, 1);
             }
             // else curIndex already completed the full range of size_t by wrapping around
         }
         return Trie!(V, Key, maxIndex, Prefix)(table);
     }
 }

 /**
     $(P A generic multi-stage Trie with a number of stages fixed at
     compile time, aiming for the fastest lookup at the smallest footprint.
     )
     $(P It is deliberately read-only and offers no public constructors;
      instances are produced by a builder,
      see $(LREF TrieBuilder) and $(LREF buildTrie).
     )

 */
 @trusted private struct Trie(Value, Key, Args...)
 if (isValidPrefixForTrie!(Key, Args)
     || (isValidPrefixForTrie!(Key, Args[1..$])
     && is(typeof(Args[0]) : size_t)))
 {
     import std.range.primitives : isOutputRange;

     // A leading integral argument is the exclusive upper bound of valid
     // indices; everything after it (or everything, if absent) is the prefix.
     static if (!is(typeof(Args[0]) : size_t))
     {
         private alias Prefix = Args;
         private enum hasBoundsCheck = false;
     }
     else
     {
         private alias Prefix = Args[1..$];
         private enum hasBoundsCheck = true;
         private enum maxIndex = Args[0];
     }

     // takes over a table prepared by a builder
     private this()(typeof(_table) table)
     {
         _table = table;
     }

     // only for constant Tries constructed from precompiled tables
     private this()(const(size_t)[] offsets, const(size_t)[] sizes,
         const(size_t)[] data) const
     {
         _table = typeof(_table)(offsets, sizes, data);
     }

     /**
         $(P Look up $(D key) in this $(D Trie). )

         $(P Any key inside the domain given at construction time yields a
         value: the domain is covered completely, so a "not found" result
         is expressed through the sentinel (filler) value. )

         $(P See $(LREF buildTrie), $(LREF TrieBuilder) for how to
         define a domain of $(D Trie) keys and the sentinel value. )

         Note:
         The domain check is an $(D assert), so it is only performed in builds
         that keep assertions and shows up as an assertion failure.
     */
     TypeOfBitPacked!Value opIndex()(Key key) const
     {
         static if (hasBoundsCheck)
             assert(mapTrieIndex!Prefix(key) < maxIndex);
         size_t cursor = cast(size_t) Prefix[0](key);
         foreach (lvl, Unused; Prefix[0..$-1])
         {
             // the entry at this level is a page number on the next level;
             // scale it to a page start, then add the offset inside the page
             auto pageStart = _table.ptr!lvl[cursor]<<Prefix[lvl+1].bitSize;
             cursor = cast(size_t)(pageStart + Prefix[lvl+1](key));
         }
         return _table.ptr!(Prefix.length-1)[cursor];
     }

     ///
     @property size_t bytes(size_t n=size_t.max)() const
     {
         return _table.bytes!n;
     }

     ///
     @property size_t pages(size_t n)() const
     {
         enum bits = Prefix[n].bitSize;
         return (bytes!n+2^^(bits-1))
                 /2^^bits;
     }

     ///
     void store(OutRange)(scope OutRange sink) const
         if (isOutputRange!(OutRange, char))
     {
         _table.store(sink);
     }

 private:
     MultiArray!(idxTypes!(Key, fullBitSize!(Prefix), Prefix[0..$]), Value) _table;
 }

 // Builds a sequence of 'sliceBits' predicates that carve the bits below 'top'
 // into consecutive fields whose widths are 'sizes', taken left-to-right
 // starting from the most significant end.
 template GetBitSlicing(size_t top, sizes...)
 {
     static if (sizes.length == 0)
         alias GetBitSlicing = AliasSeq!();
     else
     {
         // lowest bit of the field consumed by this step
         enum size_t lowBit = top - sizes[0];
         alias GetBitSlicing =
             AliasSeq!(sliceBits!(lowBit, top),
                       GetBitSlicing!(lowBit, sizes[1..$]));
     }
 }

 // callableWith!T yields a predicate template: it is true for 'Pred' when
 // Pred(T.init) compiles and its result (unwrapped from BitPacked, if any)
 // is a bit-packable type.
 template callableWith(T)
 {
     template callableWith(alias Pred)
     {
         static if (is(typeof(Pred(T.init)) Result))
             enum callableWith = isBitPackableType!(TypeOfBitPacked!(Result));
         else
             enum callableWith = false;
     }
 }

 /*
     True when every element of $(D Prefix) can serve as a predicate
     of a $(D Trie) keyed by $(D Key): each one has to be callable with
     a single $(D Key) argument and return a bit-packable value
     (see $(D callableWith)).
 */
 template isValidPrefixForTrie(Key, Prefix...)
 {
     import std.meta : allSatisfy;
     // the per-predicate check, specialized for this key type
     alias acceptsKey = callableWith!Key;
     enum isValidPrefixForTrie = allSatisfy!(acceptsKey, Prefix); // TODO: tighten the screws
 }

 /*
     Check if $(D Args) is a set of maximum key value followed by valid predicates
     for $(D Trie) template having $(D Key) as the type of keys.

     Two layouts are accepted:
     $(UL
         $(LI all of $(D Args) are predicates valid for $(D Key);)
         $(LI $(D Args[0]) is a value convertible to $(D Key) (the upper bound)
              and the rest are predicates valid for $(D Key).)
     )
     At least one predicate is required.
 */
 template isValidArgsForTrie(Key, Args...)
 {
     static if (Args.length > 1)
     {
         enum isValidArgsForTrie = isValidPrefixForTrie!(Key, Args)
             || (isValidPrefixForTrie!(Key, Args[1..$]) && is(typeof(Args[0]) : Key));
     }
     else
     {
         // Key has to be passed explicitly: instantiating with Args alone would
         // bind the lone predicate to the Key parameter and leave the prefix
         // empty, which is vacuously valid and never tests the predicate.
         enum isValidArgsForTrie = Args.length == 1
             && isValidPrefixForTrie!(Key, Args);
     }
 }

 // Adds up all values of the compile-time sequence 'ints' (0 for an empty one).
 @property size_t sumOfIntegerTuple(ints...)()
 {
     size_t total;
     foreach (pos, unused; ints)
         total += ints[pos];
     return total;
 }

 /**
     Convenience builder of a custom multi-level fixed Trie out of
     a $(D CodepointSet). $(D sizes) lists the number of bits consumed
     by each level, most significant bits first.

     Note: The sum of $(D sizes) must be equal to 21.

     See_Also: $(LREF toTrie), which is even simpler.

     Example:
     ---
     {
         import std.stdio;
         auto set = unicode("Number");
         auto trie = codepointSetTrie!(8, 5, 8)(set);
         writeln("Input code points to test:");
         foreach (line; stdin.byLine)
         {
             int count=0;
             foreach (dchar ch; line)
                 if (trie[ch])// is number
                     count++;
             writefln("Contains %d number code points.", count);
         }
     }
     ---
 */
 public template codepointSetTrie(sizes...)
 if (sumOfIntegerTuple!sizes == 21)
 {
     auto codepointSetTrie(Set)(Set set)
         if (isCodepointSet!Set)
     {
         alias Builder = TrieBuilder!(bool, dchar, lastDchar+1, GetBitSlicing!(21, sizes));
         // everything outside of the set maps to false
         auto trieBuilder = Builder(false);
         foreach (interval; set.byInterval)
         {
             // mark the open-right span [interval[0], interval[1]) as members
             trieBuilder.putRange(interval[0], interval[1], true);
         }
         return trieBuilder.build();
     }
 }

 /// The Trie type that codepointSetTrie produces for the same $(D sizes).
 public template CodepointSetTrie(sizes...)
 if (sumOfIntegerTuple!sizes == 21)
 {
     // the type is obtained from what an equivalent builder would return
     alias Builder = TrieBuilder!(bool, dchar, lastDchar+1, GetBitSlicing!(21, sizes));
     alias CodepointSetTrie = typeof(Builder(false).build());
 }

 /**
     A slightly more general tool for building fixed $(D Trie)
     for the Unicode data.

     Specifically unlike $(D codepointSetTrie) it allows creating mappings
     of $(D dchar) to an arbitrary type $(D T).

     $(D sizes) are the numbers of bits per level, most significant bits
     first; their sum must be equal to 21.

     Note: Overload taking $(D CodepointSet)s will naturally convert
     only to bool mapping $(D Trie)s.
 */
 public template codepointTrie(T, sizes...)
 if (sumOfIntegerTuple!sizes == 21)
 {
     alias Prefix = GetBitSlicing!(21, sizes);

     static if (is(TypeOfBitPacked!T == bool))
     {
         // set of code points -> bool Trie
         auto codepointTrie(Set)(in Set set)
             if (isCodepointSet!Set)
         {
             // The per-level bit widths have to be forwarded explicitly:
             // codepointSetTrie is constrained on them and they cannot be
             // deduced from the function argument.
             return codepointSetTrie!(sizes)(set);
         }
     }

     // associative array of code point -> value; missing keys map to defValue
     auto codepointTrie()(T[dchar] map, T defValue=T.init)
     {
         return buildTrie!(T, dchar, Prefix)(map, defValue);
     }

     // unsorted range of pairs
     // (element [0] is the value, element [1] is the code point)
     auto codepointTrie(R)(R range, T defValue=T.init)
         if (isInputRange!R
             && is(typeof(ElementType!R.init[0]) : T)
             && is(typeof(ElementType!R.init[1]) : dchar))
     {
         // build from unsorted array of pairs
         // TODO: expose index sorting functions for Trie
         return buildTrie!(T, dchar, Prefix)(range, defValue, true);
     }
 }

 @system pure unittest
 {
     import std.algorithm.comparison : max;
     import std.algorithm.searching : count;

     // sample domain: the code points of the Greek script
     auto greek = unicode.Greek;

     // stand-in for a user-defined property (or an expensive function)
     // whose results are worth caching in a Trie
     static uint luckFactor(dchar ch)
     {
         // a character is considered lucky when many of the 6 hex digits
         // of its code point coincide (leading zeros take part too),
         // e.g. arabic letter DDAL (\u0688) scores 3 for its three zero digits
         ubyte[6] nibbles; // 6 4-bit chunks of code point, lowest first
         uint rest = ch;
         foreach (ref nib; nibbles)
         {
             nib = rest & 0xF;
             rest >>= 4;
         }
         // the score is the occurrence count of the most frequent digit
         uint luck;
         foreach (digit; nibbles)
             luck = cast(uint) max(luck, count(nibbles[], digit));
         return luck;
     }

     // only unsigned built-ins are supported at the moment
     alias LuckFactor = BitPacked!(uint, 3);

     // collect the values in a temporary associative array (AA)
     LuckFactor[dchar] scores;
     foreach (cp; greek.byCodepoint)
         scores[cp] = LuckFactor(luckFactor(cp));

     // bits per stage are picked arbitrarily, feel free to optimize
     auto trie = codepointTrie!(LuckFactor, 8, 5, 8)(scores);

     // the AA is no longer needed; the trie has to agree with the function
     foreach (cp; greek.byCodepoint)
         assert(trie[cp] == luckFactor(cp)); // verify
     // CJK is not Greek, thus it has the default value
     assert(trie['\u4444'] == 0);
     // and here is a couple of quite lucky Greek characters:
     // Greek small letter epsilon with dasia
     assert(trie['\u1F11'] == 3);
     // Ancient Greek metretes sign
     assert(trie['\U00010181'] == 3);

 }

 /// The Trie type corresponding to codepointTrie for the same $(D T) and $(D sizes).
 public template CodepointTrie(T, sizes...)
 if (sumOfIntegerTuple!sizes == 21)
 {
     // the type is obtained from what an equivalent builder would return
     alias Builder = TrieBuilder!(T, dchar, lastDchar+1, GetBitSlicing!(21, sizes));
     alias CodepointTrie = typeof(Builder(T.init).build());
 }

 // "Less than" for (value, key) pairs that looks only at Pred applied to the key.
 package template cmpK0(alias Pred)
 {
     import std.typecons : Tuple;
     static bool cmpK0(Value, Key)
         (Tuple!(Value, Key) a, Tuple!(Value, Key) b)
     {
         // element [1] of each pair is the key
         const lhs = Pred(a[1]);
         const rhs = Pred(b[1]);
         return lhs < rhs;
     }
 }

 /**
     The most general way to construct a $(D Trie)
     without resorting to $(D TrieBuilder) directly.

     Comes as a family of convenience overloads.
     $(D Args) is a tuple made of the maximum key value followed by
     the predicates used to derive an index from a key.

     If the first argument is not a value convertible to $(D Key),
     all of $(D Args) are taken to be predicates
     and the maximum Key is deduced from them.
 */
 private template buildTrie(Value, Key, Args...)
 if (isValidArgsForTrie!(Key, Args))
 {
     static if (!is(typeof(Args[0]) : Key))
         alias Prefix = Args;
     else
     {
         // prefix starts with upper bound on Key
         alias Prefix = Args[1..$];
     }

     alias getIndex = mapTrieIndex!(Prefix);

     // for multi-sort: one key comparator per predicate, in predicate order
     template GetComparators(size_t n)
     {
         static if (n == 0)
             alias GetComparators = AliasSeq!();
         else
             alias GetComparators =
                 AliasSeq!(GetComparators!(n-1), cmpK0!(Prefix[n-1]));
     }

     /*
         Build $(D Trie) from a range of (Value, Key) pairs
         that is already sorted by Key according to:
         ------
         (a, b) => mapTrieIndex!(Prefix)(a) < mapTrieIndex!(Prefix)(b)
         ------
         An exception is thrown when a violation of that order is detected.

         Put differently, $(LREF mapTrieIndex) should be a
         monotonically increasing function that maps $(D Key) to an integer.

         See_Also: $(REF sort, std,_algorithm),
         $(REF SortedRange, std,_range),
         $(REF setUnion, std,_algorithm).
     */
     auto buildTrie(Range)(Range range, Value filler=Value.init)
         if (isInputRange!Range && is(typeof(Range.init.front[0]) : Value)
             && is(typeof(Range.init.front[1]) : Key))
     {
         auto trieBuilder = TrieBuilder!(Value, Key, Prefix)(filler);
         foreach (pair; range)
         {
             // pair[0] is the value, pair[1] is its key
             trieBuilder.putValue(pair[1], pair[0]);
         }
         return trieBuilder.build();
     }

     /*
         For a bool $(D Value) (or BitPacked!(bool, x)) a $(D Trie) can be
         built from a range of open-right intervals of $(D Key)s.
         Ordering requirements (and what happens when they are violated)
         are those of the Key-Value range overload.

         Intervals mark keys that map to !$(D filler), the opposite of filler.
         With the default filler of false, keys inside of the intervals
         map to true.
     */
     auto buildTrie(Range)(Range range, Value filler=Value.init)
         if (is(TypeOfBitPacked!Value ==  bool)
             && isInputRange!Range && is(typeof(Range.init.front[0]) : Key)
             && is(typeof(Range.init.front[1]) : Key))
     {
         auto trieBuilder = TrieBuilder!(Value, Key, Prefix)(filler);
         foreach (interval; range)
             trieBuilder.putRange(interval[0], interval[1], !filler);
         return trieBuilder.build();
     }

     // Same as the (Value, Key) pair overload, except that the range is first
     // sorted in place by key when 'unsorted' is true.
     auto buildTrie(Range)(Range range, Value filler, bool unsorted)
         if (isInputRange!Range
             && is(typeof(Range.init.front[0]) : Value)
             && is(typeof(Range.init.front[1]) : Key))
     {
         import std.algorithm.sorting : multiSort;
         alias KeyOrder = GetComparators!(Prefix.length);
         if (unsorted)
         {
             multiSort!(KeyOrder)(range);
         }
         return buildTrie(range, filler);
     }

     /*
         For a bool $(D Value) (or BitPacked!(bool, x)) a $(D Trie) can be
         built from a plain input range of $(D Key)s.
         Ordering requirements (and what happens when they are violated)
         are those of the Key-Value range overload.

         Keys present in the range map to !$(D filler), the opposite of filler.
         With the default filler of false, those keys map to true.
     */
     auto buildTrie(Range)(Range range, Value filler=Value.init)
         if (is(TypeOfBitPacked!Value ==  bool)
             && isInputRange!Range && is(typeof(Range.init.front) : Key))
     {
         auto trieBuilder = TrieBuilder!(Value, Key, Prefix)(filler);
         foreach (member; range)
             trieBuilder.putValue(member, !filler);
         return trieBuilder.build();
     }

     /*
         For an unsigned integer $(D Key) a $(D Trie) can be constructed from
         an array of values, the position in the array acting as the key.
     */
     auto buildTrie()(Value[] array, Value filler=Value.init)
         if (isUnsigned!Key)
     {
         auto trieBuilder = TrieBuilder!(Value, Key, Prefix)(filler);
         foreach (position, item; array)
             trieBuilder.putValue(position, item);
         return trieBuilder.build();
     }

     /*
         Builds $(D Trie) from associative array.
     */
     auto buildTrie(Key, Value)(Value[Key] map, Value filler=Value.init)
     {
         import std.array : array;
         import std.range : zip;
         // materialize (value, key) pairs; AA order is arbitrary
         auto zipped = zip(map.values, map.keys);
         auto pairs = array(zipped);
         return buildTrie(pairs, filler, true); // sort it
     }
 }

 // Stand-in for assumeSize that keeps mangled names short
 // and helps DMD inline Trie functors: passes its argument through
 // as a size_t and advertises 'bits' as its width.
 struct clamp(size_t bits)
 {
     enum bitSize = bits;

     static size_t opCall(T)(T arg)
     {
         return arg;
     }
 }

 // Like clamp, but yields element 'idx' of its (indexable) argument.
 struct clampIdx(size_t idx, size_t bits)
 {
     enum bitSize = bits;

     static size_t opCall(T)(T arg)
     {
         return arg[idx];
     }
 }

 /**
     Conceptual type that outlines the common properties of all UTF Matchers.

     Note: For illustration purposes only, every method
     call results in assertion failure.
     Use $(LREF utfMatcher) to obtain a concrete matcher
     for UTF-8 or UTF-16 encodings.
 */
 public struct MatcherConcept
 {
     /**
         $(P Perform the semantic equivalent of 2 operations:
         decoding a $(CODEPOINT) at front of $(D inp) and testing if
         it belongs to the set of $(CODEPOINTS) of this matcher. )

         $(P The effect on $(D inp) depends on the kind of function called:)

         $(P Match. If the codepoint is found in the set then range $(D inp)
         is advanced by its size in $(S_LINK Code unit, code units),
         otherwise the range is not modified.)

         $(P Skip. The range is always advanced by the size
         of the tested $(CODEPOINT) regardless of the result of test.)

         $(P Test. The range is left unaffected regardless
         of the result of test.)
     */
     public bool match(Range)(ref Range inp)
         if (isRandomAccessRange!Range && is(ElementType!Range : char))
     {
        assert(false);
     }

     ///ditto
     public bool skip(Range)(ref Range inp)
         if (isRandomAccessRange!Range && is(ElementType!Range : char))
     {
         assert(false);
     }

     ///ditto
     public bool test(Range)(ref Range inp)
         if (isRandomAccessRange!Range && is(ElementType!Range : char))
     {
         assert(false);
     }
     ///
     @safe unittest
     {
         string truth = "2² = 4";
         auto m = utfMatcher!char(unicode.Number);
         assert(m.match(truth)); // '2' is a number all right
         assert(truth == "² = 4"); // skips on match
         assert(m.match(truth)); // so is the superscript '2'
         assert(!m.match(truth)); // space is not a number
         assert(truth == " = 4"); // unaffected on no match
         assert(!m.skip(truth)); // same test ...
         assert(truth == "= 4"); // but skips a codepoint regardless
         assert(!m.test(truth)); // '=' is not a number
         assert(truth == "= 4"); // test never affects argument
     }

     /**
         Advanced feature - provide direct access to a subset of matcher based on a
         set of known encoding lengths. Lengths are provided in
         $(S_LINK Code unit, code units). The sub-matcher then may do fewer
         operations per any $(D test)/$(D match).

         Use with care as the sub-matcher won't match
         any $(CODEPOINTS) that have encoded length that doesn't belong
         to the selected set of lengths. Also the sub-matcher object references
         the parent matcher and must not be used past the lifetime
         of the latter.

         Another caveat of using sub-matcher is that skip is not available
         precisely because sub-matcher doesn't detect all lengths.
     */
     @property auto subMatcher(Lengths...)()
     {
         assert(0);
         return this;
     }

     @safe unittest
     {
         auto m = utfMatcher!char(unicode.Number);
         string square = "2²";
         // about sub-matchers
         assert(!m.subMatcher!(2,3,4).test(square)); // ASCII not covered
         assert(m.subMatcher!1.match(square)); // ASCII-only, works
         assert(!m.subMatcher!1.test(square)); // unicode '²'
         assert(m.subMatcher!(2,3,4).match(square));  //
         assert(square == "");
         wstring wsquare = "2²";
         auto m16 = utfMatcher!wchar(unicode.Number);
         // may keep ref, but the original (m16) must be kept alive
         auto bmp = m16.subMatcher!1;
         assert(bmp.match(wsquare)); // Okay, in basic multilingual plane
         assert(bmp.match(wsquare)); // And '²' too
     }
 }

 /**
     Test if $(D M) is an UTF Matcher for ranges of $(D C).

     The result is whether the lambda below compiles: it declares a $(D C[]),
     obtains a decoder from it, default-declares an $(D M), and refers to
     $(D match) and $(D test) (plus $(D skip) when present) on both
     the decoder and the array.
 */
 // NOTE(review): the checks inside the lambda are run-time `assert`s wrapped
 // around `is(...)`, and `is(...)` evaluates to false (rather than failing to
 // compile) when its operand is invalid - so they do not appear to make the
 // lambda uncompilable for a type lacking these methods. `static assert` may
 // have been intended; confirm against all uses before changing.
 public enum isUtfMatcher(M, C) = __traits(compiles, (){
     C[] s;
     auto d = s.decoder;
     M m;
     assert(is(typeof(m.match(d)) == bool));
     assert(is(typeof(m.test(d)) == bool));
     // skip is optional (sub-matchers are documented as not providing it)
     static if (is(typeof(m.skip(d))))
     {
         assert(is(typeof(m.skip(d)) == bool));
         assert(is(typeof(m.skip(s)) == bool));
     }
     assert(is(typeof(m.match(s)) == bool));
     assert(is(typeof(m.test(s)) == bool));
 });

 @safe unittest
 {
     // concrete matcher types, as returned for an (empty) set
     alias Utf8M = typeof(utfMatcher!char(CodepointSet.init));
     alias Utf16M = typeof(utfMatcher!wchar(CodepointSet.init));

     // each must qualify for both mutable and immutable code units
     foreach (C; AliasSeq!(char, immutable(char)))
         static assert(isUtfMatcher!(Utf8M, C));
     foreach (W; AliasSeq!(wchar, immutable(wchar)))
         static assert(isUtfMatcher!(Utf16M, W));
 }

 // Policy for advancing the input after a matcher has examined a code point.
 // NOTE(review): the members presumably correspond to the skip / test / match
 // behaviors described on MatcherConcept - confirm against the code using Mode.
 enum Mode {
     alwaysSkip,  // presumably: advance regardless of the outcome (cf. skip)
     neverSkip,   // presumably: never advance (cf. test)
     skipOnMatch  // presumably: advance only when matched (cf. match)
 }

 // Mixes in 'fwdStr', which calls the method named by 'fn' on a built-in
 // string, reinterpreted in place as the range type byCodeUnit returns for it.
 mixin template ForwardStrings()
 {
     private bool fwdStr(string fn, C)(ref C[] str) const pure
     {
         import std.utf : byCodeUnit;
         alias Wrapped = typeof(byCodeUnit(str));
         // view the very same array through the wrapper type (no copy),
         // so that changes made by the callee are visible through 'str'
         auto punned = cast(Wrapped*)&str;
         return mixin(fn~"(*punned)");
     }
 }

 template Utf8Matcher()
 {
     enum validSize(int sz) = sz >= 1 && sz <= 4;

     void badEncoding() pure @safe
     {
         import std.utf : UTFException;
         throw new UTFException("Invalid UTF-8 sequence");
     }

     //for 1-stage ASCII
     alias AsciiSpec = AliasSeq!(bool, char, clamp!7);
     //for 2-stage lookup of 2 byte UTF-8 sequences
     alias Utf8Spec2 = AliasSeq!(bool, char[2],
         clampIdx!(0, 5), clampIdx!(1, 6));
     //ditto for 3 byte
     alias Utf8Spec3 = AliasSeq!(bool, char[3],
         clampIdx!(0, 4),
         clampIdx!(1, 6),
         clampIdx!(2, 6)
     );
     //ditto for 4 byte
     alias Utf8Spec4 = AliasSeq!(bool, char[4],
         clampIdx!(0, 3), clampIdx!(1, 6),
         clampIdx!(2, 6), clampIdx!(3, 6)
     );
     //trie types produced by the specs above, ordered by sequence length
     alias Tables = AliasSeq!(
         typeof(TrieBuilder!(AsciiSpec)(false).build()),
         typeof(TrieBuilder!(Utf8Spec2)(false).build()),
         typeof(TrieBuilder!(Utf8Spec3)(false).build()),
         typeof(TrieBuilder!(Utf8Spec4)(false).build())
     );
     //table type for sequences of `size` code units (size is 1-based)
     alias Table(int size) = Tables[size-1];

     //payload bits of the lead byte: 0x1F, 0x0F, 0x07 for sizes 2, 3, 4
     enum leadMask(size_t size) = (cast(size_t) 1<<(7 - size))-1;
     //length marker of the lead byte: 0xC0, 0xE0, 0xF0 for sizes 2, 3, 4
     enum encMask(size_t size) = ((1 << size)-1)<<(8-size);

     // Strips the 0b10 marker from a continuation byte and returns its
     // 6 payload bits; reports bad encoding for any other byte.
     char truncate()(char ch) pure @safe
     {
         // bytes below 0x80 wrap around and end up >= 0x40 as well
         ch -= 0x80;
         if (ch >= 0x40)
         {
             badEncoding();
             return cast(char) 0;
         }
         return ch;
     }

     // Encodes `ch` as UTF-8 and returns its first `sz` code units with the
     // marker bits removed, i.e. the key layout used by the Utf8Spec tries.
     static auto encode(size_t sz)(dchar ch)
         if (sz > 1)
     {
         import std.utf : encodeUTF = encode;
         char[4] full;
         encodeUTF(full, ch);
         char[sz] stripped;
         //lead byte: drop the length marker
         stripped[0] = cast(char)(full[0] & leadMask!sz);
         //continuation bytes: keep 6 lower bits
         foreach (i; 1 .. sz)
             stripped[i] = cast(char)(full[i] & 0x3f);
         return stripped;
     }

     // Builds the full UTF-8 matcher for `set`: the set is partitioned by
     // encoded length and one trie is built per partition, keyed by the
     // code units (marker bits stripped for the multi-byte partitions).
     auto build(Set)(Set set)
     {
         import std.algorithm.iteration : map;
         auto ascii = set & unicode.ASCII;
         // each range below is the span of code points taking 2, 3 and 4
         // code units respectively
         auto utf8_2 = set & CodepointSet(0x80, 0x800);
         auto utf8_3 = set & CodepointSet(0x800, 0x1_0000);
         auto utf8_4 = set & CodepointSet(0x1_0000, lastDchar+1);
         auto asciiT = ascii.byCodepoint.map!(x=>cast(char) x).buildTrie!(AsciiSpec);
         auto utf8_2T = utf8_2.byCodepoint.map!(x=>encode!2(x)).buildTrie!(Utf8Spec2);
         auto utf8_3T = utf8_3.byCodepoint.map!(x=>encode!3(x)).buildTrie!(Utf8Spec3);
         auto utf8_4T = utf8_4.byCodepoint.map!(x=>encode!4(x)).buildTrie!(Utf8Spec4);
         alias Ret = Impl!(1,2,3,4);
         return Ret(asciiT, utf8_2T, utf8_3T, utf8_4T);
     }

     // Bootstrap UTF-8 static matcher interface
     // from 3 primitives: tab!(size), lookup and Sizes
     mixin template DefMatcher()
     {
         import std.format : format;
         import std.meta : Erase, staticIndexOf;
         // true when 1-byte (ASCII) sequences are among the handled sizes
         enum hasASCII = staticIndexOf!(1, Sizes) >= 0;
         // the handled multi-byte sizes
         alias UniSizes = Erase!(1, Sizes);

         //generate dispatch code sequence for unicode parts
         // The result is an if/else chain over the lead byte `ch` that calls
         // lookup for the matching size; it expects `ch`, `inp` and `mode`
         // to be in scope at the point where it is mixed in.
         static auto genDispatch()
         {
             string code;
             foreach (size; UniSizes)
                 code ~= format(q{
                     if ((ch & ~leadMask!%d) == encMask!(%d))
                         return lookup!(%d, mode)(inp);
                     else
                 }, size, size, size);
             static if (Sizes.length == 4) //covers all code unit cases
                 code ~= "{ badEncoding(); return false; }";
             else
                 code ~= "return false;"; //may be just fine but not covered
             return code;
         }
         enum dispatch = genDispatch();

         // Tests the sequence at the front of `inp` and pops it only when
         // it is in the set.
         public bool match(Range)(ref Range inp) const pure
             if (isRandomAccessRange!Range && is(ElementType!Range : char))
         {
             enum mode = Mode.skipOnMatch;
             assert(!inp.empty);
             immutable ch = inp[0];
             static if (hasASCII)
             {
                 if (ch < 0x80)
                 {
                     immutable r = tab!1[ch];
                     if (r)
                         inp.popFront();
                     return r;
                 }
                 else
                     mixin(dispatch);
             }
             else
                 mixin(dispatch);
         }

         static if (Sizes.length == 4) // can skip iff can detect all encodings
         {
             // Tests the sequence at the front of `inp` and pops it
             // regardless of the outcome.
             public bool skip(Range)(ref Range inp) const pure @trusted
                 if (isRandomAccessRange!Range && is(ElementType!Range : char))
             {
                 enum mode = Mode.alwaysSkip;
                 assert(!inp.empty);
                 auto ch = inp[0];
                 static if (hasASCII)
                 {
                     if (ch < 0x80)
                     {
                         inp.popFront();
                         return tab!1[ch];
                     }
                     else
                         mixin(dispatch);
                 }
                 else
                     mixin(dispatch);
             }
         }

         // Tests the sequence at the front of `inp` without consuming it.
         public bool test(Range)(ref Range inp) const pure @trusted
             if (isRandomAccessRange!Range && is(ElementType!Range : char))
         {
             enum mode = Mode.neverSkip;
             assert(!inp.empty);
             auto ch = inp[0];
             static if (hasASCII)
             {
                 if (ch < 0x80)
                     return tab!1[ch];
                 else
                     mixin(dispatch);
             }
             else
                 mixin(dispatch);
         }

         // String overloads: forwarded to the range versions above via fwdStr.
         bool match(C)(ref C[] str) const pure @trusted
             if (isSomeChar!C)
         {
             return fwdStr!"match"(str);
         }

         bool skip(C)(ref C[] str) const pure @trusted
             if (isSomeChar!C)
         {
             return fwdStr!"skip"(str);
         }

         bool test(C)(ref C[] str) const pure @trusted
             if (isSomeChar!C)
         {
             return fwdStr!"test"(str);
         }

         mixin ForwardStrings;
     }

     // Matcher that owns one lookup table per sequence length in `Sizes`.
     struct Impl(Sizes...)
     {
         import std.meta : allSatisfy, staticMap;
         static assert(allSatisfy!(validSize, Sizes),
             "Only lengths of 1, 2, 3 and 4 code unit are possible for UTF-8");
     private:
         //pick tables for chosen sizes
         alias OurTabs = staticMap!(Table, Sizes);
         OurTabs tables;
         mixin DefMatcher;
         //static dispatch helper UTF size ==> table
         alias tab(int i) = tables[i - 1];

         // A view of this matcher restricted to the given sequence lengths;
         // it keeps a pointer to `this` and does not copy the tables.
         package @property auto subMatcher(SizesToPick...)() @trusted
         {
             return CherryPick!(Impl, SizesToPick)(&this);
         }

         // Decodes a `size`-unit sequence at the front of `inp`, validates it
         // (length, continuation bytes, overlong forms) and looks it up in
         // the table for that size. Whether `inp` is advanced depends on
         // `mode`. Malformed input is reported through badEncoding.
         bool lookup(int size, Mode mode, Range)(ref Range inp) const pure @trusted
         {
             import std.typecons : staticIota;
             if (inp.length < size)
             {
                 badEncoding();
                 return false;
             }
             // every element is assigned below before it is read
             char[size] needle = void;
             needle[0] = leadMask!size & inp[0];
             foreach (i; staticIota!(1, size))
             {
                 needle[i] = truncate(inp[i]);
             }
             //overlong encoding checks
             static if (size == 2)
             {
                 //0x80-0x7FF
                 //got 6 bits in needle[1], must use at least 8 bits
                 //must use at least 2 bits in needle[1]
                 if (needle[0] < 2) badEncoding();
             }
             else static if (size == 3)
             {
                 //0x800-0xFFFF
                 //got 6 bits in needle[2], must use at least 12bits
                 //must use 6 bits in needle[1] or anything in needle[0]
                 if (needle[0] == 0 && needle[1] < 0x20) badEncoding();
             }
             else static if (size == 4)
             {
                 //0x1_0000 and above
                 //got 2x6=12 bits in needle[2 .. 3] must use at least 17bits
                 //must use 5 bits (or above) in needle[1] or anything in needle[0]
                 if (needle[0] == 0 && needle[1] < 0x10) badEncoding();
             }
             static if (mode == Mode.alwaysSkip)
             {
                 inp.popFrontN(size);
                 return tab!size[needle];
             }
             else static if (mode == Mode.neverSkip)
             {
                 return tab!size[needle];
             }
             else
             {
                 static assert(mode == Mode.skipOnMatch);
                 if (tab!size[needle])
                 {
                     inp.popFrontN(size);
                     return true;
                 }
                 else
                     return false;
             }
         }
     }

     // Sub-matcher restricted to the sequence lengths in `Sizes`. It holds a
     // pointer to the parent matcher `I` and reuses the parent's tables and
     // lookup; the public interface comes from DefMatcher.
     struct CherryPick(I, Sizes...)
     {
         import std.meta : allSatisfy;
         static assert(allSatisfy!(validSize, Sizes),
             "Only lengths of 1, 2, 3 and 4 code unit are possible for UTF-8");
     private:
         // parent matcher; must stay valid for as long as this view is used
         I* m;
         // table of the parent, addressed by sequence length
         @property ref tab(int i)() const pure { return m.tables[i - 1]; }
         bool lookup(int size, Mode mode, Range)(ref Range inp) const pure
         {
             return m.lookup!(size, mode)(inp);
         }
         mixin DefMatcher;
     }
 }

 template Utf16Matcher()
 {
     // accepted sequence lengths, in code units
     enum validSize(int sz) = sz >= 1 && sz <= 2;

     // Single place that reports malformed input: always throws UTFException.
     void badEncoding() pure
     {
         import std.utf : UTFException;
         throw new UTFException("Invalid UTF-16 sequence");
     }

     // 1-stage ASCII
     alias AsciiSpec = AliasSeq!(bool, wchar, clamp!7);
     //2-stage BMP
     alias BmpSpec = AliasSeq!(bool, wchar, sliceBits!(7, 16), sliceBits!(0, 7));
     //4-stage - full Unicode
     //assume that 0xD800 & 0xDC00 bits are cleared
     //thus leaving 10 bit per wchar to worry about
     //each 10-bit half is split into two index levels: 6+4 and 4+6 bits
     alias UniSpec = AliasSeq!(bool, wchar[2],
         assumeSize!(x=>x[0]>>4, 6), assumeSize!(x=>x[0]&0xf, 4),
         assumeSize!(x=>x[1]>>6, 4), assumeSize!(x=>x[1]&0x3f, 6),
     );
     //trie types produced by the specs above
     alias Ascii = typeof(TrieBuilder!(AsciiSpec)(false).build());
     alias Bmp = typeof(TrieBuilder!(BmpSpec)(false).build());
     alias Uni = typeof(TrieBuilder!(UniSpec)(false).build());

     // Packs a code point above 0xFFFF into two 10-bit halves, i.e. its
     // surrogate pair with the 0xD800/0xDC00 marker bits removed. This is
     // the key layout UniSpec expects.
     auto encode2(dchar ch)
     {
         ch -= 0x1_0000;
         assert(ch <= 0xF_FFFF);
         wchar[2] ret;
         //do not put surrogate bits, they are sliced off
         ret[0] = cast(wchar)(ch >> 10);
         //the low half is 10 bits wide, the same mask lookupUni applies
         //to the second code unit (a 12-bit mask would exceed the 4+6 bits
         //UniSpec declares for it)
         ret[1] = (ch & 0x3FF);
         return ret;
     }

     // Builds the full UTF-16 matcher for `set`: one trie for ASCII, one for
     // the rest of the BMP (surrogate range excluded) and one for everything
     // else, the latter keyed by encode2.
     auto build(Set)(Set set)
     {
         import std.algorithm.iteration : map;
         auto ascii = set & unicode.ASCII;
         auto bmp = (set & CodepointSet.fromIntervals(0x80, 0xFFFF+1))
             - CodepointSet.fromIntervals(0xD800, 0xDFFF+1);
         auto other = set - (bmp | ascii);
         auto asciiT = ascii.byCodepoint.map!(x=>cast(char) x).buildTrie!(AsciiSpec);
         auto bmpT = bmp.byCodepoint.map!(x=>cast(wchar) x).buildTrie!(BmpSpec);
         auto otherT = other.byCodepoint.map!(x=>encode2(x)).buildTrie!(UniSpec);
         alias Ret = Impl!(1,2);
         return Ret(asciiT, bmpT, otherT);
     }

     //bootstrap full UTF-16 matcher interface from
     //sizeFlags, lookupUni and ascii
     mixin template DefMatcher()
     {
         // Tests the sequence at the front of `inp` and pops it only when
         // it is in the set.
         public bool match(Range)(ref Range inp) const pure @trusted
             if (isRandomAccessRange!Range && is(ElementType!Range : wchar))
         {
             enum mode = Mode.skipOnMatch;
             assert(!inp.empty);
             immutable ch = inp[0];
             static if (sizeFlags & 1)
             {
                 // ASCII is answered from its own table, the rest by lookupUni
                 if (ch < 0x80)
                 {
                   if (ascii[ch])
                   {
                       inp.popFront();
                       return true;
                   }
                   else
                       return false;
                 }
                 return lookupUni!mode(inp);
             }
             else
                 return lookupUni!mode(inp);
         }

         // skip is only offered when both sequence lengths are handled
         static if (Sizes.length == 2)
         {
             // Tests the sequence at the front of `inp` and pops it
             // regardless of the outcome.
             public bool skip(Range)(ref Range inp) const pure @trusted
                 if (isRandomAccessRange!Range && is(ElementType!Range : wchar))
             {
                 enum mode = Mode.alwaysSkip;
                 assert(!inp.empty);
                 immutable ch = inp[0];
                 static if (sizeFlags & 1)
                 {
                     if (ch < 0x80)
                     {
                         inp.popFront();
                         return ascii[ch];
                     }
                     else
                         return lookupUni!mode(inp);
                 }
                 else
                     return lookupUni!mode(inp);
             }
         }

         // Tests the sequence at the front of `inp` without consuming it.
         public bool test(Range)(ref Range inp) const pure @trusted
             if (isRandomAccessRange!Range && is(ElementType!Range : wchar))
         {
             enum mode = Mode.neverSkip;
             assert(!inp.empty);
             auto ch = inp[0];
             static if (sizeFlags & 1)
                 return ch < 0x80 ? ascii[ch] : lookupUni!mode(inp);
             else
                 return lookupUni!mode(inp);
         }

         // String overloads: forwarded to the range versions above via fwdStr.
         bool match(C)(ref C[] str) const pure @trusted
             if (isSomeChar!C)
         {
             return fwdStr!"match"(str);
         }

         bool skip(C)(ref C[] str) const pure @trusted
             if (isSomeChar!C)
         {
             return fwdStr!"skip"(str);
         }

         bool test(C)(ref C[] str) const pure @trusted
             if (isSomeChar!C)
         {
             return fwdStr!"test"(str);
         }

         mixin ForwardStrings; //dispatch strings to range versions
     }

     // Matcher that owns the lookup tables for the sequence lengths in
     // `Sizes` (1 = single code unit, 2 = surrogate pair).
     struct Impl(Sizes...)
         if (Sizes.length >= 1 && Sizes.length <= 2)
     {
     private:
         import std.meta : allSatisfy;
         static assert(allSatisfy!(validSize, Sizes),
             "Only lengths of 1 and 2 code units are possible in UTF-16");
         // bit 0 set: single code units are handled; bit 1 set: pairs are
         static if (Sizes.length > 1)
             enum sizeFlags = Sizes[0] | Sizes[1];
         else
             enum sizeFlags = Sizes[0];

         static if (sizeFlags & 1)
         {
             Ascii ascii;
             Bmp bmp;
         }
         static if (sizeFlags & 2)
         {
             Uni uni;
         }
         mixin DefMatcher;

         // A view of this matcher restricted to the given sequence lengths;
         // it keeps a pointer to `this` and does not copy the tables.
         package @property auto subMatcher(SizesToPick...)() @trusted
         {
             return CherryPick!(Impl, SizesToPick)(&this);
         }

         // Classifies the sequence at the front of `inp`: a single code unit
         // goes to the bmp table, a surrogate pair to the uni table. Whether
         // `inp` is advanced depends on `mode`. A lone low surrogate, a
         // truncated pair or a high surrogate not followed by a low one is
         // reported through badEncoding.
         bool lookupUni(Mode mode, Range)(ref Range inp) const pure
         {
             // after the subtraction: 0..0x3FF is a high surrogate,
             // 0x400..0x7FF a low surrogate, anything else a plain code unit
             wchar x = cast(wchar)(inp[0] - 0xD800);
             //not a high surrogate
             if (x > 0x3FF)
             {
                 //low surrogate
                 if (x <= 0x7FF) badEncoding();
                 static if (sizeFlags & 1)
                 {
                     auto ch = inp[0];
                     static if (mode == Mode.alwaysSkip)
                         inp.popFront();
                     static if (mode == Mode.skipOnMatch)
                     {
                         if (bmp[ch])
                         {
                             inp.popFront();
                             return true;
                         }
                         else
                             return false;
                     }
                     else
                         return bmp[ch];
                 }
                 else //skip is not available for sub-matchers, so just false
                     return false;
             }
             else
             {
                 static if (sizeFlags & 2)
                 {
                     if (inp.length < 2)
                         badEncoding();
                     wchar y = cast(wchar)(inp[1] - 0xDC00);
                     //not a low surrogate
                     if (y > 0x3FF)
                         badEncoding();
                     // 10 payload bits of each half, as laid out by UniSpec
                     wchar[2] needle = [inp[0] & 0x3ff, inp[1] & 0x3ff];
                     static if (mode == Mode.alwaysSkip)
                         inp.popFrontN(2);
                     static if (mode == Mode.skipOnMatch)
                     {
                         if (uni[needle])
                         {
                             inp.popFrontN(2);
                             return true;
                         }
                         else
                             return false;
                     }
                     else
                         return uni[needle];
                 }
                 else //ditto
                     return false;
             }
         }
     }

     // Sub-matcher view over a parent matcher `I`: it holds a pointer to the
     // parent and forwards table access and lookupUni to it; the public
     // interface comes from DefMatcher.
     struct CherryPick(I, Sizes...)
         if (Sizes.length >= 1 && Sizes.length <= 2)
     {
     private:
         import std.meta : allSatisfy;
         // parent matcher; must stay valid for as long as this view is used
         I* m;
         // NOTE(review): the flags are taken from the parent, not computed
         // from `Sizes`; here `Sizes` only feeds the static assert below and
         // DefMatcher's `Sizes.length == 2` check - confirm this is intended.
         enum sizeFlags = I.sizeFlags;

         static if (sizeFlags & 1)
         {
             @property ref ascii()() const pure{ return m.ascii; }
         }

         bool lookupUni(Mode mode, Range)(ref Range inp) const pure
         {
             return m.lookupUni!mode(inp);
         }
         mixin DefMatcher;
         static assert(allSatisfy!(validSize, Sizes),
             "Only lengths of 1 and 2 code units are possible in UTF-16");
     }
 }

 // Builds the matcher for `set` over UTF-8 code units.
 private auto utf8Matcher(Set)(Set set) @trusted
 {
     return Utf8Matcher!().build(set);
 }

 // Builds the matcher for `set` over UTF-16 code units.
 private auto utf16Matcher(Set)(Set set) @trusted
 {
     return Utf16Matcher!().build(set);
 }

 /**
     Constructs a matcher object
     to classify $(CODEPOINTS) from the $(D set) for encoding
     that has $(D Char) as code unit.

     Only $(D char) (UTF-8) and $(D wchar) (UTF-16) code units are
     supported; any other $(D Char) is rejected at compile time.

     See $(LREF MatcherConcept) for API outline.
 */
 public auto utfMatcher(Char, Set)(Set set) @trusted
 if (isCodepointSet!Set)
 {
     static if (is(Char : char))
         return utf8Matcher(set);
     else static if (is(Char : wchar))
         return utf16Matcher(set);
     else static if (is(Char : dchar))
         // the message is assembled from two literals so that the
         // diagnostic stays on one line without embedded indentation
         static assert(false, "UTF-32 needs no decoding, "
             ~ "and thus is not supported by utfMatcher");
     else
         static assert(false, "Only character types 'char' and 'wchar' are allowed");
 }


 //a range of code units, packed with index to speed up forward iteration
 // `offset` is the position in `s` the range starts reading from.
 package auto decoder(C)(C[] s, size_t offset=0) @safe pure nothrow @nogc
 if (is(C : wchar) || is(C : char))
 {
     static struct Decoder
     {
     pure nothrow:
         C[] str;
         // current front position inside str
         size_t idx;
         @property C front(){ return str[idx]; }
         @property C back(){ return str[$-1]; }
         // the front is advanced by moving the index, str itself is kept
         void popFront(){ idx++; }
         void popBack(){ str = str[0..$-1]; }
         void popFrontN(size_t n){ idx += n; }
         @property bool empty(){ return idx == str.length; }
         @property auto save(){ return this; }
         // indexing and length are relative to the current front
         auto opIndex(size_t i){ return str[idx+i]; }
         @property size_t length(){ return str.length - idx; }
         alias opDollar = length;
         auto opSlice(size_t a, size_t b){ return Decoder(str[0 .. idx+b], idx+a); }
     }
     static assert(isRandomAccessRange!Decoder);
     static assert(is(ElementType!Decoder : C));
     return Decoder(s, offset);
 }

 // Walks a mixed ASCII / non-ASCII string with the full UTF-8 matcher and
 // two sub-matchers, checking the result of test/match/skip and how far
 // each call advances the decoder.
 @safe unittest
 {
     string rs = "hi! ﾈемног砀 текста";
     auto codec = rs.decoder;
     auto utf8 =  utf8Matcher(unicode.Letter);
     auto asc = utf8.subMatcher!(1);
     auto uni = utf8.subMatcher!(2,3,4);
     // 'h': a letter, ASCII only
     assert(asc.test(codec));
     assert(!uni.match(codec));
     assert(utf8.skip(codec));
     assert(codec.idx == 1);

     // 'i'
     assert(!uni.match(codec));
     assert(asc.test(codec));
     assert(utf8.skip(codec));
     assert(codec.idx == 2);
     assert(!asc.match(codec));

     // '!' is not a letter; skip still consumes it
     assert(!utf8.test(codec));
     assert(!utf8.skip(codec));

     // ' '
     assert(!asc.test(codec));
     assert(!utf8.test(codec));
     assert(!utf8.skip(codec));
     assert(utf8.test(codec));
     // the 7 non-ASCII letters of the first word
     foreach (i; 0 .. 7)
     {
         assert(!asc.test(codec));
         assert(uni.test(codec));
         assert(utf8.skip(codec));
     }
     assert(!utf8.test(codec));
     assert(!utf8.skip(codec));
     //the same with match where applicable
     codec = rs.decoder;
     assert(utf8.match(codec));
     assert(codec.idx == 1);
     assert(utf8.match(codec));
     assert(codec.idx == 2);
     // a failed match must leave the position unchanged
     assert(!utf8.match(codec));
     assert(codec.idx == 2);
     assert(!utf8.skip(codec));
     assert(!utf8.skip(codec));

     foreach (i; 0 .. 7)
     {
         assert(!asc.test(codec));
         assert(utf8.test(codec));
         assert(utf8.match(codec));
     }
     auto i = codec.idx;
     assert(!utf8.match(codec));
     assert(codec.idx == i);
 }

 // Cross-checks test/match/skip of the full matchers and of their
 // sub-matchers on a sample of letters encoded as UTF-8 and UTF-16.
 @safe unittest
 {
     import std.range : stride;
     // Runs test, match and (where available) skip on the same position and
     // verifies they agree; the position of `r` is restored before returning.
     static bool testAll(Matcher, Range)(ref Matcher m, ref Range r)
     {
         bool t = m.test(r);
         auto save = r.idx;
         assert(t == m.match(r));
         assert(r.idx == save || t); //either no change or was match
         r.idx = save;
         static if (is(typeof(m.skip(r))))
         {
             assert(t == m.skip(r));
             assert(r.idx != save); //always changed
             r.idx = save;
         }
         return t;
     }
     auto utf16 = utfMatcher!wchar(unicode.L);
     auto bmp = utf16.subMatcher!1;
     // the two code unit (surrogate pair) sub-matcher
     auto nonBmp = utf16.subMatcher!2;
     auto utf8 = utfMatcher!char(unicode.L);
     auto ascii = utf8.subMatcher!1;
     auto uni2 = utf8.subMatcher!2;
     auto uni3 = utf8.subMatcher!3;
     auto uni24 = utf8.subMatcher!(2,4);
     foreach (ch; unicode.L.byCodepoint.stride(3))
     {
         import std.utf : encode;
         char[4] buf;
         wchar[2] buf16;
         auto len = encode(buf, ch);
         auto len16 = encode(buf16, ch);
         auto c8 = buf[0 .. len].decoder;
         auto c16 = buf16[0 .. len16].decoder;
         assert(testAll(utf16, c16));
         assert(testAll(bmp, c16) || len16 != 1);
         assert(testAll(nonBmp, c16) || len16 != 2);

         assert(testAll(utf8, c8));

         //submatchers return false on out of their domain
         assert(testAll(ascii, c8) || len != 1);
         assert(testAll(uni2, c8) || len != 2);
         assert(testAll(uni3, c8) || len != 3);
         assert(testAll(uni24, c8) || (len != 2 && len != 4));
     }
 }

 // cover decode fail cases of Matcher
 @system unittest
 {
     import std.algorithm.iteration : map;
     import std.exception : collectException;
     import std.format : format;
     auto utf16 = utfMatcher!wchar(unicode.L);
     auto utf8 = utfMatcher!char(unicode.L);
     //decode failure cases UTF-8
     // every byte is spelled as a \xHH escape: "\0x00" would be a NUL
     // followed by the three characters "x00"
     alias fails8 = AliasSeq!("\xC1", "\x80\x00","\xC0\x00", "\xCF\x79",
         "\xFF\x00\x00\x00\x00", "\xC0\x80\x80\x80", "\x80\x00\x00\x00",
         "\xCF\x00\x00\x00\x00");
     foreach (msg; fails8)
     {
         // the formatted bytes serve as the assertion message
         assert(collectException((){
             auto s = msg;
             utf8.test(s);
         }()), format("%( %2x %)", cast(ubyte[]) msg));
     }
     //decode failure cases UTF-16
     // a lone high surrogate and a lone low surrogate
     alias fails16 = AliasSeq!([0xD811], [0xDC02]);
     foreach (msg; fails16)
     {
         assert(collectException((){
             auto s = msg.map!(x => cast(wchar) x);
             utf16.test(s);
         }()));
     }
 }

 /++
     Convenience function to construct optimal configurations for
     packed Trie from any $(D set) of $(CODEPOINTS).

     The parameter $(D level) indicates the number of trie levels to use,
     allowed values are: 1, 2, 3 or 4. Levels represent different trade-offs
     speed-size wise.

     $(P Level 1 is fastest and the most memory hungry (a bit array). )
     $(P Level 4 is the slowest and has the smallest footprint. )

     See the $(S_LINK Synopsis, Synopsis) section for example.

     Note:
     Level 4 stays very practical (being faster and more predictable)
     compared to using direct lookup on the $(D set) itself.


 +/
 public auto toTrie(size_t level, Set)(Set set)
 if (isCodepointSet!Set)
 {
     // the arguments are the bit widths of the successive index levels,
     // they add up to the 21 bits of a code point
     static if (level == 1)
         return codepointSetTrie!(21)(set);
     else static if (level == 2)
         return codepointSetTrie!(10, 11)(set);
     else static if (level == 3)
         return codepointSetTrie!(8, 5, 8)(set);
     else static if (level == 4)
         return codepointSetTrie!(6, 4, 4, 7)(set);
     else
         // also reached for level 0, hence the wording
         static assert(false,
             "toTrie supports only levels 1 through 4, use codepointSetTrie directly");
 }

 /**
     $(P Builds a $(D Trie) with typically optimal speed-size trade-off
     for the given $(D set) and returns it wrapped in a delegate of type
     $(D bool delegate(dchar ch)). )

     $(P The result is a 'tester' lambda that can be handed to
     algorithms taking unary predicates, such as std.algorithm.find. )

     See the $(S_LINK Synopsis, Synopsis) section for example.
 */
 public auto toDelegate(Set)(Set set)
 if (isCodepointSet!Set)
 {
     // 3 is very small and is almost as fast as 2-level (due to CPU caches?)
     auto lookupTable = toTrie!3(set);
     // the returned lambda captures the table
     return (dchar ch) => lookupTable[ch];
 }

 /**
     $(P Opaque wrapper around unsigned built-in integers and
     code unit (char/wchar/dchar) types.
     Parameter $(D sz) indicates that the value is confined
     to the range of [0, 2^^sz$(RPAREN). With this knowledge it can be
     packed more tightly when stored in certain
     data-structures like trie. )

     Note:
     $(P The $(D BitPacked!(T, sz)) is implicitly convertible to $(D T)
     but not vice versa. Users have to ensure the value fits in
     the range required and use the $(D cast)
     operator to perform the conversion.)
 */
 struct BitPacked(T, size_t sz)
 if (isIntegral!T || is(T:dchar))
 {
     // number of bits the value is declared to fit in
     enum bitSize = sz;
     T _value;
     // provides the implicit conversion to T
     alias _value this;
 }

 /*
     Depending on the form of the passed argument $(D bitSizeOf) returns
     the amount of bits required to represent a given type
     or a return type of a given functor.
 */
 template bitSizeOf(Args...)
 if (Args.length == 1)
 {
     import std.traits : ReturnType;
     alias T = Args[0];
     // 1) the argument declares its width itself through a bitSize member
     static if (__traits(compiles, { size_t val = T.bitSize; })) //(is(typeof(T.bitSize) : size_t))
     {
         enum bitSizeOf = T.bitSize;
     }
     // 2) a callable returning BitPacked: use the width of the return type
     else static if (is(ReturnType!T dummy == BitPacked!(U, bits), U, size_t bits))
     {
         enum bitSizeOf = bitSizeOf!(ReturnType!T);
     }
     // 3) anything else: the full storage size of the type
     else
     {
         enum bitSizeOf = T.sizeof*8;
     }
 }

 /**
     Evaluates to $(D true) when $(D T) is an instantiation of
     $(LREF BitPacked)!(U, x) for some $(D U) and $(D x), which makes it
     suitable for packing, and to $(D false) for any other type.
 */
 template isBitPacked(T)
 {
     // the pattern match itself is the answer, no branching needed
     enum isBitPacked = is(T == BitPacked!(U, bits), U, size_t bits);
 }

 /**
     Gives the type $(D U) from $(LREF BitPacked)!(U, x)
     or $(D T) itself for every other type.
 */
 template TypeOfBitPacked(T)
 {
     // on a successful match U is bound to the wrapped type
     static if (is(T dummy == BitPacked!(U, bits), U, size_t bits))
         alias TypeOfBitPacked = U;
     else
         alias TypeOfBitPacked = T;
 }

 /*
     Wrapper, used in definition of custom data structures from $(D Trie) template.
     Applying it to a unary lambda function indicates that the returned value always
     fits within $(D bits) of bits.
 */
 struct assumeSize(alias Fn, size_t bits)
 {
     // the declared width; it is not checked against what Fn returns
     enum bitSize = bits;
     // calling the type itself forwards the argument to Fn
     static auto ref opCall(T)(auto ref T arg)
     {
         return Fn(arg);
     }
 }

 /*
     Functor type that extracts the bits in positions [from, to) of an
     unsigned integral value, counting from the least significant bit.
     It exposes $(D bitSize) the same way assumeSize does, so it can be used
     directly with $(D Trie) template.
 */
 struct sliceBits(size_t from, size_t to)
 {
     //for now bypass assumeSize, DMD has trouble inlining it
     enum bitSize = to-from;
     static auto opCall(T)(T x)
     out(result)
     {
         // the extracted value must fit in the declared width
         assert(result < (1 << bitSize));
     }
     body
     {
         static assert(from < to);
         // bitSize low bits set
         enum mask = (1 << bitSize) - 1;
         static if (from == 0)
             return x & mask; // nothing to shift out
         else
             return (x >> from) & mask;
     }
 }

 // bits 0..7 of x
 @safe pure nothrow @nogc uint low_8(uint x) { return x&0xFF; }
 // bits 8..15 of x
 @safe pure nothrow @nogc uint midlow_8(uint x){ return (x&0xFF00)>>8; }
 // the same extractors tagged with their 8-bit width, for use as trie levels
 alias lo8 = assumeSize!(low_8, 8);
 alias mlo8 = assumeSize!(midlow_8, 8);

 // bitSizeOf must report the width for each supported kind of argument
 static assert(bitSizeOf!lo8 == 8);
 static assert(bitSizeOf!(sliceBits!(4, 7)) == 3);
 static assert(bitSizeOf!(BitPacked!(uint, 2)) == 2);

 // Compile-time sequence of the values start, start+1, ..., end-1
 // (empty when start >= end), built by recursion on start.
 template Sequence(size_t start, size_t end)
 {
     static if (start < end)
         alias Sequence = AliasSeq!(start, Sequence!(start+1, end));
     else
         alias Sequence = AliasSeq!();
 }

 //---- TRIE TESTS ----
 // Builds tries of 1 to 4 levels over small sets (plus one keyed by strings
 // and one by ubyte) and checks every lookup against the source data.
 @system unittest
 {
     import std.algorithm.iteration : map;
     import std.algorithm.sorting : sort;
     import std.array : array;
     import std.conv : text, to;
     import std.range : iota;
     // prints the memory footprint of a trie; compiled to a no-op unless
     // version std_uni_stats is set
     static trieStats(TRIE)(TRIE t)
     {
         version (std_uni_stats)
         {
             import std.stdio : writefln, writeln;
             writeln("---TRIE FOOTPRINT STATS---");
             foreach (i; staticIota!(0, t.table.dim) )
             {
                 writefln("lvl%s = %s bytes;  %s pages"
                          , i, t.bytes!i, t.pages!i);
             }
             writefln("TOTAL: %s bytes", t.bytes);
             version (none)
             {
                 writeln("INDEX (excluding value level):");
                 foreach (i; staticIota!(0, t.table.dim-1) )
                     writeln(t.table.slice!(i)[0 .. t.table.length!i]);
             }
             writeln("---------------------------");
         }
     }
     //@@@BUG link failure, lambdas not found by linker somehow (in case of trie2)
     // alias lo8   = assumeSize!(8, function (uint x) { return x&0xFF; });
     // alias next8 = assumeSize!(7, function (uint x) { return (x&0x7F00)>>8; });
     alias Set = CodepointSet;
     auto set = Set('A','Z','a','z');
     auto trie = buildTrie!(bool, uint, 256, lo8)(set.byInterval);// simple bool array
     for (int a='a'; a<'z';a++)
         assert(trie[a]);
     for (int a='A'; a<'Z';a++)
         assert(trie[a]);
     for (int a=0; a<'A'; a++)
         assert(!trie[a]);
     for (int a ='Z'; a<'a'; a++)
         assert(!trie[a]);
     trieStats(trie);

     // 2-level trie; the same interval pattern repeats across 256-wide pages
     auto redundant2 = Set(
         1, 18, 256+2, 256+111, 512+1, 512+18, 768+2, 768+111);
     auto trie2 = buildTrie!(bool, uint, 1024, mlo8, lo8)(redundant2.byInterval);
     trieStats(trie2);
     foreach (e; redundant2.byCodepoint)
         assert(trie2[e], text(cast(uint) e, " - ", trie2[e]));
     foreach (i; 0 .. 1024)
     {
         assert(trie2[i] == (i in redundant2));
     }


     // 3-level trie
     auto redundant3 = Set(
           2,    4,    6,    8,    16,
        2+16, 4+16, 16+6, 16+8, 16+16,
        2+32, 4+32, 32+6, 32+8,
       );

     enum max3 = 256;
     // sliceBits
     auto trie3 = buildTrie!(bool, uint, max3,
             sliceBits!(6,8), sliceBits!(4,6), sliceBits!(0,4)
         )(redundant3.byInterval);
     trieStats(trie3);
     foreach (i; 0 .. max3)
         assert(trie3[i] == (i in redundant3), text(cast(uint) i));

     // 4-level trie; only membership of the set's elements is verified
     auto redundant4 = Set(
             10, 64, 64+10, 128, 128+10, 256, 256+10, 512,
             1000, 2000, 3000, 4000, 5000, 6000
         );
     enum max4 = 2^^16;
     auto trie4 = buildTrie!(bool, size_t, max4,
             sliceBits!(13, 16), sliceBits!(9, 13), sliceBits!(6, 9) , sliceBits!(0, 6)
         )(redundant4.byInterval);
     foreach (i; 0 .. max4)
     {
         if (i in redundant4)
             assert(trie4[i], text(cast(uint) i));
     }
     trieStats(trie4);

         // trie keyed by strings; the keys are sorted by their mapped index
         // before being handed to buildTrie
         alias mapToS = mapTrieIndex!(useItemAt!(0, char));
         string[] redundantS = ["tea", "start", "orange"];
         redundantS.sort!((a,b) => mapToS(a) < mapToS(b))();
         auto strie = buildTrie!(bool, string, useItemAt!(0, char))(redundantS);
         // using first char only
         assert(redundantS == ["orange", "start", "tea"]);
         assert(strie["test"], text(strie["test"]));
         assert(!strie["aea"]);
         assert(strie["s"]);

     // a bit size test
     auto a = array(map!(x => to!ubyte(x))(iota(0, 256)));
     auto bt = buildTrie!(bool, ubyte, sliceBits!(7, 8), sliceBits!(5, 7), sliceBits!(0, 5))(a);
     trieStats(bt);
     foreach (i; 0 .. 256)
         assert(bt[cast(ubyte) i]);
 }

 // Trie index functor for array keys: yields the element at position `idx`,
 // declared to occupy the full bit width of T.
 template useItemAt(size_t idx, T)
 if (isIntegral!T || is(T: dchar))
 {
     size_t impl(in T[] arr){ return arr[idx]; }
     alias useItemAt = assumeSize!(impl, 8*T.sizeof);
 }

 // Trie index functor for array keys: yields the last element of the key,
 // declared to occupy the full bit width of T.
 template useLastItem(T)
 {
     size_t impl(in T[] arr){ return arr[$-1]; }
     alias useLastItem = assumeSize!(impl, 8*T.sizeof);
 }

 // Sum of bitSizeOf over all elements of Prefix (0 for an empty list).
 template fullBitSize(Prefix...)
 {
     static if (Prefix.length > 0)
         enum fullBitSize = bitSizeOf!(Prefix[0])+fullBitSize!(Prefix[1..$]);
     else
         enum fullBitSize = 0;
 }

 // Computes the BitPacked types for the index levels of a trie with the
 // given Prefix: one type per level except the last (value) level.
 template idxTypes(Key, size_t fullBits, Prefix...)
 {
     static if (Prefix.length == 1)
     {// the last level is value level, so no index once reduced to 1-level
         alias idxTypes = AliasSeq!();
     }
     else
     {
         // Important note on bit packing
         // Each level has to hold enough bits to address the next one
         // The bottom level is known to hold full bit width
         // thus its size in pages is full_bit_width - size_of_last_prefix
         // Recurse on this notion
         alias idxTypes =
             AliasSeq!(
                 idxTypes!(Key, fullBits - bitSizeOf!(Prefix[$-1]), Prefix[0..$-1]),
                 BitPacked!(typeof(Prefix[$-2](Key.init)), fullBits - bitSizeOf!(Prefix[$-1]))
             );
     }
 }

 //============================================================================

 // Three-way comparison of two property names (result of cmp).
 // Both names are lowercased with std.ascii.toLower, and white space, '-'
 // and '_' are dropped before comparing.
 @safe pure int comparePropertyName(Char1, Char2)(const(Char1)[] a, const(Char2)[] b)
 if (is(Char1 : dchar) && is(Char2 : dchar))
 {
     import std.algorithm.comparison : cmp;
     import std.algorithm.iteration : map, filter;
     import std.ascii : toLower;

     // true for characters that take part in the comparison
     static bool significant(dchar c)
     {
         return !(c.isWhite || c == '-' || c == '_');
     }

     auto lhs = a.map!toLower.filter!significant;
     auto rhs = b.map!toLower.filter!significant;
     return cmp(lhs, rhs);
 }

 @safe pure unittest
 {
     // '-' is dropped and letters are lowercased, so the result is 0 (equal)
     assert(!comparePropertyName("foo-bar", "fooBar"));
 }

 // Strict "less than" predicate built on comparePropertyName.
 bool propertyNameLess(Char1, Char2)(const(Char1)[] a, const(Char2)[] b) @safe pure
 if (is(Char1 : dchar) && is(Char2 : dchar))
 {
     immutable order = comparePropertyName(a, b);
     return order < 0;
 }

 //============================================================================
 // Utilities for compression of Unicode code point sets
 //============================================================================

 // Appends `val` to `arr` in a variable-length form:
 //   val < 2^7  -> 1 byte : the value itself (top bit clear)
 //   val < 2^13 -> 2 bytes: 0b100xxxxx, then the low 8 bits
 //   otherwise  -> 3 bytes: 0b101xxxxx, then two more bytes, big-endian
 // Values of 2^21 and above are rejected by an assert.
 @safe void compressTo(uint val, ref ubyte[] arr) pure nothrow
 {
     // not optimized as usually done 1 time (and not public interface)
     enum ubyte twoByteTag = 0b1_00 << 5;
     enum ubyte threeByteTag = 0b1_01 << 5;

     if (val < 128)
     {
         arr ~= cast(ubyte) val;
         return;
     }
     if (val < (1 << 13))
     {
         arr ~= cast(ubyte)(twoByteTag | (val >> 8));
         arr ~= cast(ubyte)(val & 0xFF);
         return;
     }
     assert(val < (1 << 21));
     arr ~= cast(ubyte)(threeByteTag | (val >> 16));
     arr ~= cast(ubyte)((val >> 8) & 0xFF);
     arr ~= cast(ubyte)(val & 0xFF);
 }

 // Reads one value written by compressTo from `arr` starting at `idx` and
 // advances `idx` past it.
 // Throws (via enforce) when the bytes announced by the lead byte are missing.
 @safe uint decompressFrom(const(ubyte)[] arr, ref size_t idx) pure
 {
     import std.exception : enforce;

     immutable lead = arr[idx++];
     if ((lead & 0x80) == 0) // no top bit -> [0 .. 127]
         return lead;

     // bit 5 of the lead byte selects 1 or 2 continuation bytes
     immutable tail = ((lead >> 5) & 1) + 1;
     enforce(idx + tail <= arr.length, "bad code point interval encoding");

     uint result = lead & 0x1F;
     foreach (b; arr[idx .. idx + tail])
         result = (result << 8) | b;
     idx += tail;
     return result;
 }


 // Encodes a range of [start, end) pairs as a byte stream of deltas:
 // each bound is stored (via compressTo) as its distance from the previously
 // stored bound. An end bound equal to lastDchar+1 is omitted.
 package ubyte[] compressIntervals(Range)(Range intervals)
 if (isInputRange!Range && isIntegralPair!(ElementType!Range))
 {
     ubyte[] encoded;
     uint prev = 0;
     foreach (ival; intervals)
     {
         compressTo(ival[0]-prev, encoded);
         prev = ival[0];
         // till the end of the domain so don't store it
         if (ival[1] == lastDchar+1)
             continue;
         compressTo(ival[1]-prev, encoded);
         prev = ival[1];
     }
     return encoded;
 }

 @safe pure unittest
 {
     import std.algorithm.comparison : equal;
     import std.typecons : tuple;

     // bounds are stored as deltas: 80, 127-80, 128-127, then 1 << 10 in 2 bytes
     auto run = [tuple(80, 127), tuple(128, (1 << 10)+128)];
     ubyte[] enc = [cast(ubyte) 80, 47, 1, (0b1_00 << 5) | (1 << 2), 0];
     assert(compressIntervals(run) == enc);
     // the last interval ends at lastDchar+1, so its end bound is not stored
     auto run2 = [tuple(0, (1 << 20)+512+1), tuple((1 << 20)+512+4, lastDchar+1)];
     ubyte[] enc2 = [cast(ubyte) 0, (0b1_01 << 5) | (1 << 4), 2, 1, 3]; // odd length-ed
     assert(compressIntervals(run2) == enc2);
     // decode the individual values back
     size_t  idx = 0;
     assert(decompressFrom(enc, idx) == 80);
     assert(decompressFrom(enc, idx) == 47);
     assert(decompressFrom(enc, idx) == 1);
     assert(decompressFrom(enc, idx) == (1 << 10));
     idx = 0;
     assert(decompressFrom(enc2, idx) == 0);
     assert(decompressFrom(enc2, idx) == (1 << 20)+512+1);
     // full round trip through the lazy decoder
     assert(equal(decompressIntervals(compressIntervals(run)), run));
     assert(equal(decompressIntervals(compressIntervals(run2)), run2));
 }

 // Creates a range of $(D CodepointInterval) that lazily decodes compressed data.
 // `data` is wrapped in a DecompressedIntervals, whose constructor already
 // decodes the first interval.
 @safe package auto decompressIntervals(const(ubyte)[] data) pure
 {
     return DecompressedIntervals(data);
 }

 // Forward range that decodes a delta-encoded byte stream into
 // CodepointInterval values, one interval per popFront.
 @safe struct DecompressedIntervals
 {
 pure:
     const(ubyte)[] _stream;
     size_t _idx;
     CodepointInterval _front;

     // Stores the stream and decodes the first interval (if any).
     this(const(ubyte)[] stream)
     {
         _stream = stream;
         popFront();
     }

     // The most recently decoded interval.
     @property CodepointInterval front()
     {
         assert(!empty);
         return _front;
     }

     // Decodes the next interval, or marks the range empty once the stream
     // has been consumed.
     void popFront()
     {
         immutable consumed = _idx == _stream.length;
         if (consumed)
         {
             // size_t.max is the "empty" marker checked by `empty`
             _idx = size_t.max;
             return;
         }

         // the start is a delta from the previous interval's end
         uint prev = _front[1];
         _front[0] = prev + decompressFrom(_stream, _idx);

         if (_idx != _stream.length)
         {
             // the end is a delta from the start just decoded
             prev = _front[0];
             _front[1] = prev + decompressFrom(_stream, _idx);
         }
         else
         {
             // odd length ---> till the end
             _front[1] = lastDchar+1;
         }
     }

     @property bool empty() const
     {
         return _idx == size_t.max;
     }

     @property DecompressedIntervals save() { return this; }
 }

 static assert(isInputRange!DecompressedIntervals);
 static assert(isForwardRange!DecompressedIntervals);
 //============================================================================

 version (std_uni_bootstrap){}
 else
 {

 // Helper for looking up code point sets: binary-searches `table` (treated as
 // sorted by name under propertyNameLess) for `name`.
 // Returns the index of the matching entry, or -1 when there is none.
 @trusted ptrdiff_t findUnicodeSet(alias table, C)(in C[] name) pure
 {
     import std.algorithm.iteration : map;
     import std.range : assumeSorted;

     auto names = assumeSorted!((a,b) => propertyNameLess(a,b))
         (table.map!"a.name"());
     // number of entries that sort strictly before `name`
     size_t pos = names.lowerBound(name).length;
     if (pos >= names.length || comparePropertyName(names[pos], name) != 0)
         return -1;
     return pos;
 }

 // Looks `name` up in `table` and, when found, stores the set built from the
 // entry's compressed data into `dest`.
 // Returns true on success; `dest` is left untouched otherwise.
 @trusted bool loadUnicodeSet(alias table, Set, C)(in C[] name, ref Set dest) pure
 {
     immutable idx = findUnicodeSet!table(name);
     if (idx < 0)
         return false;
     dest = Set(asSet(table[idx].compressed));
     return true;
 }

 // Loads the code point set for the property called `name` into `target`.
 // Names are matched with comparePropertyName. A handful of cumulative
 // properties (L, LC, M, N, P, S, Z, C, graphical, any, ascii) are assembled
 // here by hand; every other name is forwarded to loadUnicodeSet over
 // uniProps.tab.
 // Returns true when a set was loaded, false when the name is unknown.
 @trusted bool loadProperty(Set=CodepointSet, C)
     (in C[] name, ref Set target) pure
 {
     import std.internal.unicode_tables : uniProps; // generated file
     alias ucmp = comparePropertyName;
     // conjure cumulative properties by hand
     if (ucmp(name, "L") == 0 || ucmp(name, "Letter") == 0)
     {
         target = asSet(uniProps.Lu);
         target |= asSet(uniProps.Ll);
         target |= asSet(uniProps.Lt);
         target |= asSet(uniProps.Lo);
         target |= asSet(uniProps.Lm);
     }
     else if (ucmp(name,"LC") == 0 || ucmp(name,"Cased Letter")==0)
     {
         target = asSet(uniProps.Ll);
         target |= asSet(uniProps.Lu);
         target |= asSet(uniProps.Lt);// Title case
     }
     else if (ucmp(name, "M") == 0 || ucmp(name, "Mark") == 0)
     {
         target = asSet(uniProps.Mn);
         target |= asSet(uniProps.Mc);
         target |= asSet(uniProps.Me);
     }
     else if (ucmp(name, "N") == 0 || ucmp(name, "Number") == 0)
     {
         target = asSet(uniProps.Nd);
         target |= asSet(uniProps.Nl);
         target |= asSet(uniProps.No);
     }
     else if (ucmp(name, "P") == 0 || ucmp(name, "Punctuation") == 0)
     {
         target = asSet(uniProps.Pc);
         target |= asSet(uniProps.Pd);
         target |= asSet(uniProps.Ps);
         target |= asSet(uniProps.Pe);
         target |= asSet(uniProps.Pi);
         target |= asSet(uniProps.Pf);
         target |= asSet(uniProps.Po);
     }
     else if (ucmp(name, "S") == 0 || ucmp(name, "Symbol") == 0)
     {
         target = asSet(uniProps.Sm);
         target |= asSet(uniProps.Sc);
         target |= asSet(uniProps.Sk);
         target |= asSet(uniProps.So);
     }
     else if (ucmp(name, "Z") == 0 || ucmp(name, "Separator") == 0)
     {
         target = asSet(uniProps.Zs);
         target |= asSet(uniProps.Zl);
         target |= asSet(uniProps.Zp);
     }
     else if (ucmp(name, "C") == 0 || ucmp(name, "Other") == 0)
     {
         // NOTE(review): this unions the "...o" subcategories (Co, Lo, No, So, Po).
         // The Unicode general category C is normally Cc | Cf | Cs | Co | Cn -
         // confirm whether this is intended. "C"/"Other" is also absent from
         // the name list in isPrettyPropertyName.
         target = asSet(uniProps.Co);
         target |= asSet(uniProps.Lo);
         target |= asSet(uniProps.No);
         target |= asSet(uniProps.So);
         target |= asSet(uniProps.Po);
     }
     else if (ucmp(name, "graphical") == 0)
     {
         // Alphabetic + marks + numbers + punctuation + space separators + symbols
         target = asSet(uniProps.Alphabetic);

         target |= asSet(uniProps.Mn);
         target |= asSet(uniProps.Mc);
         target |= asSet(uniProps.Me);

         target |= asSet(uniProps.Nd);
         target |= asSet(uniProps.Nl);
         target |= asSet(uniProps.No);

         target |= asSet(uniProps.Pc);
         target |= asSet(uniProps.Pd);
         target |= asSet(uniProps.Ps);
         target |= asSet(uniProps.Pe);
         target |= asSet(uniProps.Pi);
         target |= asSet(uniProps.Pf);
         target |= asSet(uniProps.Po);

         target |= asSet(uniProps.Zs);

         target |= asSet(uniProps.Sm);
         target |= asSet(uniProps.Sc);
         target |= asSet(uniProps.Sk);
         target |= asSet(uniProps.So);
     }
     else if (ucmp(name, "any") == 0)
         target = Set.fromIntervals(0, 0x110000); // every code point: [0, 0x110000)
     else if (ucmp(name, "ascii") == 0)
         target = Set.fromIntervals(0, 0x80); // [0, 0x80)
     else
         return loadUnicodeSet!(uniProps.tab)(name, target); // generic table lookup
     return true;
 }

 // CTFE-only helper for checking property names at compile-time.
 // Returns true when `name` matches (under comparePropertyName) one of the
 // names listed below.
 @safe bool isPrettyPropertyName(C)(in C[] name)
 {
     auto names = [
         "L", "Letter",
         "LC", "Cased Letter",
         "M", "Mark",
         "N", "Number",
         "P", "Punctuation",
         "S", "Symbol",
         "Z", "Separator",
         "Graphical",
         "any",
         "ascii"
     ];
     foreach (candidate; names)
     {
         if (comparePropertyName(candidate, name) == 0)
             return true;
     }
     return false;
 }

 // CTFE-only, not optimized: true when `table` has an entry matching `name`.
 @safe private static bool findSetName(alias table, C)(in C[] name)
 {
     immutable idx = findUnicodeSet!table(name);
     return idx >= 0;
 }

 // Provides name-based set lookup over a single `table`.
 // `kind` only appears in the error messages.
 template SetSearcher(alias table, string kind)
 {
     /// Run-time checked search.
     static auto opCall(C)(in C[] name)
         if (is(C : dchar))
     {
         import std.conv : to;
         CodepointSet set;
         if (!loadUnicodeSet!table(name, set))
         {
             throw new Exception("No unicode set for "~kind~" by name "
                 ~name.to!string()~" was found.");
         }
         return set;
     }
     /// Compile-time checked search.
     static @property auto opDispatch(string name)()
     {
         static if (!findSetName!table(name))
         {
             static assert(false, "No unicode set for "~kind~" by name "
                 ~name~" was found.");
         }
         else
         {
             // the name was validated at compile time; load it at run time
             CodepointSet set;
             loadUnicodeSet!table(name, set);
             return set;
         }
     }
 }

 /**
     A single entry point to lookup Unicode $(CODEPOINT) sets by name or alias of
     a block, script or general category.

     It uses well defined standard rules of property name lookup.
     This includes fuzzy matching of names, so that
     'White_Space', 'white-SpAce' and 'whitespace' are all considered equal
     and yield the same set of white space $(CHARACTERS).
 */
 @safe public struct unicode
 {
     /**
         Performs the lookup of set of $(CODEPOINTS)
         with compile-time correctness checking.
         This short-cut version combines 3 searches:
         across blocks, scripts, and common binary properties.

         Note that since scripts and blocks overlap the
         usual trick to disambiguate is used - to get a block use
         $(D unicode.InBlockName), to search a script
         use $(D unicode.ScriptName).

         See_Also: $(LREF block), $(LREF script)
         and (not included in this search) $(LREF hangulSyllableType).
     */

     static @property auto opDispatch(string name)() pure
     {
         // reject unknown names at compile time, load known ones at run time
         static if (!findAny(name))
             static assert(false, "No unicode set by name "~name~" was found.");
         else
             return loadAny(name);
     }

     ///
     @safe unittest
     {
         import std.exception : collectException;
         // compile-time checked lookup through opDispatch
         auto ascii = unicode.ASCII;
         assert(ascii['A']);
         assert(ascii['~']);
         assert(!ascii['\u00e0']);
         // matching is case-insensitive
         assert(ascii == unicode.ascII);
         assert(!ascii['à']);
         // underscores, '-' and whitespace in names are ignored too
         auto latin = unicode.in_latin1_Supplement;
         assert(latin['à']);
         assert(!latin['$']);
         // BTW Latin 1 Supplement is a block, hence "In" prefix
         assert(latin == unicode("In Latin 1 Supplement"));
         // run-time look up throws if no such set is found
         assert(collectException(unicode("InCyrilliac")));
     }

     /**
         The same lookup across blocks, scripts, or binary properties,
         but performed at run-time.
         This version is provided for cases where $(D name)
         is not known beforehand; otherwise compile-time
         checked $(LREF opDispatch) is typically a better choice.

         See the $(S_LINK Unicode properties, table of properties) for available
         sets.
     */
     static auto opCall(C)(in C[] name)
         if (is(C : dchar))
     {
         // loadAny throws an Exception when no set matches `name`
         return loadAny(name);
     }

     /**
         Narrows down the search for sets of $(CODEPOINTS) to all Unicode blocks.

         Note:
         Here block names are unambiguous as no scripts are searched
         and thus to search use simply $(D unicode.block.BlockName) notation.

         See $(S_LINK Unicode properties, table of properties) for available sets.
         See_Also: $(S_LINK Unicode properties, table of properties).
     */
     struct block
     {
         import std.internal.unicode_tables : blocks; // generated file
         // brings in opCall (run-time) and opDispatch (compile-time) lookup over blocks.tab
         mixin SetSearcher!(blocks.tab, "block");
     }

     ///
     @safe unittest
     {
         // use .block for explicitness
         // (the short-cut form needs the "In" prefix to select a block)
         assert(unicode.block.Greek_and_Coptic == unicode.InGreek_and_Coptic);
     }

     /**
         Narrows down the search for sets of $(CODEPOINTS) to all Unicode scripts.

         See the $(S_LINK Unicode properties, table of properties) for available
         sets.
     */
     struct script
     {
         import std.internal.unicode_tables : scripts; // generated file
         // brings in opCall (run-time) and opDispatch (compile-time) lookup over scripts.tab
         mixin SetSearcher!(scripts.tab, "script");
     }

     ///
     @safe unittest
     {
         auto arabicScript = unicode.script.arabic;
         auto arabicBlock = unicode.block.arabic;
         // there is an intersection between script and block
         assert(arabicBlock['؁']);
         assert(arabicScript['؁']);
         // but they are different
         assert(arabicBlock != arabicScript);
         // short-cut lookups: "In" prefix selects the block, the bare name the script
         assert(arabicBlock == unicode.inArabic);
         assert(arabicScript == unicode.arabic);
     }

     /**
         Fetch a set of $(CODEPOINTS) that have the given hangul syllable type.

         Other non-binary properties (once supported) follow the same
         notation - $(D unicode.propertyName.propertyValue) for compile-time
         checked access and $(D unicode.propertyName(propertyValue))
         for run-time checked one.

         See the $(S_LINK Unicode properties, table of properties) for available
         sets.
     */
     struct hangulSyllableType
     {
         import std.internal.unicode_tables : hangul; // generated file
         // brings in opCall (run-time) and opDispatch (compile-time) lookup over hangul.tab
         mixin SetSearcher!(hangul.tab, "hangul syllable type");
     }

     ///
     @safe unittest
     {
         // L here is syllable type not Letter as in unicode.L short-cut
         auto leadingVowel = unicode.hangulSyllableType("L");
         // check that some leading vowels are present
         foreach (vowel; '\u1110'..'\u115F')
             assert(leadingVowel[vowel]);
         // run-time and compile-time lookups yield the same set
         assert(leadingVowel == unicode.hangulSyllableType.L);
     }

 private:
     alias ucmp = comparePropertyName;

     // CTFE helper behind opDispatch: true when `name` is one of the
     // hand-assembled property names, an entry of uniProps.tab or scripts.tab,
     // or "In" followed by the name of an entry of blocks.tab.
     static bool findAny(string name)
     {
         import std.internal.unicode_tables : blocks, scripts, uniProps; // generated file
         // The length guard mirrors loadAny: without it, slicing name[0 .. 2]
         // on a 1-character unknown name is a range violation during CTFE
         // instead of a clean "not found" result.
         return isPrettyPropertyName(name)
             || findSetName!(uniProps.tab)(name) || findSetName!(scripts.tab)(name)
             || (name.length > 2 && ucmp(name[0 .. 2],"In") == 0
                 && findSetName!(blocks.tab)(name[2..$]));
     }

     // Run-time lookup used by opCall and opDispatch. Tries, in order:
     // loadProperty, the scripts table, then the blocks table for names
     // starting with "In". Throws an Exception when nothing matches.
     static auto loadAny(Set=CodepointSet, C)(in C[] name) pure
     {
         import std.conv : to;
         import std.internal.unicode_tables : blocks, scripts; // generated file
         Set set;
         if (loadProperty(name, set))
             return set;
         if (loadUnicodeSet!(scripts.tab)(name, set))
             return set;
         if (name.length > 2 && ucmp(name[0 .. 2],"In") == 0
             && loadUnicodeSet!(blocks.tab)(name[2..$], set))
         {
             return set;
         }
         throw new Exception("No unicode set by name "~name.to!string()~" was found.");
     }

     // FIXME: re-disable once the compiler is fixed
     // Disabled to prevent the mistake of creating instances of this pseudo-struct.
     //@disable ~this();
 }

 @safe unittest
 {
     import std.internal.unicode_tables : blocks, uniProps; // generated file
     // "In" + block name resolves to the block's set
     assert(unicode("InHebrew") == asSet(blocks.Hebrew));
     // cumulative property assembled by hand in loadProperty
     assert(unicode("separator") == (asSet(uniProps.Zs) | asSet(uniProps.Zl) | asSet(uniProps.Zp)));
     // the '-' after the "In" prefix is ignored by the name matching
     assert(unicode("In-Kharoshthi") == asSet(blocks.Kharoshthi));
 }

 enum EMPTY_CASE_TRIE = ushort.max;// from what gen_uni uses internally

 // control - '\r'
 // Case labels meant to be mixed into a switch statement. They cover
 // U+0000..U+0008, U+0009..U+000C, U+000E..U+001F and U+007F..U+009F,
 // i.e. everything in those ranges except U+000D ('\r').
 enum controlSwitch = `
     case '\u0000':..case '\u0008':case '\u000E':..case '\u001F':case '\u007F':..
     case '\u0084':case '\u0086':..case '\u009F': case '\u0009':..case '\u000C': case '\u0085':
 `;
 // TODO: redo the most of hangul stuff algorithmically in case of Graphemes too
 // kill unrolled switches

 // True when `ch` lies in the inclusive range U+1F1E6 .. U+1F1FF.
 private static bool isRegionalIndicator(dchar ch) @safe pure @nogc nothrow
 {
     enum dchar first = '\U0001F1E6';
     enum dchar last = '\U0001F1FF';
     return first <= ch && ch <= last;
 }

 // Shared implementation of grapheme cluster decoding.
 // With getValue == true the function returns the decoded Grapheme; with
 // getValue == false it returns void and only advances `range` past one
 // cluster.
 template genericDecodeGrapheme(bool getValue)
 {
     alias graphemeExtend = graphemeExtendTrie;
     alias spacingMark = mcTrie;
     static if (getValue)
         alias Value = Grapheme;
     else
         alias Value = void;

     // Consumes exactly one grapheme cluster from the front of `range`.
     // `range` must not be empty (checked by an assert).
     Value genericDecodeGrapheme(Input)(ref Input range)
     {
         import std.internal.unicode_tables : isHangL, isHangT, isHangV; // generated file
         // States of the cluster scanner; L, V and LVT track Hangul syllable
         // sequences, RI tracks regional indicators.
         enum GraphemeState {
             Start,
             CR,
             RI,
             L,
             V,
             LVT
         }
         static if (getValue)
             Grapheme grapheme;
         auto state = GraphemeState.Start;
         // Accepts the current code point: records it (when a value is
         // requested) and advances the range.
         enum eat = q{
             static if (getValue)
                 grapheme ~= ch;
             range.popFront();
         };

         dchar ch;
         assert(!range.empty, "Attempting to decode grapheme from an empty " ~ Input.stringof);
         while (!range.empty)
         {
             ch = range.front;
             final switch (state) with(GraphemeState)
             {
             case Start:
                 // the first code point always belongs to the cluster
                 mixin(eat);
                 if (ch == '\r')
                     state = CR;
                 else if (isRegionalIndicator(ch))
                     state = RI;
                 else if (isHangL(ch))
                     state = L;
                 else if (hangLV[ch] || isHangV(ch))
                     state = V;
                 else if (hangLVT[ch])
                     state = LVT;
                 else if (isHangT(ch))
                     state = LVT;
                 else
                 {
                     switch (ch)
                     {
                     mixin(controlSwitch);
                         // control characters never take extenders
                         goto L_End;
                     default:
                         goto L_End_Extend;
                     }
                 }
             break;
             case CR:
                 // "\r\n" is a single cluster. After CR (with or without LF)
                 // the cluster ends immediately: like the other controls it
                 // must not absorb following extenders or spacing marks
                 // (UAX #29, GB3/GB4).
                 if (ch == '\n')
                     mixin(eat);
                 goto L_End;
             case RI:
                 if (isRegionalIndicator(ch))
                     mixin(eat);
                 else
                     goto L_End_Extend;
             break;
             case L:
                 if (isHangL(ch))
                     mixin(eat);
                 else if (isHangV(ch) || hangLV[ch])
                 {
                     state = V;
                     mixin(eat);
                 }
                 else if (hangLVT[ch])
                 {
                     state = LVT;
                     mixin(eat);
                 }
                 else
                     goto L_End_Extend;
             break;
             case V:
                 if (isHangV(ch))
                     mixin(eat);
                 else if (isHangT(ch))
                 {
                     state = LVT;
                     mixin(eat);
                 }
                 else
                     goto L_End_Extend;
             break;
             case LVT:
                 if (isHangT(ch))
                 {
                     mixin(eat);
                 }
                 else
                     goto L_End_Extend;
             break;
             }
         }
     L_End_Extend:
         // absorb any trailing extenders and spacing marks
         while (!range.empty)
         {
             ch = range.front;
             // extend & spacing marks
             if (!graphemeExtend[ch] && !spacingMark[ch])
                 break;
             mixin(eat);
         }
     L_End:
         static if (getValue)
             return grapheme;
     }

 }

 public: // Public API continues

 /++
     Computes the length of grapheme cluster starting at $(D index).
     Both the resulting length and the $(D index) are measured
     in $(S_LINK Code unit, code units).

     Params:
         C = type that is implicitly convertible to $(D dchar)
         input = array of code units to examine
         index = starting index into $(D input[])

     Returns:
         length of grapheme cluster
 +/
 size_t graphemeStride(C)(in C[] input, size_t index)
 if (is(C : dchar))
 {
     auto rest = input[index .. $];
     immutable before = rest.length;
     // only advance past one cluster, no Grapheme value is built
     genericDecodeGrapheme!false(rest);
     return before - rest.length;
 }

 ///
 @safe unittest
 {
     assert(graphemeStride("  ", 1) == 1);
     // A + combining ring above
     string city = "A\u030Arhus";
     size_t first = graphemeStride(city, 0);
     assert(first == 3); //\u030A has 2 UTF-8 code units
     // the stride can be used directly to slice the string
     assert(city[0 .. first] == "A\u030A");
     assert(city[first..$] == "rhus");
 }

 /++
     Reads one full grapheme cluster from an input range of dchar $(D inp).

     For examples see the $(LREF Grapheme) below.

     Note:
     This function modifies $(D inp) and thus $(D inp)
     must be an L-value.
 +/
 Grapheme decodeGrapheme(Input)(ref Input inp)
 if (isInputRange!Input && is(Unqual!(ElementType!Input) == dchar))
 {
     // value-returning instantiation of the shared decoder; advances inp
     return genericDecodeGrapheme!true(inp);
 }

 @system unittest
 {
     import std.algorithm.comparison : equal;

     Grapheme gr;
     // a lone space, then a space followed by U+0308
     string s = " \u0020\u0308 ";
     gr = decodeGrapheme(s);
     assert(gr.length == 1 && gr[0] == ' ');
     gr = decodeGrapheme(s);
     assert(gr.length == 2 && equal(gr[0 .. 2], " \u0308"));
     // each call consumes one cluster and leaves the rest in s
     s = "\u0300\u0308\u1100";
     assert(equal(decodeGrapheme(s)[], "\u0300\u0308"));
     assert(equal(decodeGrapheme(s)[], "\u1100"));
     s = "\u11A8\u0308\uAC01";
     assert(equal(decodeGrapheme(s)[], "\u11A8\u0308"));
     assert(equal(decodeGrapheme(s)[], "\uAC01"));
 }

 /++
     $(P Iterate a string by grapheme.)

     $(P Useful for doing string manipulation that needs to be aware
     of graphemes.)

     See_Also:
         $(LREF byCodePoint)
 +/
 auto byGrapheme(Range)(Range range)
 if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar))
 {
     // TODO: Bidirectional access
     // Lazily decodes one Grapheme ahead; an empty cached Grapheme marks the
     // end of the range.
     static struct Result(R)
     {
         private R _range;
         private Grapheme _front;

         bool empty() @property
         {
             return 0 == _front.length;
         }

         Grapheme front() @property
         {
             return _front;
         }

         void popFront()
         {
             if (_range.empty)
                 _front = Grapheme.init;
             else
                 _front = _range.decodeGrapheme();
         }

         static if (isForwardRange!R)
         {
             Result save() @property
             {
                 return Result(_range.save, _front);
             }
         }
     }

     auto graphemes = Result!(Range)(range);
     // prime the cache with the first cluster
     graphemes.popFront();
     return graphemes;
 }

 ///
 @safe unittest
 {
     import std.algorithm.comparison : equal;
     import std.range.primitives : walkLength;
     import std.range : take, drop;
     auto text = "noe\u0308l"; // noël using e + combining diaeresis
     assert(text.walkLength == 5); // 5 code points

     auto gText = text.byGrapheme;
     assert(gText.walkLength == 4); // 4 graphemes

     // take/drop count graphemes, not code points
     assert(gText.take(3).equal("noe\u0308".byGrapheme));
     assert(gText.drop(3).equal("l".byGrapheme));
 }

 // For testing non-forward-range input ranges
 // Wraps a string and exposes only empty/front/popFront (no save).
 version (unittest)
 private static struct InputRangeString
 {
     private string s;

     bool empty() @property { return s.empty; }
     dchar front() @property { return s.front; }
     void popFront() { s.popFront(); }
 }

 @system unittest
 {
     import std.algorithm.comparison : equal;
     import std.array : array;
     import std.range : retro;
     import std.range.primitives : walkLength;
     // an empty string yields no graphemes
     assert("".byGrapheme.walkLength == 0);

     auto reverse = "le\u0308on";
     assert(reverse.walkLength == 5);

     auto gReverse = reverse.byGrapheme;
     assert(gReverse.walkLength == 4);

     // same checks for UTF-8, UTF-16 and UTF-32 strings
     foreach (text; AliasSeq!("noe\u0308l"c, "noe\u0308l"w, "noe\u0308l"d))
     {
         assert(text.walkLength == 5);
         static assert(isForwardRange!(typeof(text)));

         auto gText = text.byGrapheme;
         static assert(isForwardRange!(typeof(gText)));
         assert(gText.walkLength == 4);
         assert(gText.array.retro.equal(gReverse));
     }

     // a plain input range gives a result without save
     auto nonForwardRange = InputRangeString("noe\u0308l").byGrapheme;
     static assert(!isForwardRange!(typeof(nonForwardRange)));
     assert(nonForwardRange.walkLength == 4);
 }

 /++
     $(P Lazily transform a range of $(LREF Grapheme)s to a range of code points.)

     $(P Useful for converting the result to a string after doing operations
     on graphemes.)

     $(P Acts as the identity function when given a range of code points.)
 +/
 auto byCodePoint(Range)(Range range)
 if (isInputRange!Range && is(Unqual!(ElementType!Range) == Grapheme))
 {
     // TODO: Propagate bidirectional access
     // Walks the code points of the front Grapheme by index `i`, moving on to
     // the next Grapheme once the current one is exhausted.
     static struct Result
     {
         private Range _range;
         private size_t i = 0;

         bool empty() @property
         {
             return _range.empty;
         }

         dchar front() @property
         {
             return _range.front[i];
         }

         void popFront()
         {
             // still inside the current grapheme?
             if (++i < _range.front.length)
                 return;

             _range.popFront();
             i = 0;
         }

         static if (isForwardRange!Range)
         {
             Result save() @property
             {
                 return Result(_range.save, i);
             }
         }
     }

     return Result(range);
 }

 /// Ditto
 Range byCodePoint(Range)(Range range)
 if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar))
 {
     // the elements are already code points: hand the range back unchanged
     return range;
 }

 ///
 @safe unittest
 {
     import std.array : array;
     import std.conv : text;
     import std.range : retro;

     string s = "noe\u0308l"; // noël

     // reverse it and convert the result to a string
     // (reversing by grapheme keeps the diaeresis attached to its 'e')
     string reverse = s.byGrapheme
         .array
         .retro
         .byCodePoint
         .text;

     assert(reverse == "le\u0308on"); // lëon
 }

 @system unittest
 {
     import std.algorithm.comparison : equal;
     import std.range.primitives : walkLength;
     // empty input stays empty
     assert("".byGrapheme.byCodePoint.equal(""));

     // on a range of code points byCodePoint is the identity
     string text = "noe\u0308l";
     static assert(is(typeof(text.byCodePoint) == string));

     // a non-forward input stays non-forward through both adaptors
     auto gText = InputRangeString(text).byGrapheme;
     static assert(!isForwardRange!(typeof(gText)));

     auto cpText = gText.byCodePoint;
     static assert(!isForwardRange!(typeof(cpText)));

     assert(cpText.walkLength == text.walkLength);
 }

 @trusted:

 /++
     $(P A structure designed to effectively pack $(CHARACTERS)
     of a $(CLUSTER).
     )

     $(P $(D Grapheme) has value semantics so 2 copies of a $(D Grapheme)
     always refer to distinct objects. In most actual scenarios a $(D Grapheme)
     fits on the stack and avoids memory allocation overhead for all but quite
     long clusters.
     )

     See_Also: $(LREF decodeGrapheme), $(LREF graphemeStride)
 +/
 @trusted struct Grapheme
 {
     import std.traits : isDynamicArray;

 public:
     /// Ctor
     this(C)(in C[] chars...)
         if (is(C : dchar))
     {
         this ~= chars;
     }

     ///ditto
     this(Input)(Input seq)
         if (!isDynamicArray!Input
             && isInputRange!Input && is(ElementType!Input : dchar))
     {
         this ~= seq;
     }

     /// Gets a $(CODEPOINT) at the given index in this cluster.
     dchar opIndex(size_t index) const pure nothrow @nogc
     {
         assert(index < length);
         return read24(isBig ? ptr_ : small_.ptr, index);
     }

     /++
         Writes a $(CODEPOINT) $(D ch) at given index in this cluster.

         Warning:
         Use of this facility may invalidate grapheme cluster,
         see also $(LREF Grapheme.valid).
     +/
     void opIndexAssign(dchar ch, size_t index) pure nothrow @nogc
     {
         assert(index < length);
         write24(isBig ? ptr_ : small_.ptr, ch, index);
     }

     ///
     @safe unittest
     {
         auto g = Grapheme("A\u0302");
         assert(g[0] == 'A');
         assert(g.valid);
         g[1] = '~'; // ASCII tilda is not a combining mark
         assert(g[1] == '~');
         assert(!g.valid);
     }

     /++
         Random-access range over Grapheme's $(CHARACTERS).

         Warning: Invalidates when this Grapheme leaves the scope,
         attempts to use it then would lead to memory corruption.
     +/
     @system SliceOverIndexed!Grapheme opSlice(size_t a, size_t b) pure nothrow @nogc
     {
         return sliceOverIndexed(a, b, &this);
     }

     /// ditto
     @system SliceOverIndexed!Grapheme opSlice() pure nothrow @nogc
     {
         return sliceOverIndexed(0, length, &this);
     }

     /// Grapheme cluster length in $(CODEPOINTS).
     @property size_t length() const pure nothrow @nogc
     {
         return isBig ? len_ : slen_ & 0x7F;
     }

     /++
         Append $(CHARACTER) $(D ch) to this grapheme.
         Warning:
         Use of this facility may invalidate grapheme cluster,
         see also $(D valid).

         See_Also: $(LREF Grapheme.valid)
     +/
     ref opOpAssign(string op)(dchar ch)
     {
         static if (op == "~")
         {
             if (!isBig)
             {
                 if (slen_ == small_cap)
                     convertToBig();// & fallthrough to "big" branch
                 else
                 {
                     write24(small_.ptr, ch, smallLength);
                     slen_++;
                     return this;
                 }
             }

             assert(isBig);
             if (len_ == cap_)
             {
                 import core.checkedint : addu, mulu;
                 bool overflow;
                 cap_ = addu(cap_, grow, overflow);
                 auto nelems = mulu(3, addu(cap_, 1, overflow), overflow);
                 if (overflow) assert(0);
                 ptr_ = cast(ubyte*) pureRealloc(ptr_, nelems);
                 if (ptr_ is null) onOutOfMemoryError();
             }
             write24(ptr_, ch, len_++);
             return this;
         }
         else
             static assert(false, "No operation "~op~" defined for Grapheme");
     }

     ///
     @system unittest
     {
         import std.algorithm.comparison : equal;
         auto g = Grapheme("A");
         assert(g.valid);
         // append a single character
         g ~= '\u0301';
         assert(g[].equal("A\u0301"));
         assert(g.valid);
         // append a whole string (input range overload)
         g ~= "B";
         // not a valid grapheme cluster anymore
         assert(!g.valid);
         // still could be useful though
         assert(g[].equal("A\u0301B"));
     }

     /// Append all $(CHARACTERS) from the input range $(D inp) to this Grapheme.
     ref opOpAssign(string op, Input)(Input inp)
         if (isInputRange!Input && is(ElementType!Input : dchar))
     {
         static if (op != "~")
         {
             static assert(false, "No operation "~op~" defined for Grapheme");
         }
         else
         {
             // delegate to the single-character append, one code point at a time
             foreach (dchar codepoint; inp)
                 this ~= codepoint;
             return this;
         }
     }

     /++
         True if this object contains valid extended grapheme cluster.
         Decoding primitives of this module always return a valid $(D Grapheme).

         Appending to and direct manipulation of grapheme's $(CHARACTERS) may
         render it no longer valid. Certain applications may choose to use
         Grapheme as a "small string" of any $(CODEPOINTS) and ignore this property
         entirely.
     +/
     @property bool valid()() /*const*/
     {
         // run the grapheme decoder once over a slice of the whole content;
         // the cluster is valid only if nothing is left in the slice afterwards
         auto remainder = this[];
         genericDecodeGrapheme!false(remainder);
         return !remainder.length;
     }

     this(this) pure @nogc nothrow
     {
         // nothing to duplicate unless the data lives in a heap buffer
         if (!isBig)
             return;

         import core.checkedint : addu, mulu;
         bool overflow;
         // size of the heap buffer in bytes: 3 per code point, cap_ + 1 slots
         immutable nbytes = mulu(3, addu(cap_, 1, overflow), overflow);
         if (overflow) assert(0);

         auto fresh = cast(ubyte*) pureMalloc(nbytes);
         if (fresh is null) onOutOfMemoryError();
         // give this copy its own buffer so the two Graphemes do not share ptr_
         fresh[0 .. nbytes] = ptr_[0 .. nbytes];
         ptr_ = fresh;
     }

     ~this() pure @nogc nothrow
     {
         // inline storage needs no cleanup; only the heap buffer is released
         if (!isBig)
             return;
         pureFree(ptr_);
     }


 private:
     // bytes available for inline storage: the whole union minus the
     // final byte, which holds slen_
     enum small_bytes = ((ubyte*).sizeof+3*size_t.sizeof-1);
     // "out of the blue" grow rate, needs testing
     // (though graphemes are typically small < 9)
     enum grow = 20;
     // number of 3-byte code points that fit into the inline buffer
     enum small_cap = small_bytes/3;
     // high bit of slen_ flags heap storage, the low 7 bits hold the inline length
     enum small_flag = 0x80, small_mask = 0x7F;
     // 16 bytes in 32bits, should be enough for the majority of cases
     union
     {
         // "big" representation: heap buffer, its capacity and used length
         // (both counted in code points)
         struct
         {
             ubyte* ptr_;
             size_t cap_;
             size_t len_;
             size_t padding_;
         }
         // "small" representation: code points stored inline, followed by
         // the length/flag byte
         struct
         {
             ubyte[small_bytes] small_;
             ubyte slen_;
         }
     }

     void convertToBig() pure @nogc nothrow
     {
         static assert(grow.max / 3 - 1 >= grow);
         enum nbytes = 3 * (grow + 1);
         immutable count = smallLength;
         auto heap = cast(ubyte*) pureMalloc(nbytes);
         if (heap is null) onOutOfMemoryError();
         // copy the inline code points out before the union gets reused
         foreach (size_t i; 0 .. count)
             write24(heap, read24(small_.ptr, i), i);
         // from here on the small array bytes are overwritten by the big fields
         ptr_ = heap;
         len_ = slen_;
         assert(grow > len_);
         cap_ = grow;
         setBig();
     }

     // Marks the storage as heap-backed by setting the flag (high) bit of slen_.
     void setBig() pure nothrow @nogc { slen_ |= small_flag; }

     // Length stored in slen_ with the storage flag bit masked off.
     @property size_t smallLength() const pure nothrow @nogc
     {
         return slen_ & small_mask;
     }
     // Non-zero (small_flag) when the heap representation is active, 0 otherwise.
     @property ubyte isBig() const pure nothrow @nogc
     {
         return slen_ & small_flag;
     }
 }

 // the union layout relies on Grapheme being exactly 4 machine words
 static assert(Grapheme.sizeof == size_t.sizeof*4);


 @system pure /*nothrow @nogc*/ unittest // TODO: string .front is GC and throw
 {
     import std.algorithm.comparison : equal;
     // three single-letter graphemes must compare equal to what byGrapheme yields
     Grapheme[3] data = [Grapheme("Ю"), Grapheme("У"), Grapheme("З")];
     assert(byGrapheme("ЮУЗ").equal(data[]));
 }

 ///
 @system unittest
 {
     import std.algorithm.comparison : equal;
     import std.algorithm.iteration : filter;
     import std.range : isRandomAccessRange;

     string bold = "ku\u0308hn";

     // note that decodeGrapheme takes parameter by ref,
     // so every call consumes the decoded cluster from bold
     auto first = decodeGrapheme(bold);

     assert(first.length == 1);
     assert(first[0] == 'k');

     // the next grapheme is 2 characters long
     auto wideOne = decodeGrapheme(bold);
     // slicing a grapheme yields a random-access range of dchar
     assert(wideOne[].equal("u\u0308"));
     assert(wideOne.length == 2);
     static assert(isRandomAccessRange!(typeof(wideOne[])));

     // all of the usual range manipulation is possible
     assert(wideOne[].filter!isMark().equal("\u0308"));

     auto g = Grapheme("A");
     assert(g.valid);
     g ~= '\u0301';
     assert(g[].equal("A\u0301"));
     assert(g.valid);
     g ~= "B";
     // not a valid grapheme cluster anymore
     assert(!g.valid);
     // still could be useful though
     assert(g[].equal("A\u0301B"));
 }

 @safe unittest
 {
     // index assignment can turn a valid cluster into an invalid one
     auto g = Grapheme("A\u0302");
     assert(g[0] == 'A');
     assert(g.valid);
     g[1] = '~'; // ASCII tilde is not a combining mark
     assert(g[1] == '~');
     assert(!g.valid);
 }

 @system unittest
 {
     import std.algorithm.comparison : equal;
     import std.algorithm.iteration : map;
     import std.conv : text;
     import std.range : iota;

     // not valid clusters (but it's just a test)
     auto g  = Grapheme('a', 'b', 'c', 'd', 'e');
     assert(g[0] == 'a');
     assert(g[1] == 'b');
     assert(g[2] == 'c');
     assert(g[3] == 'd');
     assert(g[4] == 'e');
     // overwriting one element must leave its neighbours intact
     g[3] = 'Й';
     assert(g[2] == 'c');
     assert(g[3] == 'Й', text(g[3], " vs ", 'Й'));
     assert(g[4] == 'e');
     assert(!g.valid);

     g ~= 'ц';
     g ~= '~';
     assert(g[0] == 'a');
     assert(g[1] == 'b');
     assert(g[2] == 'c');
     assert(g[3] == 'Й');
     assert(g[4] == 'e');
     assert(g[5] == 'ц');
     assert(g[6] == '~');
     assert(!g.valid);

     // a copy must be independent of the original
     Grapheme copy = g;
     copy[0] = 'X';
     copy[1] = '-';
     assert(g[0] == 'a' && copy[0] == 'X');
     assert(g[1] == 'b' && copy[1] == '-');
     assert(equal(g[2 .. g.length], copy[2 .. copy.length]));
     // 13 code points: more than the inline buffer holds
     copy = Grapheme("АБВГДЕЁЖЗИКЛМ");
     assert(equal(copy[0 .. 8], "АБВГДЕЁЖ"), text(copy[0 .. 8]));
     copy ~= "xyz";
     assert(equal(copy[13 .. 15], "xy"), text(copy[13 .. 15]));
     assert(!copy.valid);

     // 26 appends starting from an empty Grapheme
     Grapheme h;
     foreach (dchar v; iota(cast(int)'A', cast(int)'Z'+1).map!"cast(dchar)a"())
         h ~= v;
     assert(equal(h[], iota(cast(int)'A', cast(int)'Z'+1)));
 }

 /++
     $(P Does basic case-insensitive comparison of $(D r1) and $(D r2).
     This function uses a simpler comparison rule, thus achieving better performance
     than $(LREF icmp). However, keep in mind the warning below.)

     Params:
         r1 = an input range of characters
         r2 = an input range of characters

     Returns:
         An $(D int) that is 0 if the strings match,
         &lt;0 if $(D r1) is lexicographically "less" than $(D r2),
         &gt;0 if $(D r1) is lexicographically "greater" than $(D r2)

     Warning:
     This function only handles 1:1 $(CODEPOINT) mapping
     and thus is not sufficient for certain alphabets
     like German, Greek and few others.

     See_Also:
         $(LREF icmp)
         $(REF cmp, std,algorithm,comparison)
 +/
 int sicmp(S1, S2)(S1 r1, S2 r2)
 if (isInputRange!S1 && isSomeChar!(ElementEncodingType!S1)
     && isInputRange!S2 && isSomeChar!(ElementEncodingType!S2))
 {
     import std.internal.unicode_tables : sTable = simpleCaseTable; // generated file
     import std.utf : byDchar;

     auto left = r1.byDchar;
     auto right = r2.byDchar;

     foreach (immutable lhs; left)
     {
         // r2 ran out first, so r1 is the "greater" one
         if (right.empty)
             return 1;
         immutable rhs = right.front;
         right.popFront();
         if (lhs == rhs)
             continue;

         // simpleCaseTrie is packed index table
         immutable size_t posL = simpleCaseTrie[lhs];
         immutable size_t posR = simpleCaseTrie[rhs];
         immutable casedL = posL != EMPTY_CASE_TRIE;
         immutable casedR = posR != EMPTY_CASE_TRIE;

         if (casedL && casedR)
         {
             // both cased chars: step back to the start of each bucket
             immutable bucketL = posL - sTable[posL].n;
             immutable bucketR = posR - sTable[posR].n;
             if (bucketL == bucketR) // one bucket, equivalent chars
                 continue;
             // not the same bucket: order by the first entry of each bucket
             return sTable[bucketL].ch - sTable[bucketR].ch;
         }
         // only one side is cased: remap it to the start of its bucket
         if (casedL)
             return sTable[posL - sTable[posL].n].ch - rhs;
         if (casedR)
             return lhs - sTable[posR - sTable[posR].n].ch;
         // neither char is cased at all
         return lhs - rhs;
     }
     return right.empty ? 0 : -1;
 }

 ///
 @safe @nogc pure nothrow unittest
 {
     // differs only in letter case
     assert(sicmp("Август", "авгусТ") == 0);
     // Greek also works as long as there is no 1:M mapping in sight
     assert(sicmp("ΌΎ", "όύ") == 0);
     // things like the following won't get matched as equal
     // Greek small letter iota with dialytika and tonos
     assert(sicmp("ΐ", "\u03B9\u0308\u0301") != 0);

     // while icmp has no problem with that
     assert(icmp("ΐ", "\u03B9\u0308\u0301") == 0);
     assert(icmp("ΌΎ", "όύ") == 0);
 }

 // overloads for the most common cases to reduce compile time
 // (each one forwards to the template with explicit const array types)
 @safe @nogc pure nothrow
 {
     int sicmp(const(char)[] str1, const(char)[] str2)
     { return sicmp!(const(char)[], const(char)[])(str1, str2); }
     int sicmp(const(wchar)[] str1, const(wchar)[] str2)
     { return sicmp!(const(wchar)[], const(wchar)[])(str1, str2); }
     int sicmp(const(dchar)[] str1, const(dchar)[] str2)
     { return sicmp!(const(dchar)[], const(dchar)[])(str1, str2); }
 }

 // Tries to match `lhs` against `rhs` (plus, for multi-code-point foldings,
 // the front of `rtail`) using the full case folding table.
 // Returns 0 on a match; otherwise `lhs` itself when it has no table entry,
 // or the first code point of its bucket.
 private int fullCasedCmp(Range)(dchar lhs, dchar rhs, ref Range rtail)
 {
     import std.algorithm.searching : skipOver;
     import std.internal.unicode_tables : fullCaseTable; // generated file
     alias fTable = fullCaseTable;
     // fullCaseTrie is packed index table
     immutable size_t hit = fullCaseTrie[lhs];
     if (hit == EMPTY_CASE_TRIE)
         return lhs;
     immutable first = hit - fTable[hit].n;
     immutable last = fTable[hit].size + first;
     assert(fTable[first].entry_len == 1);
     foreach (size_t i; first .. last)
     {
         immutable entryLen = fTable[i].entry_len;
         if (entryLen == 1)
         {
             if (fTable[i].seq[0] == rhs)
                 return 0;
             continue;
         }
         // OK it's a long chunk, like 'ss' for German
         dstring seq = fTable[i].seq[0 .. entryLen];
         // note that this path modifies rtail
         // iff the whole condition holds
         if (rhs == seq[0] && rtail.skipOver(seq[1..$]))
             return 0;
     }
     return fTable[first].seq[0]; // new remapped character for accurate diffs
 }

 /++
     Does case insensitive comparison of `r1` and `r2`.
     Follows the rules of full case-folding mapping.
     This includes matching as equal German ß with "ss" and
     other 1:M $(CODEPOINT) mappings unlike $(LREF sicmp).
     The cost of `icmp` being pedantically correct is
     slightly worse performance.

     Params:
         r1 = a forward range of characters
         r2 = a forward range of characters

     Returns:
         An $(D int) that is 0 if the strings match,
         &lt;0 if $(D r1) is lexicographically "less" than $(D r2),
         &gt;0 if $(D r1) is lexicographically "greater" than $(D r2)

     See_Also:
         $(LREF sicmp)
         $(REF cmp, std,algorithm,comparison)
 +/
 int icmp(S1, S2)(S1 r1, S2 r2)
 if (isForwardRange!S1 && isSomeChar!(ElementEncodingType!S1)
     && isForwardRange!S2 && isSomeChar!(ElementEncodingType!S2))
 {
     import std.utf : byDchar;

     auto left = r1.byDchar;
     auto right = r2.byDchar;

     while (!left.empty)
     {
         immutable lhs = left.front;
         // r2 ran out first, so r1 is the "greater" one
         if (right.empty)
             return 1;
         immutable rhs = right.front;
         left.popFront();
         right.popFront();
         if (lhs == rhs)
             continue;
         // first try to match lhs to <rhs,right-tail> sequence
         immutable cmpLR = fullCasedCmp(lhs, rhs, right);
         if (cmpLR == 0)
             continue;
         // then rhs to <lhs,left-tail> sequence
         immutable cmpRL = fullCasedCmp(rhs, lhs, left);
         if (cmpRL == 0)
             continue;
         // cmpXX contain remapped codepoints
         // to obtain stable ordering of icmp
         return cmpLR - cmpRL;
     }
     // r1 is exhausted: equal if r2 is too, otherwise r1 is "less"
     return right.empty ? 0 : -1;
 }

 ///
 @safe @nogc pure nothrow unittest
 {
     // 1:M mappings: a single code point on one side matches a sequence on the other
     assert(icmp("Rußland", "Russland") == 0);
     assert(icmp("ᾩ -> \u1F70\u03B9", "\u1F61\u03B9 -> ᾲ") == 0);
 }

 /**
  * By using $(REF byUTF, std,utf) and its aliases, GC allocations via auto-decoding
  * and thrown exceptions can be avoided, making `icmp` `@safe @nogc nothrow pure`.
  */
 @safe @nogc nothrow pure unittest
 {
     import std.utf : byDchar;

     // same comparisons as above, but on explicitly wrapped byDchar ranges
     assert(icmp("Rußland".byDchar, "Russland".byDchar) == 0);
     assert(icmp("ᾩ -> \u1F70\u03B9".byDchar, "\u1F61\u03B9 -> ᾲ".byDchar) == 0);
 }

 // test different character types
 // (char, wchar and dchar strings mixed on either side)
 @safe unittest
 {
     assert(icmp("Rußland", "Russland") == 0);
     assert(icmp("Rußland"w, "Russland") == 0);
     assert(icmp("Rußland", "Russland"w) == 0);
     assert(icmp("Rußland"w, "Russland"w) == 0);
     assert(icmp("Rußland"d, "Russland"w) == 0);
     assert(icmp("Rußland"w, "Russland"d) == 0);
 }

 // overloads for the most common cases to reduce compile time
 // (each one forwards to the template with explicit const array types)
 @safe @nogc pure nothrow
 {
     int icmp(const(char)[] str1, const(char)[] str2)
     { return icmp!(const(char)[], const(char)[])(str1, str2); }
     int icmp(const(wchar)[] str1, const(wchar)[] str2)
     { return icmp!(const(wchar)[], const(wchar)[])(str1, str2); }
     int icmp(const(dchar)[] str1, const(dchar)[] str2)
     { return icmp!(const(dchar)[], const(dchar)[])(str1, str2); }
 }

 @safe unittest
 {
     import std.algorithm.sorting : sort;
     import std.conv : to;
     import std.exception : assertCTFEable;
     // the whole body is wrapped so it is checked at compile time as well
     assertCTFEable!(
     {
     // checks shared by both comparison functions
     foreach (cfunc; AliasSeq!(icmp, sicmp))
     {
         // every combination of string types on both sides
         foreach (S1; AliasSeq!(string, wstring, dstring))
         foreach (S2; AliasSeq!(string, wstring, dstring))
         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
             assert(cfunc("".to!S1(), "".to!S2()) == 0);
             assert(cfunc("A".to!S1(), "".to!S2()) > 0);
             assert(cfunc("".to!S1(), "0".to!S2()) < 0);
             assert(cfunc("abc".to!S1(), "abc".to!S2()) == 0);
             assert(cfunc("abcd".to!S1(), "abc".to!S2()) > 0);
             assert(cfunc("abc".to!S1(), "abcd".to!S2()) < 0);
             assert(cfunc("Abc".to!S1(), "aBc".to!S2()) == 0);
             assert(cfunc("авГуст".to!S1(), "АВгУСТ".to!S2()) == 0);
             // Check example:
             assert(cfunc("Август".to!S1(), "авгусТ".to!S2()) == 0);
             assert(cfunc("ΌΎ".to!S1(), "όύ".to!S2()) == 0);
         }();
         // check that the order is properly agnostic to the case
         auto strs = [ "Apple", "ORANGE",  "orAcle", "amp", "banana"];
         sort!((a,b) => cfunc(a,b) < 0)(strs);
         assert(strs == ["amp", "Apple",  "banana", "orAcle", "ORANGE"]);
     }
     // checks that only hold for full case folding (icmp)
     assert(icmp("ßb", "ssa") > 0);
     // Check example:
     assert(icmp("Russland", "Rußland") == 0);
     assert(icmp("ᾩ -> \u1F70\u03B9", "\u1F61\u03B9 -> ᾲ") == 0);
     assert(icmp("ΐ"w, "\u03B9\u0308\u0301") == 0);
     assert(sicmp("ΐ", "\u03B9\u0308\u0301") != 0);
     //bugzilla 11057
     assert( icmp("K", "L") < 0 );
     });
 }

 // issue 17372
 // icmp is instantiated here with joiner results rather than plain strings;
 // the test only has to compile and run
 @safe pure unittest
 {
     import std.algorithm.iteration : joiner, map;
     import std.algorithm.sorting : sort;
     import std.array : array;
     auto a = [["foo", "bar"], ["baz"]].map!(line => line.joiner(" ")).array.sort!((a, b) => icmp(a, b) < 0);
 }

 // This is package for the moment to be used as a support tool for std.regex
 // It needs a better API
 /*
     Return a range of all $(CODEPOINTS) that casefold to
     and from this $(D ch).
 */
 package auto simpleCaseFoldings(dchar ch) @safe
 {
     import std.internal.unicode_tables : simpleCaseTable; // generated file
     alias sTable = simpleCaseTable;
     // Two modes share one struct: "small" yields the single stored code
     // point c, otherwise the range walks len entries of sTable from idx.
     static struct Range
     {
     @safe pure nothrow:
         uint idx; //if == uint.max, then read c.
         union
         {
             dchar c; // == 0 - empty range
             uint len;
         }
         @property bool isSmall() const { return idx == uint.max; }

         this(dchar ch)
         {
             idx = uint.max;
             c = ch;
         }

         this(uint start, uint size)
         {
             idx = start;
             len = size;
         }

         @property dchar front() const
         {
             assert(!empty);
             if (!isSmall)
                 return sTable[idx].ch;
             return c;
         }

         @property bool empty() const
         {
             return isSmall ? c == 0 : len == 0;
         }

         @property size_t length() const
         {
             if (!isSmall)
                 return len;
             return c == 0 ? 0 : 1;
         }

         void popFront()
         {
             if (isSmall)
             {
                 // the only element is consumed; 0 marks the range empty
                 c = 0;
                 return;
             }
             idx++;
             len--;
         }
     }
     immutable idx = simpleCaseTrie[ch];
     // no table entry: the character only folds to itself
     if (idx == EMPTY_CASE_TRIE)
         return Range(ch);
     auto bucket = sTable[idx];
     // step back to the first entry of the bucket ch belongs to
     immutable start = idx - bucket.n;
     return Range(start, bucket.size);
 }

 @system unittest
 {
     import std.algorithm.comparison : equal;
     import std.algorithm.searching : canFind;
     import std.array : array;
     import std.exception : assertCTFEable;
     assertCTFEable!((){
         // a cased letter yields both of its case variants
         auto r = simpleCaseFoldings('Э').array;
         assert(r.length == 2);
         assert(r.canFind('э') && r.canFind('Э'));
         // an uncased character yields only itself
         auto sr = simpleCaseFoldings('~');
         assert(sr.equal("~"));
         //A with ring above - casefolds to the same bucket as Angstrom sign
         sr = simpleCaseFoldings('Å');
         assert(sr.length == 3);
         assert(sr.canFind('å') && sr.canFind('Å') && sr.canFind('\u212B'));
     });
 }

 /++
     $(P Returns the $(S_LINK Combining class, combining class) of $(D ch).)

     Params:
         ch = the $(CODEPOINT) to look up

     Returns:
         The combining class of $(D ch) as a $(D ubyte).
 +/
 ubyte combiningClass(dchar ch) @safe pure nothrow @nogc
 {
     // plain table lookup
     return combiningClassTrie[ch];
 }

 ///
 @safe unittest
 {
     // shorten the code
     alias CC = combiningClass;

     // combining tilde
     assert(CC('\u0303') == 230);
     // combining ring below
     assert(CC('\u0325') == 220);
     // the simple consequence is that  "tilde" should be
     // placed after a "ring below" in a sequence
 }

 @safe pure nothrow @nogc unittest
 {
     // every ASCII code point has combining class 0
     foreach (ch; 0 .. 0x80)
         assert(combiningClass(ch) == 0);
     // spot checks of non-zero classes
     assert(combiningClass('\u05BD') == 22);
     assert(combiningClass('\u0300') == 230);
     assert(combiningClass('\u0317') == 220);
     assert(combiningClass('\u1939') == 222);
 }

 /// Unicode character decomposition type.
 enum UnicodeDecomposition {
     /// Canonical decomposition. The result is a canonically equivalent sequence.
     Canonical,
     /**
          Compatibility decomposition. The result is a compatibility equivalent sequence.
          Note: Compatibility decomposition is a $(B lossy) conversion,
          typically suitable only for fuzzy matching and internal processing.
     */
     Compatibility
 }

 /**
     Shorthand aliases for the character decomposition types, passed as a
     template parameter to $(LREF decompose).
 */
 enum {
     Canonical = UnicodeDecomposition.Canonical,
     Compatibility = UnicodeDecomposition.Compatibility
 }

 /++
     Try to canonically compose 2 $(CHARACTERS).
     Returns the composed $(CHARACTER) if they do compose and dchar.init otherwise.

     The assumption is that $(D first) comes before $(D second) in the original text,
     usually meaning that the first is a starter.

     Note: Hangul syllables are not covered by this function.
     See $(D composeJamo) below.
 +/
 public dchar compose(dchar first, dchar second) pure nothrow @safe
 {
     import std.algorithm.iteration : map;
     import std.internal.unicode_comp : compositionTable, composeCntShift, composeIdxMask;
     import std.range : assumeSorted;
     immutable packed = compositionJumpTrie[first];
     // ushort.max marks a first character that never composes
     if (packed == ushort.max)
         return dchar.init;
     // unpack offset and length
     immutable idx = packed & composeIdxMask, cnt = packed >> composeCntShift;
     auto candidates = compositionTable[idx .. idx+cnt];
     // TODO: optimize this micro binary search (no more than 4-5 steps)
     auto sortedRhs = candidates.map!"a.rhs"().assumeSorted();
     immutable target = sortedRhs.lowerBound(second).length;
     // every candidate is below `second`: nothing to compose with
     if (target == cnt)
         return dchar.init;
     immutable entry = candidates[target];
     // lowerBound only gives the insertion point; require an exact hit
     if (entry.rhs == second)
         return entry.composed;
     return dchar.init;
 }

 ///
 @safe unittest
 {
     assert(compose('A','\u0308') == '\u00C4');
     // two characters that do not compose yield dchar.init
     assert(compose('A', 'B') == dchar.init);
     assert(compose('C', '\u0301') == '\u0106');
     // note that the starter is the first one
     // thus the following doesn't compose
     assert(compose('\u0308', 'A') == dchar.init);
 }

 /++
     Returns a full $(S_LINK Canonical decomposition, Canonical)
     (by default) or $(S_LINK Compatibility decomposition, Compatibility)
     decomposition of $(CHARACTER) $(D ch).
     If no decomposition is available returns a $(LREF Grapheme)
     with the $(D ch) itself.

     Note:
     This function also decomposes hangul syllables
     as prescribed by the standard.

     See_Also: $(LREF decomposeHangul) for a restricted version
     that takes into account only hangul syllables  but
     no other decompositions.
 +/
 public Grapheme decompose(UnicodeDecomposition decompType=Canonical)(dchar ch) @safe
 {
     import std.algorithm.searching : until;
     import std.internal.unicode_decomp : decompCompatTable, decompCanonTable;
     // pick the trie/table pair that matches the requested decomposition type
     static if (decompType == Canonical)
     {
         alias mapping = canonMappingTrie;
         alias table = decompCanonTable;
     }
     else static if (decompType == Compatibility)
     {
         alias mapping = compatMappingTrie;
         alias table = decompCompatTable;
     }
     immutable offset = mapping[ch];
     if (offset == 0) // not found, check hangul arithmetic decomposition
         return decomposeHangul(ch);
     // take the code points from the table up to (not including) the next 0
     auto mapped = table[offset .. $].until(0);
     return Grapheme(mapped);
 }

 ///
 @system unittest
 {
     import std.algorithm.comparison : equal;

     assert(compose('A','\u0308') == '\u00C4');
     assert(compose('A', 'B') == dchar.init);
     assert(compose('C', '\u0301') == '\u0106');
     // note that the starter is the first one
     // thus the following doesn't compose
     assert(compose('\u0308', 'A') == dchar.init);

     assert(decompose('Ĉ')[].equal("C\u0302"));
     // a character without a decomposition comes back as is
     assert(decompose('D')[].equal("D"));
     // a Hangul syllable is split into its jamos
     assert(decompose('\uD4DC')[].equal("\u1111\u1171\u11B7"));
     assert(decompose!Compatibility('¹')[].equal("1"));
 }

 //----------------------------------------------------------------------------
 // Hangul specific composition/decomposition
 // first code point of the precomposed syllable block
 enum jamoSBase = 0xAC00;
 // first leading consonant (L) jamo
 enum jamoLBase = 0x1100;
 // first vowel (V) jamo
 enum jamoVBase = 0x1161;
 // one below the first trailing consonant (T) jamo; a T index of 0 means "no T"
 enum jamoTBase = 0x11A7;
 // number of L, V and T values (the T count includes the "no T" slot)
 enum jamoLCount = 19, jamoVCount = 21, jamoTCount = 28;
 // syllables per leading consonant
 enum jamoNCount = jamoVCount * jamoTCount;
 // total number of precomposed syllables
 enum jamoSCount = jamoLCount * jamoNCount;

 // Tests if $(D ch) is a Hangul leading consonant jamo,
 // i.e. lies in [jamoLBase, jamoLBase + jamoLCount).
 bool isJamoL(dchar ch) pure nothrow @nogc @safe
 {
     // first cmp rejects ~ 1M code points above leading jamo range
     return ch < jamoLBase+jamoLCount && ch >= jamoLBase;
 }

 // Tests if $(D ch) is a Hangul trailing consonant jamo,
 // i.e. lies in (jamoTBase, jamoTBase + jamoTCount).
 bool isJamoT(dchar ch) pure nothrow @nogc @safe
 {
     // first cmp rejects ~ 1M code points above trailing jamo range
     // Note: ch == jamoTBase doesn't indicate trailing jamo (TIndex must be > 0)
     return ch < jamoTBase+jamoTCount && ch > jamoTBase;
 }

 // Tests if $(D ch) is a Hangul vowel jamo,
 // i.e. lies in [jamoVBase, jamoVBase + jamoVCount).
 bool isJamoV(dchar ch) pure nothrow @nogc @safe
 {
     // first cmp rejects ~ 1M code points above vowel range
     return  ch < jamoVBase+jamoVCount && ch >= jamoVBase;
 }

 // Returns the zero-based index of $(D ch) within the precomposed Hangul
 // syllable block, or -1 if $(D ch) lies outside of it.
 int hangulSyllableIndex(dchar ch) pure nothrow @nogc @safe
 {
     immutable int offset = cast(int) ch - jamoSBase;
     if (offset < 0 || offset >= jamoSCount)
         return -1;
     return offset;
 }

 // internal helper: compose hangul syllables leaving dchar.init in holes
 // (the composed syllable replaces the L jamo, the consumed V/T slots are blanked)
 void hangulRecompose(dchar[] seq) pure nothrow @nogc @safe
 {
     size_t pos = 0;
     while (pos + 1 < seq.length)
     {
         // a syllable can only start with an <L, V> pair
         if (!isJamoL(seq[pos]) || !isJamoV(seq[pos+1]))
         {
             pos++;
             continue;
         }
         immutable int indexL = seq[pos] - jamoLBase;
         immutable int indexV = seq[pos+1] - jamoVBase;
         immutable int indexLV = indexL * jamoNCount + indexV * jamoTCount;
         if (pos + 2 < seq.length && isJamoT(seq[pos+2]))
         {
             // <L, V, T>: fold all three into one LVT syllable
             seq[pos] = jamoSBase + indexLV + seq[pos+2] - jamoTBase;
             seq[pos+1] = dchar.init;
             seq[pos+2] = dchar.init;
             pos += 3;
         }
         else
         {
             // <L, V>: fold the pair into one LV syllable
             seq[pos] = jamoSBase + indexLV;
             seq[pos+1] = dchar.init;
             pos += 2;
         }
     }
 }

 //----------------------------------------------------------------------------
 public:

 /**
     Decomposes a Hangul syllable. If $(D ch) is not a composed syllable
     then this function returns $(LREF Grapheme) containing only $(D ch) as is.
 */
 Grapheme decomposeHangul(dchar ch) @safe
 {
     // -1 means ch is not a precomposed syllable: return it untouched
     immutable idxS = hangulSyllableIndex(ch);
     if (idxS < 0)
         return Grapheme(ch);

     immutable idxT = idxS % jamoTCount;
     immutable partL = jamoLBase + idxS / jamoNCount;
     immutable partV = jamoVBase + (idxS % jamoNCount) / jamoTCount;
     // no trailing consonant (T): <L, V> decomposition
     if (idxT == 0)
         return Grapheme(partL, partV);
     // otherwise <L, V, T> decomposition
     return Grapheme(partL, partV, jamoTBase + idxT);
 }

 ///
 @system unittest
 {
     import std.algorithm.comparison : equal;
     // an LVT syllable splits into three jamos
     assert(decomposeHangul('\uD4DB')[].equal("\u1111\u1171\u11B6"));
 }

 /++
     Try to compose hangul syllable out of a leading consonant ($(D lead)),
     a $(D vowel) and optional $(D trailing) consonant jamos.

     On success returns the composed LV or LVT hangul syllable.

     If any of $(D lead) and $(D vowel) are not a valid hangul jamo
     of the respective $(CHARACTER) class returns dchar.init.
 +/
 dchar composeJamo(dchar lead, dchar vowel, dchar trailing=dchar.init) pure nothrow @nogc @safe
 {
     // both the leading consonant and the vowel are mandatory
     if (!isJamoL(lead) || !isJamoV(vowel))
         return dchar.init;
     immutable indexL = lead - jamoLBase;
     immutable indexV = vowel - jamoVBase;
     immutable indexLV = indexL * jamoNCount + indexV * jamoTCount;
     immutable dchar syllable = jamoSBase + indexLV;
     // anything that is not a trailing consonant is ignored: plain LV syllable
     if (!isJamoT(trailing))
         return syllable;
     return syllable + (trailing - jamoTBase);
 }

 ///
 @safe unittest
 {
     assert(composeJamo('\u1111', '\u1171', '\u11B6') == '\uD4DB');
     // leaving out the trailing consonant (T), or passing any codepoint
     // that is not trailing consonant composes an LV-syllable
     assert(composeJamo('\u1111', '\u1171') == '\uD4CC');
     assert(composeJamo('\u1111', '\u1171', ' ') == '\uD4CC');
     // an invalid lead or vowel yields dchar.init
     assert(composeJamo('\u1111', 'A') == dchar.init);
     assert(composeJamo('A', '\u1171') == dchar.init);
 }

 @system unittest
 {
     import std.algorithm.comparison : equal;
     import std.conv : text;

     // helper: decompose ch with type T and compare against the expected string r
     static void testDecomp(UnicodeDecomposition T)(dchar ch, string r)
     {
         Grapheme g = decompose!T(ch);
         assert(equal(g[], r), text(g[], " vs ", r));
     }
     testDecomp!Canonical('\u1FF4', "\u03C9\u0301\u0345");
     testDecomp!Canonical('\uF907', "\u9F9C");
     testDecomp!Compatibility('\u33FF', "\u0067\u0061\u006C");
     testDecomp!Compatibility('\uA7F9', "\u0153");

     // check examples
     assert(decomposeHangul('\uD4DB')[].equal("\u1111\u1171\u11B6"));
     assert(composeJamo('\u1111', '\u1171', '\u11B6') == '\uD4DB');
     assert(composeJamo('\u1111', '\u1171') == '\uD4CC'); // leave out trailing consonant (T)
     assert(composeJamo('\u1111', '\u1171', ' ') == '\uD4CC');
     assert(composeJamo('\u1111', 'A') == dchar.init);
     assert(composeJamo('A', '\u1171') == dchar.init);
 }

 /**
     Enumeration type for normalization forms,
     passed as template parameter for functions like $(LREF normalize).
 */
 enum NormalizationForm {
     /// Canonical decomposition followed by recomposition.
     NFC,
     /// Canonical decomposition only.
     NFD,
     /// Compatibility decomposition followed by recomposition.
     NFKC,
     /// Compatibility decomposition only.
     NFKD
 }


 enum {
     /**
         Shorthand aliases for values indicating normalization forms.
     */
     NFC = NormalizationForm.NFC,
     ///ditto
     NFD = NormalizationForm.NFD,
     ///ditto
     NFKC = NormalizationForm.NFKC,
     ///ditto
     NFKD = NormalizationForm.NFKD
 }

 /++
     Returns $(D input) string normalized to the chosen form.
     Form C is used by default.

     For more information on normalization forms see
     the $(S_LINK Normalization, normalization section).

     Note:
     In cases where the string in question is already normalized,
     it is returned unmodified and no memory allocation happens.
 +/
 inout(C)[] normalize(NormalizationForm norm=NFC, C)(inout(C)[] input)
 {
     import std.algorithm.mutation : SwapStrategy;
     import std.algorithm.sorting : sort;
     import std.array : appender;
     import std.range : zip;

     // input[0 .. anchors[0]] is copied verbatim below, while
     // input[anchors[0] .. anchors[1]] is the span that gets re-normalized
     auto anchors = splitNormalized!norm(input);
     // nothing to normalize: hand back the very same slice, no allocation
     if (anchors[0] == input.length && anchors[1] == input.length)
         return input;
     // scratch buffers reused for every span: the decomposed code points
     // and, in parallel, their combining classes
     dchar[] decomposed;
     decomposed.reserve(31);
     ubyte[] ccc;
     ccc.reserve(31);
     auto app = appender!(C[])();
     do
     {
         app.put(input[0 .. anchors[0]]);
         // step 1: fully decompose every code point of the span
         foreach (dchar ch; input[anchors[0]..anchors[1]])
             static if (norm == NFD || norm == NFC)
             {
                 foreach (dchar c; decompose!Canonical(ch)[])
                     decomposed ~= c;
             }
             else // NFKD & NFKC
             {
                 foreach (dchar c; decompose!Compatibility(ch)[])
                     decomposed ~= c;
             }
         // step 2: stable-sort each run of non-zero combining classes by class
         ccc.length = decomposed.length;
         size_t firstNonStable = 0;
         ubyte lastClazz = 0;

         foreach (idx, dchar ch; decomposed)
         {
             immutable clazz = combiningClass(ch);
             ccc[idx] = clazz;
             if (clazz == 0 && lastClazz != 0)
             {
                 // found a stable code point after unstable ones
                 sort!("a[0] < b[0]", SwapStrategy.stable)
                     (zip(ccc[firstNonStable .. idx], decomposed[firstNonStable .. idx]));
                 // park the marker at the end so the trailing sort below is
                 // empty unless another unstable run starts
                 firstNonStable = decomposed.length;
             }
             else if (clazz != 0 && lastClazz == 0)
             {
                 // found first unstable code point after stable ones
                 firstNonStable = idx;
             }
             lastClazz = clazz;
         }
         // sort the run that reaches the end of the buffer, if any
         sort!("a[0] < b[0]", SwapStrategy.stable)
             (zip(ccc[firstNonStable..$], decomposed[firstNonStable..$]));
         // step 3 (composing forms only): recompose in place
         static if (norm == NFC || norm == NFKC)
         {
             import std.algorithm.searching : countUntil;
             // index of the first code point with combining class 0
             auto first = countUntil(ccc, 0);
             if (first >= 0) // no starters?? no recomposition
             {
                 // keep calling recompose from the index it returns until
                 // that index reaches the end of the buffer
                 for (;;)
                 {
                     immutable second = recompose(first, decomposed, ccc);
                     if (second == decomposed.length)
                         break;
                     first = second;
                 }
                 // 2nd pass for hangul syllables
                 hangulRecompose(decomposed);
             }
         }
         static if (norm == NFD || norm == NFKD)
             app.put(decomposed);
         else
         {
             import std.algorithm.mutation : remove;
             // drop the dchar.init entries before appending
             auto clean = remove!("a == dchar.init", SwapStrategy.stable)(decomposed);
             app.put(decomposed[0 .. clean.length]);
         }
         // reset variables
         decomposed.length = 0;
         decomposed.assumeSafeAppend();
         ccc.length = 0;
         ccc.assumeSafeAppend();
         input = input[anchors[1]..$];
         // and move on
         anchors = splitNormalized!norm(input);
     }while (anchors[0] != input.length);
     // whatever is left needs no normalization
     app.put(input[0 .. anchors[0]]);
     return cast(inout(C)[])app.data;
 }

 ///
 @safe unittest
 {
     // any encoding works
     wstring greet = "Hello world";
     // input that needs no changes comes back as the very same slice
     assert(normalize(greet) is greet); // the same exact slice

     // An example of a character with all 4 forms being different:
     // Greek upsilon with acute and hook symbol (code point 0x03D3)
     assert(normalize!NFC("ϓ") == "\u03D3"); // stays a single code point
     assert(normalize!NFD("ϓ") == "\u03D2\u0301"); // upsilon with hook + combining acute
     assert(normalize!NFKC("ϓ") == "\u038E"); // capital upsilon with tonos
     assert(normalize!NFKD("ϓ") == "\u03A5\u0301"); // capital upsilon + combining acute
 }

 @safe unittest
 {
     import std.conv : text;

     // decompositions: a CJK compatibility ideograph, superscript digits
     // and a precomposed Latin letter
     auto ideograph = normalize!NFD("abc\uF904def");
     assert(ideograph == "abc\u6ED1def", text(ideograph));
     auto digits = normalize!NFKD("2¹⁰");
     assert(digits == "210", digits);
     assert(normalize!NFD("Äffin") == "A\u0308ffin");

     // check example

     // any encoding works
     wstring greet = "Hello world";
     auto untouched = normalize(greet);
     assert(untouched is greet); // the same exact slice

     // An example of a character with all 4 forms being different:
     // Greek upsilon with acute and hook symbol (code point 0x03D3)
     enum upsilon = "ϓ";
     assert(normalize!NFKD(upsilon) == "\u03A5\u0301");
     assert(normalize!NFKC(upsilon) == "\u038E");
     assert(normalize!NFD(upsilon) == "\u03D2\u0301");
     assert(normalize!NFC(upsilon) == "\u03D3");
 }

 // Canonically recomposes, in place, the code points that follow the starter
 // at input[start]. A code point merged into the starter is overwritten with
 // dchar.init as a sentinel; ccc[i] must hold the combining class of input[i].
 // Returns the index of the next code point of class 0 that did not merge
 // (the next starter), or input.length when the end of the data was reached.
 private size_t recompose(size_t start, dchar[] input, ubyte[] ccc) pure nothrow @safe
 {
     assert(input.length == ccc.length);
     // combining class of the last code point left unmerged after the starter;
     // -1 puts it below the whole 0 .. 255 range until one is seen
     int blockingCC = -1;
     // input[start] is always a starter, thus the scan begins right after it
     for (size_t pos = start + 1; pos != input.length; ++pos)
     {
         immutable cc = ccc[pos];
         // In any character sequence beginning with a starter S
         // a character C is blocked from S if and only if there
         // is some character B between S and C, and either B
         // is a starter or it has the same or higher combining class as C.
         //------------------------
         // Here S is input[start], C is input[pos], and because the classes
         // are sorted blockingCC is the highest class found between them.
         if (cc > blockingCC)
         {
             immutable merged = compose(input[start], input[pos]);
             if (merged != dchar.init)
             {
                 input[start] = merged;
                 input[pos] = dchar.init; // put a sentinel
                 // a merged code point must not block the ones after it,
                 // so blockingCC is left untouched
                 continue;
             }
         }
         // not merged: it now stands between the starter and the rest
         blockingCC = cc;
         if (cc == 0)
             return pos; // an unmerged starter ends this run
     }
     return input.length;
 }

 // Scans input for the first spot that breaks normalization form norm and
 // returns a tuple of 2 indexes that delimit:
 // normalized text, piece that needs normalization and
 // the rest of input starting with stable code point.
 // Both indexes equal input.length when nothing has to be normalized.
 private auto splitNormalized(NormalizationForm norm, C)(const(C)[] input)
 {
     import std.typecons : tuple;
     ubyte prevCC = 0;

     foreach (pos, dchar cp; input)
     {
         static if (norm == NFC)
         {
             // fast path: code points below U+0300 are accepted without
             // consulting any table and count as combining class 0
             if (cp < 0x0300)
             {
                 prevCC = 0;
                 continue;
             }
         }
         immutable ubyte curCC = combiningClass(cp);
         // combining marks must come in non-decreasing class order
         immutable outOfOrder = curCC != 0 && prevCC > curCC;
         if (outOfOrder || notAllowedIn!norm(cp))
             return seekStable!norm(pos, input);
         prevCC = curCC;
     }
     return tuple(input.length, input.length);
 }

 // Given the index idx of a code point that breaks normalization form norm,
 // finds the region around it that has to be normalized as one piece.
 // A code point is stable when its combining class is 0 and it is allowed
 // in norm. Returns tuple(region_start, region_end) where region_start is the
 // index of the closest stable code point before idx (0 if there is none) and
 // region_end is the index of the first stable code point at or after idx
 // (input.length if there is none).
 private auto seekStable(NormalizationForm norm, C)(size_t idx, in C[] input)
 {
     import std.range.primitives : popBack;
     import std.typecons : tuple;
     import std.utf : codeLength;

     auto br = input[0 .. idx];
     size_t region_start = 0; // default: no stable code point before idx
     while (!br.empty)
     {
         immutable dchar ch = br.back;
         if (combiningClass(ch) == 0 && allowedIn!norm(ch))
         {
             // br ends with ch, so ch starts codeLength code units earlier
             region_start = br.length - codeLength!C(ch);
             break;
         }
         // Shrink from the back: the loop inspects br.back, so dropping the
         // front would look at the same code point again and again until br
         // ran empty, always falling back to region_start == 0.
         br.popBack();
     }
     ///@@@BUG@@@ can't use find: " find is a nested function and can't be used..."
     size_t region_end = input.length; // end is $ by default
     foreach (i, dchar ch; input[idx .. $])
     {
         if (combiningClass(ch) == 0 && allowedIn!norm(ch))
         {
             region_end = i + idx;
             break;
         }
     }
     // writeln("Region to normalize: ", input[region_start .. region_end]);
     return tuple(region_start, region_end);
 }

 /**
     Tests if dchar $(D ch) is always allowed (Quick_Check=YES) in normalization
     form $(D norm).
 */
 public bool allowedIn(NormalizationForm norm)(dchar ch)
 {
     // the quick check tables record the code points that are NOT always allowed
     immutable rejected = notAllowedIn!norm(ch);
     return !rejected;
 }

 ///
 @safe unittest
 {
     // e.g. Cyrillic is always allowed, so is ASCII
     // Cyrillic small letter ya, checked against all four forms
     assert(allowedIn!NFC('я'));
     assert(allowedIn!NFD('я'));
     assert(allowedIn!NFKC('я'));
     assert(allowedIn!NFKD('я'));
     // an ASCII letter
     assert(allowedIn!NFC('Z'));
 }

 // not user friendly name but more direct:
 // looks ch up in the quick check trie that belongs to normalization form norm
 // and returns the stored flag (true means ch is not always allowed in norm)
 private bool notAllowedIn(NormalizationForm norm)(dchar ch)
 {
     static if (norm == NFC)
         alias qcTrie = nfcQCTrie;
     else static if (norm == NFD)
         alias qcTrie = nfdQCTrie;
     else static if (norm == NFKC)
         alias qcTrie = nfkcQCTrie;
     else static if (norm == NFKD)
         alias qcTrie = nfkdQCTrie;
     else
     {
         // The condition has to be an explicit 0: a lone string is a non-null
         // (hence true) condition, and such a static assert never fires.
         static assert(0, "Unknown normalization form " ~ norm.stringof);
     }
     return qcTrie[ch];
 }

 @safe unittest
 {
     // Cyrillic small letter ya is allowed in every normalization form
     foreach (form; AliasSeq!(NFC, NFD, NFKC, NFKD))
         assert(allowedIn!form('я'));
     // and so is plain ASCII
     assert(allowedIn!NFC('Z'));
 }

 }

 version (std_uni_bootstrap)
 {
     // Hand-written fallback that is compiled only while bootstrapping
     // gen_uni.d, the tool that generates up to date optimal versions of
     // all of isXXX functions.
     @safe pure nothrow @nogc public bool isWhite(dchar c)
     {
         static import std.ascii;

         if (std.ascii.isWhite(c))
             return true;
         // the contiguous run U+2000 .. U+200A
         if (c >= '\u2000' && c <= '\u200A')
             return true;
         // the remaining, isolated code points
         switch (c)
         {
             case lineSep, paraSep,
                  '\u0085', '\u00A0', '\u1680', '\u180E',
                  '\u202F', '\u205F', '\u3000':
                 return true;
             default:
                 return false;
         }
     }
 }
 else
 {

 // trusted -> avoid bounds check
 @trusted pure nothrow @nogc private
 {
     import std.internal.unicode_tables; // : toLowerTable, toTitleTable, toUpperTable; // generated file

     // hide template instances behind functions (Bugzilla 13232)

     // lower case: trie lookups by code point, table lookup by index
     ushort toLowerIndex(dchar c)
     {
         return toLowerIndexTrie[c];
     }

     ushort toLowerSimpleIndex(dchar c)
     {
         return toLowerSimpleIndexTrie[c];
     }

     dchar toLowerTab(size_t idx)
     {
         return toLowerTable[idx];
     }

     // title case
     ushort toTitleIndex(dchar c)
     {
         return toTitleIndexTrie[c];
     }

     ushort toTitleSimpleIndex(dchar c)
     {
         return toTitleSimpleIndexTrie[c];
     }

     dchar toTitleTab(size_t idx)
     {
         return toTitleTable[idx];
     }

     // upper case
     ushort toUpperIndex(dchar c)
     {
         return toUpperIndexTrie[c];
     }

     ushort toUpperSimpleIndex(dchar c)
     {
         return toUpperSimpleIndexTrie[c];
     }

     dchar toUpperTab(size_t idx)
     {
         return toUpperTable[idx];
     }
 }

 public:

 /++
     Whether or not $(D c) is a Unicode whitespace $(CHARACTER).
     (general Unicode category: Part of C0(tab, vertical tab, form feed,
     carriage return, and linefeed characters), Zs, Zl, Zp, and NEL(U+0085))
 +/
 @safe pure nothrow @nogc
 public bool isWhite(dchar c)
 {
     static import std.internal.unicode_tables; // generated file

     // forward to the pregenerated binary search
     return std.internal.unicode_tables.isWhiteGen(c);
 }

 /++
     Return whether $(D c) is a Unicode lowercase $(CHARACTER).
 +/
 @safe pure nothrow @nogc
 bool isLower(dchar c)
 {
     static import std.ascii;

     // ASCII is answered by std.ascii, everything else by the trie
     return std.ascii.isASCII(c) ? std.ascii.isLower(c) : lowerCaseTrie[c];
 }

 @safe unittest
 {
     static import std.ascii;

     // must agree with std.ascii over the whole ASCII range
     foreach (v; 0 .. 0x80)
         assert(std.ascii.isLower(v) == isLower(v));

     // Cyrillic, Greek heta, Greek beta, extended Greek
     foreach (dchar lc; "яй\u0371\u03B2\u1F00"d)
         assert(isLower(lc));
     // Cyrillic, Greek HETA, capital MU, extended Greek
     foreach (dchar uc; "Ж\u0370\u039C\u1F18"d)
         assert(!isLower(uc));

     foreach (v; unicode.lowerCase.byCodepoint)
         assert(isLower(v) && !isUpper(v));
 }


 /++
     Return whether $(D c) is a Unicode uppercase $(CHARACTER).
 +/
 @safe pure nothrow @nogc
 bool isUpper(dchar c)
 {
     static import std.ascii;

     // ASCII is answered by std.ascii, everything else by the trie
     return std.ascii.isASCII(c) ? std.ascii.isUpper(c) : upperCaseTrie[c];
 }

 @safe unittest
 {
     static import std.ascii;

     // isUpper must agree with std.ascii over the whole ASCII range
     // (the static import keeps plain `isUpper` bound to this module)
     foreach (v; 0 .. 0x80)
         assert(std.ascii.isUpper(v) == isUpper(v));
     assert(!isUpper('й'));
     assert(isUpper('Ж'));
     // Greek HETA
     assert(isUpper('\u0370'));
     assert(!isUpper('\u0371'));
     assert(isUpper('\u039C')); // capital MU
     assert(!isUpper('\u03B2')); // beta
     // from extended Greek
     assert(!isUpper('\u1F00'));
     assert(isUpper('\u1F18'));
     // no code point of the upper case set may also count as lower case
     foreach (v; unicode.upperCase.byCodepoint)
         assert(isUpper(v) && !isLower(v));
 }


 //TODO: Hidden for now, needs better API.
 //Other transforms could use better API as well, but this one is a new primitive.
 // Maps c through the simple title case table; a code point without an entry
 // is returned as is.
 @safe pure nothrow @nogc
 private dchar toTitlecase(dchar c)
 {
     // optimize ASCII case: below U+00AA only 'a' .. 'z' are changed
     if (c < 0xAA)
     {
         if (c >= 'a' && c <= 'z')
             return c - 32;
         return c;
     }
     immutable size_t slot = toTitleSimpleIndex(c);
     // ushort.max marks "no mapping"
     if (slot == ushort.max)
         return c;
     return toTitleTab(slot);
 }

 // Argument bundles for the generic case conversion templates below, in the
 // order those templates take them: index lookup function, the bound below
 // which an index denotes a 1:1 mapping, and the mapping table accessor.
 private alias UpperTriple = AliasSeq!(toUpperIndex, MAX_SIMPLE_UPPER, toUpperTab);
 private alias LowerTriple = AliasSeq!(toLowerIndex, MAX_SIMPLE_LOWER, toLowerTab);

 // generic toUpper/toLower on whole string, creates new or returns as is
 //
 // indexFn maps a code point to an index (ushort.max: no mapping), indexes
 // below maxIdx are 1:1 mappings read with tableFn, the others are 1:m
 // mappings; asciiConvert handles ASCII once a conversion has started.
 // Returns s itself when indexFn finds no mapping for any of its code points.
 private S toCase(alias indexFn, uint maxIdx, alias tableFn, alias asciiConvert, S)(S s) @trusted pure
 if (isSomeString!S)
 {
     import std.array : appender;
     import std.ascii : isASCII;

     // look for the first code point that has a case mapping
     size_t firstMapped = s.length;
     foreach (pos, dchar probe; s)
     {
         immutable ushort slot = indexFn(probe);
         if (slot != ushort.max)
         {
             firstMapped = pos;
             break;
         }
     }
     if (firstMapped == s.length)
         return s; // nothing to convert

     // the untouched prefix is taken over as is, the rest gets converted
     auto sink = appender!S(s[0 .. firstMapped]);
     sink.reserve(s.length);
     foreach (dchar cp; s[firstMapped .. $])
     {
         if (isASCII(cp))
         {
             sink.put(asciiConvert(cp));
             continue;
         }
         immutable ushort slot = indexFn(cp);
         if (slot == ushort.max)
             sink.put(cp);
         else if (slot < maxIdx)
             sink.put(tableFn(slot));
         else
         {
             immutable head = tableFn(slot);
             // unpack length + codepoint
             immutable uint count = head >> 24;
             sink.put(cast(dchar)(head & 0xFF_FFFF));
             // the remaining code points of the mapping follow in the table
             foreach (j; slot + 1 .. slot + count)
                 sink.put(tableFn(j));
         }
     }
     return sink.data;
 }

 @safe unittest //12428
 {
     import std.array : replicate;
     string pattern = "abcdefghij";
     // a short slice at the start of a much longer array
     auto s = pattern.replicate(300)[0 .. pattern.length];

     toUpper(s);

     // converting must leave the source slice alone
     assert(s == pattern);
 }


 // generic toUpper/toLower on whole range, returns range
 //
 // indexFn maps a code point to an index (ushort.max: no mapping), indexes
 // below maxIdx are 1:1 mappings read with tableFn, the others are 1:m
 // mappings; asciiConvert handles every ASCII code point.
 private auto toCaser(alias indexFn, uint maxIdx, alias tableFn, alias asciiConvert, Range)(Range str)
     // Accept range of dchar's
 if (isInputRange!Range &&
     isSomeChar!(ElementEncodingType!Range) &&
     ElementEncodingType!Range.sizeof == dchar.sizeof)
 {
     static struct ToCaserImpl
     {
         @property bool empty()
         {
             return pending == 0 && src.empty;
         }

         @property auto front()
         {
             if (pending == 0)
                 load();
             return stash[pending - 1];
         }

         void popFront()
         {
             // works even if front was never called for the current element
             if (pending == 0)
                 load();
             assert(pending);
             --pending;
             // move on only after the whole mapping has been handed out
             if (pending == 0)
                 src.popFront();
         }

         static if (isForwardRange!Range)
         {
             @property auto save()
             {
                 auto copy = this;
                 copy.src = src.save;
                 return copy;
             }
         }

       private:
         // converts src.front into stash; stash is filled back to front, so
         // stash[pending - 1] is always the next code point to hand out
         void load()
         {
             import std.ascii : isASCII;

             dchar cp = src.front;
             if (isASCII(cp))
             {
                 stash[0] = asciiConvert(cp);
                 pending = 1;
                 return;
             }
             const slot = indexFn(cp);
             if (slot == ushort.max)
             {
                 stash[0] = cp;
                 pending = 1;
             }
             else if (slot < maxIdx)
             {
                 stash[0] = tableFn(slot);
                 pending = 1;
             }
             else
             {
                 immutable packed = tableFn(slot);
                 // unpack length + codepoint
                 pending = packed >> 24;
                 if (pending == 0)
                     pending = 1;
                 assert(pending <= stash.length);
                 stash[pending - 1] = cast(dchar)(packed & 0xFF_FFFF);
                 foreach (j; 1 .. pending)
                     stash[pending - j - 1] = tableFn(slot + j);
             }
         }

         Range src;
         uint pending;
         dchar[3] stash = void;
     }

     return ToCaserImpl(str);
 }

 /*********************
  * Convert input range or string to upper or lower case.
  *
  * Does not allocate memory.
  * Characters in UTF-8 or UTF-16 format that cannot be decoded
  * are treated as $(REF replacementDchar, std,utf).
  *
  * Params:
  *      str = string or range of characters
  *
  * Returns:
  *      an InputRange of dchars
  *
  * See_Also:
  *      $(LREF toUpper), $(LREF toLower)
  */

 auto asLowerCase(Range)(Range str)
 if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
     !isConvertibleToString!Range)
 {
     static if (ElementEncodingType!Range.sizeof >= dchar.sizeof)
     {
         // elements are whole code points already
         static import std.ascii;
         return toCaser!(LowerTriple, std.ascii.toLower)(str);
     }
     else
     {
         import std.utf : byDchar;

         // Decode first, then take the branch above
         return asLowerCase(str.byDchar);
     }
 }

 /// ditto
 auto asUpperCase(Range)(Range str)
 if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
     !isConvertibleToString!Range)
 {
     static if (ElementEncodingType!Range.sizeof >= dchar.sizeof)
     {
         // elements are whole code points already
         static import std.ascii;
         return toCaser!(UpperTriple, std.ascii.toUpper)(str);
     }
     else
     {
         import std.utf : byDchar;

         // Decode first, then take the branch above
         return asUpperCase(str.byDchar);
     }
 }

 ///
 @safe pure unittest
 {
     import std.algorithm.comparison : equal;

     // the result is a range, so it is compared element by element
     assert("hEllo".asUpperCase.equal("HELLO"));
 }

 // explicitly undocumented
 // Overload for types that implicitly convert to a string: forwards to the
 // range overload instantiated with that string type.
 auto asLowerCase(Range)(auto ref Range str)
 if (isConvertibleToString!Range)
 {
     import std.traits : StringTypeOf;
     alias Str = StringTypeOf!Range;
     return asLowerCase!Str(str);
 }

 // explicitly undocumented
 // Overload for types that implicitly convert to a string: forwards to the
 // range overload instantiated with that string type.
 auto asUpperCase(Range)(auto ref Range str)
 if (isConvertibleToString!Range)
 {
     import std.traits : StringTypeOf;
     alias Str = StringTypeOf!Range;
     return asUpperCase!Str(str);
 }

 @safe unittest
 {
     // both conversions must accept types that convert to a string
     enum sample = "hEllo";
     assert(testAliasedString!asUpperCase(sample));
     assert(testAliasedString!asLowerCase(sample));
 }

 @safe unittest
 {
     import std.array : array;

     // a saved copy must produce the same sequence as the original
     auto a = "HELLo".asLowerCase;
     auto savea = a.save;
     auto s = a.array;
     assert(s == "hello");
     s = savea.array;
     assert(s == "hello");

     // lower[i] and upper[i] are case-converted counterparts of each other
     string[] lower = ["123", "abcфеж", "\u0131\u023f\u03c9", "i\u0307\u1Fe2"];
     string[] upper = ["123", "ABCФЕЖ", "I\u2c7e\u2126", "\u0130\u03A5\u0308\u0300"];

     // the lazy ranges must agree with the eager string functions
     foreach (i, slwr; lower)
     {
         import std.utf : byChar;

         auto sx = slwr.asUpperCase.byChar.array;
         assert(sx == toUpper(slwr));
         auto sy = upper[i].asLowerCase.byChar.array;
         assert(sy == toLower(upper[i]));
     }

     // Not necessary to call r.front
     for (auto r = lower[3].asUpperCase; !r.empty; r.popFront())
     {
     }

     import std.algorithm.comparison : equal;

     // UTF-16 and UTF-32 input; the comparison results have to be asserted,
     // a bare call to equal checks nothing
     assert("HELLo"w.asLowerCase.equal("hello"d));
     assert("HELLo"w.asUpperCase.equal("HELLO"d));
     assert("HELLo"d.asLowerCase.equal("hello"d));
     assert("HELLo"d.asUpperCase.equal("HELLO"d));

     import std.utf : byChar;
     assert(toLower("\u1Fe2") == asLowerCase("\u1Fe2").byChar.array);
 }

 // generic capitalizer on whole range, returns range
 //
 // The first code point of str is converted with the given upper case triple
 // (index lookup, bound of the 1:1 indexes, table accessor); once its whole
 // mapping has been consumed, the rest of str is served through asLowerCase.
 private auto toCapitalizer(alias indexFnUpper, uint maxIdxUpper, alias tableFnUpper,
                            Range)(Range str)
     // Accept range of dchar's
 if (isInputRange!Range &&
     isSomeChar!(ElementEncodingType!Range) &&
     ElementEncodingType!Range.sizeof == dchar.sizeof)
 {
     static struct ToCapitalizerImpl
     {
         @property bool empty()
         {
             // in the lower case phase only lwr matters; before that the range
             // is empty when no buffered code point is left and r is exhausted
             return lower ? lwr.empty : !nLeft && r.empty;
         }

         @property auto front()
         {
             if (lower)
                 return lwr.front;

             // nothing buffered yet: convert the first code point into buf
             if (!nLeft)
             {
                 immutable dchar c = r.front;
                 const idx = indexFnUpper(c);
                 if (idx == ushort.max)
                 {
                     // no mapping, keep the code point as is
                     buf[0] = c;
                     nLeft = 1;
                 }
                 else if (idx < maxIdxUpper)
                 {
                     // 1:1 mapping
                     buf[0] = tableFnUpper(idx);
                     nLeft = 1;
                 }
                 else
                 {
                     // 1:m mapping
                     immutable val = tableFnUpper(idx);
                     // unpack length + codepoint
                     nLeft = val >> 24;
                     if (nLeft == 0)
                         nLeft = 1;
                     assert(nLeft <= buf.length);
                     // buf is filled back to front, so that buf[nLeft - 1]
                     // is always the next code point to hand out
                     buf[nLeft - 1] = cast(dchar)(val & 0xFF_FFFF);
                     foreach (j; 1 .. nLeft)
                         buf[nLeft - j - 1] = tableFnUpper(idx + j);
                 }
             }
             return buf[nLeft - 1];
         }

         void popFront()
         {
             if (lower)
                 lwr.popFront();
             else
             {
                 // fill buf if front was never called for the first code point
                 if (!nLeft)
                     front;
                 assert(nLeft);
                 --nLeft;
                 if (!nLeft)
                 {
                     // the first code point is fully consumed: drop it from r
                     // and serve everything that remains through asLowerCase
                     r.popFront();
                     lwr = r.asLowerCase();
                     lower = true;
                 }
             }
         }

         static if (isForwardRange!Range)
         {
             @property auto save()
             {
                 // both underlying ranges are saved, whichever phase is active
                 auto ret = this;
                 ret.r = r.save;
                 ret.lwr = lwr.save;
                 return ret;
             }
         }

       private:
         Range r;
         typeof(r.asLowerCase) lwr; // range representing the lower case rest of string
         bool lower = false;     // false for first character, true for rest of string
         dchar[3] buf = void;    // converted first code point, stored back to front
         uint nLeft = 0;         // number of code points in buf not yet popped
     }

     return ToCapitalizerImpl(str);
 }

 /*********************
  * Capitalize input range or string, meaning convert the first
  * character to upper case and subsequent characters to lower case.
  *
  * Does not allocate memory.
  * Characters in UTF-8 or UTF-16 format that cannot be decoded
  * are treated as $(REF replacementDchar, std,utf).
  *
  * Params:
  *      str = string or range of characters
  *
  * Returns:
  *      an InputRange of dchars
  *
  * See_Also:
  *      $(LREF toUpper), $(LREF toLower)
  *      $(LREF asUpperCase), $(LREF asLowerCase)
  */

 auto asCapitalized(Range)(Range str)
 if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
     !isConvertibleToString!Range)
 {
     static if (ElementEncodingType!Range.sizeof >= dchar.sizeof)
     {
         // elements are whole code points already
         return toCapitalizer!UpperTriple(str);
     }
     else
     {
         import std.utf : byDchar;

         // Decode first
         return toCapitalizer!UpperTriple(str.byDchar);
     }
 }

 ///
 @safe pure unittest
 {
     import std.algorithm.comparison : equal;

     // first letter upper case, all the others lower case
     assert("hEllo".asCapitalized.equal("Hello"));
 }

 // explicitly undocumented
 // Overload for types that implicitly convert to a string: forwards to the
 // range overload instantiated with that string type.
 auto asCapitalized(Range)(auto ref Range str)
 if (isConvertibleToString!Range)
 {
     import std.traits : StringTypeOf;
     alias Str = StringTypeOf!Range;
     return asCapitalized!Str(str);
 }

 @safe unittest
 {
     // asCapitalized must accept types that convert to a string
     enum sample = "hEllo";
     assert(testAliasedString!asCapitalized(sample));
 }

 @safe pure nothrow @nogc unittest
 {
     // creating the range and reading front has to work under all of
     // the attributes of this unittest
     auto capitalized = asCapitalized("hEllo");
     immutable dchar first = capitalized.front;
     assert(first == 'H');
 }

 @safe unittest
 {
     import std.array : array;

     // a saved copy must produce the same sequence as the original
     auto a = "hELLo".asCapitalized;
     auto savea = a.save;
     auto s = a.array;
     assert(s == "Hello");
     s = savea.array;
     assert(s == "Hello");

     // pairs of input and expected capitalized output
     string[2][] cases =
     [
         ["", ""],
         ["h", "H"],
         ["H", "H"],
         ["3", "3"],
         ["123", "123"],
         ["h123A", "H123a"],
         ["феж", "Феж"],
         ["\u1Fe2", "\u03a5\u0308\u0300"],
     ];

     foreach (i; 0 .. cases.length)
     {
         import std.utf : byChar;

         auto r = cases[i][0].asCapitalized.byChar.array;
         auto result = cases[i][1];
         assert(r == result);
     }

     // Don't call r.front
     for (auto r = "\u1Fe2".asCapitalized; !r.empty; r.popFront())
     {
     }

     import std.algorithm.comparison : equal;

     // UTF-16 and UTF-32 input; the comparison results have to be asserted,
     // a bare call to equal checks nothing
     assert("HELLo"w.asCapitalized.equal("Hello"d));
     assert("hElLO"w.asCapitalized.equal("Hello"d));
     assert("hello"d.asCapitalized.equal("Hello"d));
     assert("HELLO"d.asCapitalized.equal("Hello"d));

     import std.utf : byChar;
     assert(asCapitalized("\u0130").byChar.array == asUpperCase("\u0130").byChar.array);
 }

 // TODO: helper, I wish std.utf was more flexible (and straight)
 // Writes c as UTF-8 into buf starting at index idx and returns the index
 // just past the last code unit written. Halts on c above U+10FFFF.
 private size_t encodeTo(scope char[] buf, size_t idx, dchar c) @trusted pure nothrow @nogc
 {
     // continuation byte: marker bits 10 followed by the low six bits
     static char tail(uint bits) { return cast(char)(0x80 | (bits & 0x3F)); }

     if (c <= 0x7F)
     {
         buf[idx] = cast(char) c;
         return idx + 1;
     }
     if (c <= 0x7FF)
     {
         buf[idx] = cast(char)(0xC0 | (c >> 6));
         buf[idx + 1] = tail(c);
         return idx + 2;
     }
     if (c <= 0xFFFF)
     {
         buf[idx] = cast(char)(0xE0 | (c >> 12));
         buf[idx + 1] = tail(c >> 6);
         buf[idx + 2] = tail(c);
         return idx + 3;
     }
     if (c <= 0x10FFFF)
     {
         buf[idx] = cast(char)(0xF0 | (c >> 18));
         buf[idx + 1] = tail(c >> 12);
         buf[idx + 2] = tail(c >> 6);
         buf[idx + 3] = tail(c);
         return idx + 4;
     }
     assert(0);
 }

 @safe unittest
 {
     char[] s = "abcd".dup;

     // an ASCII code point takes a single code unit
     size_t next = encodeTo(s, 0, 'X');
     assert(s == "Xbcd");

     // U+00A9 takes two and goes right after the first one
     next = encodeTo(s, next, cast(dchar)'\u00A9');
     assert(s == "X\xC2\xA9d");
 }

 // TODO: helper, I wish std.utf was more flexible (and straight)
 // Writes c as UTF-16 into buf starting at index idx and returns the index
 // just past the last code unit written. Throws UTFException on a surrogate
 // code point and halts on c above U+10FFFF.
 private size_t encodeTo(scope wchar[] buf, size_t idx, dchar c) @trusted pure
 {
     import std.utf : UTFException;

     if (c > 0x10FFFF)
         assert(0);
     if (c > 0xFFFF)
     {
         // surrogate pair: 10 high bits first, then the 10 low bits
         immutable offset = c - 0x10000;
         buf[idx] = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800);
         buf[idx + 1] = cast(wchar)((offset & 0x3FF) + 0xDC00);
         return idx + 2;
     }
     if (c >= 0xD800 && c <= 0xDFFF)
         throw (new UTFException("Encoding an isolated surrogate code point in UTF-16")).setSequence(c);
     buf[idx] = cast(wchar) c;
     return idx + 1;
 }

 // UTF-32 needs no encoding: stores c at buf[idx] and returns the next index.
 private size_t encodeTo(scope dchar[] buf, size_t idx, dchar c) @trusted pure nothrow @nogc
 {
     buf[idx] = c;
     return idx + 1;
 }

 // Converts the case of s inside its own buffer for as long as the converted
 // text fits, and hands over to the allocating slow path (toCaseInPlaceAlloc)
 // as soon as it does not. On return s is either a shorter or equal slice of
 // the original buffer or a new array made by the slow path.
 // indexFn maps a code point to an index (ushort.max: no mapping), indexes
 // below maxIdx are 1:1 mappings read with tableFn, the others are 1:m.
 private void toCaseInPlace(alias indexFn, uint maxIdx, alias tableFn, C)(ref C[] s) @trusted pure
 if (is(C == char) || is(C == wchar)  || is(C == dchar))
 {
     import std.utf : decode, codeLength;
     size_t curIdx = 0;  // read position, advanced by decode
     size_t destIdx = 0; // write position, asserted below to never pass curIdx
     alias slowToCase = toCaseInPlaceAlloc!(indexFn, maxIdx, tableFn);
     size_t lastUnchanged = 0; // start of the unchanged run not yet moved to destIdx
     // in-buffer move of bytes to a new start index
     // the trick is that it may not need to copy at all
     static size_t moveTo(C[] str, size_t dest, size_t from, size_t to)
     {
         // Interestingly we may just bump pointer for a while
         // then have to copy if a re-cased char was smaller the original
         // later we may regain pace with char that got bigger
         // In the end it sometimes flip-flops between the 2 cases below
         if (dest == from)
             return to;
         // got to copy
         foreach (C c; str[from .. to])
             str[dest++] = c;
         return dest;
     }
     while (curIdx != s.length)
     {
         size_t startIdx = curIdx; // start of the code point decoded next
         immutable ch = decode(s, curIdx);
         // TODO: special case for ASCII
         immutable caseIndex = indexFn(ch);
         if (caseIndex == ushort.max) // unchanged, skip over
         {
             continue;
         }
         else if (caseIndex < maxIdx)  // 1:1 codepoint mapping
         {
             // previous cased chars had the same length as uncased ones
             // thus can just adjust pointer
             destIdx = moveTo(s, destIdx, lastUnchanged, startIdx);
             lastUnchanged = curIdx;
             immutable cased = tableFn(caseIndex);
             immutable casedLen = codeLength!C(cased);
             // writing past curIdx would overwrite input that was not read yet
             if (casedLen + destIdx > curIdx) // no place to fit cased char
             {
                 // switch to slow codepath, where we allocate
                 return slowToCase(s, startIdx, destIdx);
             }
             else
             {
                 destIdx = encodeTo(s, destIdx, cased);
             }
         }
         else  // 1:m codepoint mapping, slow codepath
         {
             // flush the unchanged run first, the slow path continues
             // reading at startIdx and writing at destIdx
             destIdx = moveTo(s, destIdx, lastUnchanged, startIdx);
             lastUnchanged = curIdx;
             return slowToCase(s, startIdx, destIdx);
         }
         assert(destIdx <= curIdx);
     }
     // move the trailing unchanged run, if there is one
     if (lastUnchanged != s.length)
     {
         destIdx = moveTo(s, destIdx, lastUnchanged, s.length);
     }
     // cut s down to what was actually written
     s = s[0 .. destIdx];
 }

 // helper to precalculate size of case-converted string:
 // returns the number of code units of type C that str takes up after its
 // case is converted with the given index function and table
 private template toCaseLength(alias indexFn, uint maxIdx, alias tableFn)
 {
     size_t toCaseLength(C)(in C[] str)
     {
         import std.utf : decode, codeLength;
         size_t total = 0;
         size_t runStart = 0; // start of the unchanged run not yet counted
         size_t pos = 0;
         while (pos != str.length)
         {
             immutable cpStart = pos;
             immutable cp = decode(str, pos);
             immutable ushort slot = indexFn(cp);
             if (slot == ushort.max)
                 continue; // unchanged, counted together with its run
             // count the unchanged run that ends right before this code point
             total += cpStart - runStart;
             runStart = pos;
             if (slot < maxIdx)
             {
                 // 1:1 mapping
                 total += codeLength!C(tableFn(slot));
             }
             else
             {
                 // 1:m mapping: length + first code point packed together,
                 // the rest follows in the table
                 immutable packed = tableFn(slot);
                 immutable count = packed >> 24;
                 immutable dchar head = packed & 0xFF_FFFF;
                 total += codeLength!C(head);
                 foreach (j; slot + 1 .. slot + count)
                     total += codeLength!C(tableFn(j));
             }
         }
         // whatever remains after the last converted code point
         return total + (str.length - runStart);
     }
 }

 @safe unittest
 {
     alias lowerLen = toCaseLength!(LowerTriple);
     // 5 Cyrillic letters of 2 code units each plus 3 ASCII digits
     assert(lowerLen("аБВгд456") == 13);
     // plain ASCII keeps its length
     assert(lowerLen("abcd") == 4);
 }

 // slower code path that preallocates and then copies
 // case-converted stuff to the new string
 //
 // s[0 .. destIdx] holds the text converted so far, s[curIdx .. $] is the
 // input that is still to be converted. On return s refers to the new array.
 private template toCaseInPlaceAlloc(alias indexFn, uint maxIdx, alias tableFn)
 {
     void toCaseInPlaceAlloc(C)(ref C[] s, size_t curIdx,
         size_t destIdx) @trusted pure
         if (is(C == char) || is(C == wchar) || is(C == dchar))
     {
         import std.utf : decode;
         alias caseLength = toCaseLength!(indexFn, maxIdx, tableFn);
         // the exact final size is known up front, so one allocation is enough
         immutable total = destIdx + caseLength(s[curIdx .. $]);
         C[] fresh = new C[total];
         fresh[0 .. destIdx] = s[0 .. destIdx];
         size_t runStart = curIdx; // start of the unchanged run not yet copied
         while (curIdx != s.length)
         {
             immutable cpStart = curIdx; // start of current codepoint
             immutable cp = decode(s, curIdx);
             immutable slot = indexFn(cp);
             if (slot == ushort.max) // skip over
                 continue;
             // copy the unchanged run that ends right before this code point
             immutable run = cpStart - runStart;
             fresh[destIdx .. destIdx + run] = s[runStart .. cpStart];
             runStart = curIdx;
             destIdx += run;
             if (slot < maxIdx)  // 1:1 codepoint mapping
             {
                 destIdx = encodeTo(fresh, destIdx, tableFn(slot));
             }
             else  // 1:m codepoint mapping
             {
                 immutable packed = tableFn(slot);
                 // unpack length + codepoint
                 immutable uint count = packed >> 24;
                 destIdx = encodeTo(fresh, destIdx, cast(dchar)(packed & 0xFF_FFFF));
                 foreach (j; slot + 1 .. slot + count)
                     destIdx = encodeTo(fresh, destIdx, tableFn(j));
             }
         }
         // copy whatever remains after the last converted code point
         if (runStart != s.length)
         {
             immutable rest = s.length - runStart;
             fresh[destIdx .. destIdx + rest] = s[runStart .. $];
             destIdx += rest;
         }
         assert(fresh.length == destIdx);
         s = fresh;
     }
 }

 /++
     Converts $(D s) to lowercase (by performing Unicode lowercase mapping) in place.
     For a few characters string length may increase after the transformation,
     in such a case the function reallocates exactly once.
     If $(D s) does not have any uppercase characters, then $(D s) is unaltered.
 +/
 void toLowerInPlace(C)(ref C[] s) @trusted pure
 if (is(C == char) || is(C == wchar) || is(C == dchar))
 {
     // generic in-place conversion driven by the lower case tables
     toCaseInPlace!(LowerTriple, C)(s);
 }
 // Non-template overloads for the most common array types, to reduce compile time.
 @safe pure /*TODO nothrow*/
 {
     void toLowerInPlace(ref char[] s)
     {
         toLowerInPlace!char(s);
     }

     void toLowerInPlace(ref wchar[] s)
     {
         toLowerInPlace!wchar(s);
     }

     void toLowerInPlace(ref dchar[] s)
     {
         toLowerInPlace!dchar(s);
     }
 }

 /++
     Uppercases $(D s) in place by applying the Unicode uppercase mapping.

     For a few characters the string length may increase after the
     transformation; in such a case the function reallocates exactly once.
     If $(D s) does not contain any lowercase characters it is left unaltered.

     Params:
         s = array of $(D char), $(D wchar) or $(D dchar), taken by reference
 +/
 void toUpperInPlace(C)(ref C[] s) @trusted pure
 if (is(C == char) || is(C == wchar) || is(C == dchar))
 {
     // Forward to the shared in-place case converter with the uppercase tables.
     toCaseInPlace!UpperTriple(s);
 }
 // Non-template overloads for the most common array types,
 // to reduce compile time and code size.
 @safe pure /*TODO nothrow*/
 {
     void toUpperInPlace(ref char[] s)
     {
         toUpperInPlace!char(s);
     }

     void toUpperInPlace(ref wchar[] s)
     {
         toUpperInPlace!wchar(s);
     }

     void toUpperInPlace(ref dchar[] s)
     {
         toUpperInPlace!dchar(s);
     }
 }

 /++
     Maps a Unicode uppercase $(CHARACTER) to its lowercase equivalent.
     Any other $(D c) is returned unchanged.

     Warning: certain alphabets like German and Greek have no 1:1
     upper-lower mapping. Use overload of toLower which takes full string instead.

     Params:
         c = the $(CHARACTER) to convert
     Returns:
         The lowercase equivalent of $(D c) if it has one, otherwise $(D c).
 +/
 dchar toLower(dchar c) @safe pure nothrow @nogc
 {
     // Code points from U+00AA upwards are resolved through the simple-case tables.
     if (c >= 0xAA)
     {
         size_t tabIdx = toLowerSimpleIndex(c);
         // An index of ushort.max means no table entry is used for this code point.
         if (tabIdx == ushort.max)
             return c;
         return toLowerTab(tabIdx);
     }
     // Below U+00AA only 'A' .. 'Z' are changed.
     if (c >= 'A' && c <= 'Z')
         return c + 32;
     return c;
 }

 /++
     Produces a lowercase version of $(D s) by performing Unicode lowercase
     mapping on all of its characters.
     If none of the characters of $(D s) were affected, then $(D s) itself
     is returned.

     Params:
         s = any string type
     Returns:
         The lowercased string, or $(D s) when nothing had to change.
 +/
 S toLower(S)(S s) @trusted pure
 if (isSomeString!S)
 {
     static import std.ascii;
     alias asciiLower = std.ascii.toLower;
     return toCase!(LowerTriple, asciiLower)(s);
 }
 // Non-template overloads for the most common string types, to reduce compile time.
 @safe pure /*TODO nothrow*/
 {
     string toLower(string s)
     {
         return toLower!string(s);
     }

     wstring toLower(wstring s)
     {
         return toLower!wstring(s);
     }

     dstring toLower(dstring s)
     {
         return toLower!dstring(s);
     }

     @safe unittest
     {
         // https://issues.dlang.org/show_bug.cgi?id=16663

         // A wrapper that is not a string itself but converts to one via alias this.
         static struct String
         {
             string data;
             alias data this;
         }

         // Never invoked below; the call only has to compile.
         void lowerWrapped()
         {
             auto lowered = toLower(String(""));
         }
     }
 }


 @system unittest //@@@BUG std.format is not @safe
 {
     static import std.ascii;
     import std.format : format;
     // For 0 .. 0x80 (exclusive) the result must match std.ascii.toLower.
     foreach (ch; 0 .. 0x80)
         assert(std.ascii.toLower(ch) == toLower(ch));
     assert(toLower('Я') == 'я');
     assert(toLower('Δ') == 'δ');
     // Every uppercase code point either stays as is or maps to a lowercase one.
     foreach (ch; unicode.upperCase.byCodepoint)
     {
         dchar low = ch.toLower();
         assert(low == ch || isLower(low), format("%s -> %s", ch, low));
     }
     assert(toLower("АЯ") == "ая");

     // String overloads: U+1E9E lowercases to U+00DF, and U+00DF uppercases to "SS".
     assert("\u1E9E".toLower == "\u00df");
     assert("\u00df".toUpper == "SS");
 }

 // bugzilla 9629
 @safe unittest
 {
     wchar[] text = "hello þ world"w.dup;
     // Uppercase a one-element slice; the change must be visible in the parent array.
     auto thorn = text[6 .. 7];
     toUpperInPlace(thorn);
     assert(text == "hello Þ world");
 }


 @safe unittest
 {
     import std.algorithm.comparison : cmp;

     // ASCII-only input
     {
         string src = "FoL";
         string lowered = toLower(src);
         assert(cmp(lowered, "fol") == 0, lowered);
         assert(lowered != src);

         char[] inPlace = src.dup;
         toLowerInPlace(inPlace);
         assert(inPlace == lowered);
     }

     // Latin Extended-A mixed with ASCII
     {
         string src = "A\u0100B\u0101d";
         string lowered = toLower(src);
         char[] inPlace = src.dup;
         assert(cmp(lowered, "a\u0101b\u0101d") == 0);
         assert(lowered !is src);
         toLowerInPlace(inPlace);
         assert(inPlace == lowered);
     }

     // Cyrillic mixed with ASCII
     {
         string src = "A\u0460B\u0461d";
         string lowered = toLower(src);
         char[] inPlace = src.dup;
         assert(cmp(lowered, "a\u0461b\u0461d") == 0);
         assert(lowered !is src);
         toLowerInPlace(inPlace);
         assert(inPlace == lowered);
     }

     // U+0130 lowercases to two code points
     {
         string src = "\u0130";
         string lowered = toLower(src);
         char[] inPlace = src.dup;
         assert(lowered == "i\u0307");
         assert(lowered !is src);
         toLowerInPlace(inPlace);
         assert(inPlace == lowered);
     }

     // Test on wchar and dchar strings.
     assert(toLower("Some String"w) == "some string"w);
     assert(toLower("Some String"d) == "some string"d);

     // bugzilla 12455
     {
         dchar dottedI = 'İ'; // U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
         assert(isUpper(dottedI));
         assert(toLower(dottedI) == 'i');
     }

     // extends the 12455 report - check simple-case toUpper too
     {
         dchar greek = '\u1f87';
         assert(isLower(greek));
         assert(toUpper(greek) == '\u1F8F');
     }
 }


 /++
     Maps a Unicode lowercase $(CHARACTER) to its uppercase equivalent.
     Any other $(D c) is returned unchanged.

     Warning:
     Certain alphabets like German and Greek have no 1:1
     upper-lower mapping. Use overload of toUpper which takes full string instead.

     toUpper can be used as an argument to $(REF map, std,algorithm,iteration)
     to produce an algorithm that can convert a range of characters to upper case
     without allocating memory.
     A string can then be produced by using $(REF copy, std,algorithm,mutation)
     to send it to an $(REF appender, std,array).

     Params:
         c = the $(CHARACTER) to convert
     Returns:
         The uppercase equivalent of $(D c) if it has one, otherwise $(D c).
 +/
 dchar toUpper(dchar c) @safe pure nothrow @nogc
 {
     // Code points from U+00AA upwards are resolved through the simple-case tables.
     if (c >= 0xAA)
     {
         size_t tabIdx = toUpperSimpleIndex(c);
         // An index of ushort.max means no table entry is used for this code point.
         if (tabIdx == ushort.max)
             return c;
         return toUpperTab(tabIdx);
     }
     // Below U+00AA only 'a' .. 'z' are changed.
     if (c >= 'a' && c <= 'z')
         return c - 32;
     return c;
 }

 ///
 @system unittest
 {
     import std.algorithm.iteration : map;
     import std.algorithm.mutation : copy;
     import std.array : appender;

     // Map each character through toUpper and collect the result in an appender.
     auto sink = appender!(char[])();
     "hello".map!toUpper.copy(&sink);
     assert(sink.data == "HELLO");
 }

 @safe unittest
 {
     static import std.ascii;
     import std.format : format;
     // For 0 .. 0x80 (exclusive) the result must match std.ascii.toUpper.
     foreach (ch; 0 .. 0x80)
         assert(std.ascii.toUpper(ch) == toUpper(ch));
     assert(toUpper('я') == 'Я');
     assert(toUpper('δ') == 'Δ');
     auto title = unicode.Titlecase_Letter;
     // Every lowercase code point stays as is, or maps to an uppercase
     // or titlecase code point.
     foreach (ch; unicode.lowerCase.byCodepoint)
     {
         dchar up = ch.toUpper();
         assert(up == ch || isUpper(up) || title[up],
             format("%x -> %x", ch, up));
     }
 }

 /++
     Produces an uppercase version of $(D s) by performing Unicode uppercase
     mapping on all of its characters.
     If none of the characters of $(D s) were affected, then $(D s) itself
     is returned.

     Params:
         s = any string type
     Returns:
         The uppercased string, or $(D s) when nothing had to change.
 +/
 S toUpper(S)(S s) @trusted pure
 if (isSomeString!S)
 {
     static import std.ascii;
     alias asciiUpper = std.ascii.toUpper;
     return toCase!(UpperTriple, asciiUpper)(s);
 }
 // Non-template overloads for the most common string types, to reduce compile time.
 @safe pure /*TODO nothrow*/
 {
     string toUpper(string s)
     {
         return toUpper!string(s);
     }

     wstring toUpper(wstring s)
     {
         return toUpper!wstring(s);
     }

     dstring toUpper(dstring s)
     {
         return toUpper!dstring(s);
     }

     @safe unittest
     {
         // https://issues.dlang.org/show_bug.cgi?id=16663

         // A wrapper that is not a string itself but converts to one via alias this.
         static struct String
         {
             string data;
             alias data this;
         }

         // Never invoked below; the call only has to compile.
         void upperWrapped()
         {
             auto uppered = toUpper(String(""));
         }
     }
 }

 @safe unittest
 {
     import std.algorithm.comparison : cmp;

     // ASCII-only input
     {
         string src = "FoL";
         string uppered = toUpper(src);
         char[] inPlace = src.dup;
         toUpperInPlace(inPlace);
         assert(inPlace == uppered, inPlace);
         assert(cmp(uppered, "FOL") == 0);
         assert(uppered !is src);
     }

     // Latin Extended-A mixed with ASCII
     {
         string src = "a\u0100B\u0101d";
         string uppered = toUpper(src);
         char[] inPlace = src.dup;
         toUpperInPlace(inPlace);
         assert(inPlace == uppered);
         assert(cmp(uppered, "A\u0100B\u0100D") == 0);
         assert(uppered !is src);
     }

     // Cyrillic mixed with ASCII
     {
         string src = "a\u0460B\u0461d";
         string uppered = toUpper(src);
         char[] inPlace = src.dup;
         toUpperInPlace(inPlace);
         assert(inPlace == uppered);
         assert(cmp(uppered, "A\u0460B\u0460D") == 0);
         assert(uppered !is src);
     }
 }

 @system unittest
 {
     // Runs toLower/toUpper and their in-place variants on `s` and compares
     // each result with the expected uppercase (`trueUp`) and lowercase
     // (`trueLow`) forms.
     static void doTest(C)(const(C)[] s, const(C)[] trueUp, const(C)[] trueLow)
     {
         import std.format : format;
         // Failure message: source, actual result and expected result as hex.
         string diff = "src: %( %x %)\nres: %( %x %)\ntru: %( %x %)";
         auto low = s.toLower() , up = s.toUpper();
         auto lowInp = s.dup, upInp = s.dup;
         lowInp.toLowerInPlace();
         upInp.toUpperInPlace();
         // `diff` contains three specifiers, so every message must be given
         // all three arrays (source, result, expected); otherwise a failing
         // assert would raise a format error instead of printing the diagnostic.
         assert(low == trueLow,
             format(diff, cast(ubyte[]) s, cast(ubyte[]) low, cast(ubyte[]) trueLow));
         assert(up == trueUp,
             format(diff, cast(ubyte[]) s, cast(ubyte[]) up, cast(ubyte[]) trueUp));
         assert(lowInp == trueLow,
             format(diff, cast(ubyte[]) s, cast(ubyte[]) lowInp, cast(ubyte[]) trueLow));
         assert(upInp == trueUp,
             format(diff, cast(ubyte[]) s, cast(ubyte[]) upInp, cast(ubyte[]) trueUp));
     }
     foreach (S; AliasSeq!(dstring, wstring, string))
     {

         S easy = "123";
         S good = "abCФеж";
         S awful = "\u0131\u023f\u2126";
         S wicked = "\u0130\u1FE2";
         auto options = [easy, good, awful, wicked];
         // Expected results, index-aligned with `options`.
         S[] lower = ["123", "abcфеж", "\u0131\u023f\u03c9", "i\u0307\u1Fe2"];
         S[] upper = ["123", "ABCФЕЖ", "I\u2c7e\u2126", "\u0130\u03A5\u0308\u0300"];

         // For these inputs the in-place conversions must keep working on
         // the very same array (no reallocation).
         foreach (val; AliasSeq!(easy, good))
         {
             auto e = val.dup;
             auto g = e;
             e.toUpperInPlace();
             assert(e is g);
             e.toLowerInPlace();
             assert(e is g);
         }
         foreach (i, v; options)
         {
             doTest(v, upper[i], lower[i]);
         }

         // a few combinatorial runs
         foreach (i; 0 .. options.length)
         foreach (j; i .. options.length)
         foreach (k; j .. options.length)
         {
             auto sample = options[i] ~ options[j] ~ options[k];
             auto sample2 = options[k] ~ options[j] ~ options[i];
             doTest(sample, upper[i] ~ upper[j] ~ upper[k],
                 lower[i] ~ lower[j] ~ lower[k]);
             doTest(sample2, upper[k] ~ upper[j] ~ upper[i],
                 lower[k] ~ lower[j] ~ lower[i]);
         }
     }
 }


 /++
     Tests whether $(D c) is a Unicode alphabetic $(CHARACTER)
     (general Unicode category: Alphabetic).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         $(D true) if $(D c) is alphabetic, $(D false) otherwise.
 +/
 bool isAlpha(dchar c) @safe pure nothrow @nogc
 {
     // Everything from U+00AA upwards is looked up in the trie.
     if (c >= 0xAA)
         return alphaTrie[c];

     // optimization: below U+00AA only the ASCII letters count
     return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 }

 @safe unittest
 {
     auto alpha = unicode("Alphabetic");
     // Every member of the Alphabetic set must be accepted.
     foreach (ch; alpha.byCodepoint)
         assert(isAlpha(ch));
     // isAlpha must agree with set membership for 0 .. 0x4000 (exclusive).
     foreach (ch; 0 .. 0x4000)
         assert((ch in alpha) == isAlpha(ch));
 }


 /++
     Tests whether $(D c) is a Unicode mark
     (general Unicode category: Mn, Me, Mc).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         The result of looking $(D c) up in the mark trie.
 +/
 bool isMark(dchar c) @safe pure nothrow @nogc
 {
     return markTrie[c];
 }

 @safe unittest
 {
     auto mark = unicode("Mark");
     // Every member of the Mark set must be accepted.
     foreach (ch; mark.byCodepoint)
         assert(isMark(ch));
     // isMark must agree with set membership for 0 .. 0x4000 (exclusive).
     foreach (ch; 0 .. 0x4000)
         assert((ch in mark) == isMark(ch));
 }

 /++
     Tests whether $(D c) is a Unicode numerical $(CHARACTER)
     (general Unicode category: Nd, Nl, No).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         $(D true) if $(D c) is numerical, $(D false) otherwise.
 +/
 bool isNumber(dchar c) @safe pure nothrow @nogc
 {
     // Anything above U+007F is looked up in the trie.
     if (c > 0x7F)
         return numberTrie[c];

     // optimization for ascii case: only '0' .. '9' qualify
     return '0' <= c && c <= '9';
 }

 @safe unittest
 {
     auto n = unicode("N");
     // Every member of the N set must be accepted.
     foreach (ch; n.byCodepoint)
         assert(isNumber(ch));
     // isNumber must agree with set membership for 0 .. 0x4000 (exclusive).
     foreach (ch; 0 .. 0x4000)
         assert((ch in n) == isNumber(ch));
 }

 /++
     Tests whether $(D c) is a Unicode alphabetic $(CHARACTER) or number.
     (general Unicode category: Alphabetic, Nd, Nl, No).

     Params:
         c = any Unicode character
     Returns:
         `true` if the character is in the Alphabetic, Nd, Nl, or No Unicode
         categories
 +/
 bool isAlphaNum(dchar c) @safe pure nothrow @nogc
 {
     static import std.ascii;

     // Non-ASCII input is answered by combining the two Unicode predicates.
     if (!std.ascii.isASCII(c))
         return isAlpha(c) || isNumber(c);

     // optimization for ascii case
     return std.ascii.isAlphaNum(c);
 }

 @safe unittest
 {
     auto n = unicode("N");
     auto alpha = unicode("Alphabetic");

     // Every member of the N set must be accepted.
     foreach (ch; n.byCodepoint)
         assert(isAlphaNum(ch));

     // Every member of the Alphabetic set must be accepted.
     foreach (ch; alpha.byCodepoint)
         assert(isAlphaNum(ch));

     // For 0 .. 0x4000 (exclusive) the result must equal membership in either set.
     foreach (ch; 0 .. 0x4000)
     {
         assert(((ch in n) || (ch in alpha)) == isAlphaNum(ch));
     }
 }

 /++
     Tests whether $(D c) is a Unicode punctuation $(CHARACTER)
     (general Unicode category: Pd, Ps, Pe, Pc, Po, Pi, Pf).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         $(D true) if $(D c) is punctuation, $(D false) otherwise.
 +/
 bool isPunctuation(dchar c) @safe pure nothrow @nogc
 {
     static import std.ascii;

     // Anything above U+007F is looked up in the trie.
     if (c > 0x7F)
         return punctuationTrie[c];

     // optimization for ascii case
     return std.ascii.isPunctuation(c);
 }

 @safe unittest
 {
     // A handful of individual punctuation code points.
     static immutable dchar[] samples = [
         '\u0021', '\u0028', '\u0029', '\u002D', '\u005F', '\u00AB', '\u00BB',
     ];
     foreach (sample; samples)
         assert(isPunctuation(sample));

     // Every member of the P set must be accepted.
     auto punctuation = unicode("P");
     foreach (ch; punctuation.byCodepoint)
         assert(isPunctuation(ch));
 }

 /++
     Tests whether $(D c) is a Unicode symbol $(CHARACTER)
     (general Unicode category: Sm, Sc, Sk, So).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         The result of looking $(D c) up in the symbol trie.
 +/
 bool isSymbol(dchar c) @safe pure nothrow @nogc
 {
     return symbolTrie[c];
 }

 @safe unittest
 {
     import std.format : format;

     // A handful of individual symbol code points.
     static immutable dchar[] samples = ['\u0024', '\u002B', '\u005E', '\u00A6'];
     foreach (sample; samples)
         assert(isSymbol(sample));

     // Every member of the S set must be accepted.
     auto symbols = unicode("S");
     foreach (ch; symbols.byCodepoint)
         assert(isSymbol(ch), format("%04x", ch));
 }

 /++
     Tests whether $(D c) is a Unicode space $(CHARACTER)
     (general Unicode category: Zs)
     Note: This doesn't include '\n', '\r', '\t' and other non-space $(CHARACTER).
     For commonly used less strict semantics see $(LREF isWhite).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         The result of the generated $(D isSpaceGen) predicate for $(D c).
 +/
 bool isSpace(dchar c) @safe pure nothrow @nogc
 {
     // The predicate itself lives in the generated tables module.
     import std.internal.unicode_tables : isSpaceGen; // generated file
     return isSpaceGen(c);
 }

 @safe unittest
 {
     assert(isSpace('\u0020'));
     auto space = unicode.Zs;
     // Every member of the Zs set must be accepted.
     foreach (ch; space.byCodepoint)
         assert(isSpace(ch));
     // isSpace must agree with set membership for 0 .. 0x1000 (exclusive).
     foreach (ch; 0 .. 0x1000)
         assert(isSpace(ch) == space[ch]);
 }


 /++
     Tests whether $(D c) is a Unicode graphical $(CHARACTER)
     (general Unicode category: L, M, N, P, S, Zs).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         The result of looking $(D c) up in the graphical trie.
 +/
 bool isGraphical(dchar c) @safe pure nothrow @nogc
 {
     return graphicalTrie[c];
 }


 @safe unittest
 {
     auto set = unicode("Graphical");
     import std.format : format;
     // Every member of the Graphical set must be accepted.
     foreach (ch; set.byCodepoint)
         assert(isGraphical(ch), format("%4x", ch));
     // isGraphical must agree with set membership for 0 .. 0x4000 (exclusive).
     foreach (ch; 0 .. 0x4000)
         assert((ch in set) == isGraphical(ch));
 }


 /++
     Tests whether $(D c) is a Unicode control $(CHARACTER)
     (general Unicode category: Cc).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         The result of the generated $(D isControlGen) predicate for $(D c).
 +/
 bool isControl(dchar c) @safe pure nothrow @nogc
 {
     // The predicate itself lives in the generated tables module.
     import std.internal.unicode_tables : isControlGen; // generated file
     return isControlGen(c);
 }

 @safe unittest
 {
     assert(isControl('\u0000'));
     assert(isControl('\u0081'));
     assert(!isControl('\u0100'));
     auto cc = unicode.Cc;
     // Every member of the Cc set must be accepted.
     foreach (ch; cc.byCodepoint)
         assert(isControl(ch));
     // isControl must agree with set membership for 0 .. 0x1000 (exclusive).
     foreach (ch; 0 .. 0x1000)
         assert(isControl(ch) == cc[ch]);
 }


 /++
     Tests whether $(D c) is a Unicode formatting $(CHARACTER)
     (general Unicode category: Cf).

     Params:
         c = the $(CHARACTER) to test
     Returns:
         The result of the generated $(D isFormatGen) predicate for $(D c).
 +/
 bool isFormat(dchar c) @safe pure nothrow @nogc
 {
     // The predicate itself lives in the generated tables module.
     import std.internal.unicode_tables : isFormatGen; // generated file
     return isFormatGen(c);
 }


 @safe unittest
 {
     assert(isFormat('\u00AD'));

     // Every member of the Format set must be accepted.
     auto formatSet = unicode("Format");
     foreach (ch; formatSet.byCodepoint)
         assert(isFormat(ch));
 }

 // code points for private use and surrogates are not likely to change in the near future;
 // if need be they can be generated from unicode data as well

 /++
     Tests whether $(D c) is a Unicode Private Use $(CODEPOINT)
     (general Unicode category: Co).

     Params:
         c = the $(CODEPOINT) to test
     Returns:
         $(D true) if $(D c) lies in one of the three hard-coded ranges below.
 +/
 bool isPrivateUse(dchar c) @safe pure nothrow @nogc
 {
     // U+E000 .. U+F8FF
     if (c >= 0x00_E000 && c <= 0x00_F8FF)
         return true;
     // U+F0000 .. U+FFFFD
     if (c >= 0x0F_0000 && c <= 0x0F_FFFD)
         return true;
     // U+100000 .. U+10FFFD
     return c >= 0x10_0000 && c <= 0x10_FFFD;
 }

 /++
     Tests whether $(D c) is a Unicode surrogate $(CODEPOINT)
     (general Unicode category: Cs).

     Params:
         c = the $(CODEPOINT) to test
     Returns:
         $(D true) if $(D c) is in the range U+D800 .. U+DFFF (inclusive).
 +/
 bool isSurrogate(dchar c) @safe pure nothrow @nogc
 {
     return c >= 0xD800 && c <= 0xDFFF;
 }

 /++
     Tests whether $(D c) is a Unicode high surrogate (lead surrogate).

     Params:
         c = the $(CODEPOINT) to test
     Returns:
         $(D true) if $(D c) is in the range U+D800 .. U+DBFF (inclusive).
 +/
 bool isSurrogateHi(dchar c) @safe pure nothrow @nogc
 {
     return c >= 0xD800 && c <= 0xDBFF;
 }

 /++
     Tests whether $(D c) is a Unicode low surrogate (trail surrogate).

     Params:
         c = the $(CODEPOINT) to test
     Returns:
         $(D true) if $(D c) is in the range U+DC00 .. U+DFFF (inclusive).
 +/
 bool isSurrogateLo(dchar c) @safe pure nothrow @nogc
 {
     return c >= 0xDC00 && c <= 0xDFFF;
 }

 /++
     Tests whether $(D c) is a Unicode non-character i.e.
     a $(CODEPOINT) with no assigned abstract character.
     (general Unicode category: Cn)

     Params:
         c = the $(CODEPOINT) to test
     Returns:
         The result of looking $(D c) up in the non-character trie.
 +/
 bool isNonCharacter(dchar c) @safe pure nothrow @nogc
 {
     return nonCharacterTrie[c];
 }

 @safe unittest
 {
     // Every member of the Cn set must be accepted.
     auto unassigned = unicode("Cn");
     foreach (ch; unassigned.byCodepoint)
         assert(isNonCharacter(ch));
 }

 private:
 // load static data from pre-generated tables into usable datastructures


 // Builds a CodepointSet from the intervals obtained by decompressing `compressed`.
 auto asSet(const (ubyte)[] compressed) @safe pure
 {
     auto intervals = decompressIntervals(compressed);
     return CodepointSet.fromIntervals(intervals);
 }

 // Wraps the offsets, sizes and data of a trie entry in a const CodepointTrie.
 auto asTrie(T...)(in TrieEntry!T e) @safe pure nothrow
 {
     alias ConstTrie = const(CodepointTrie!T);
     return ConstTrie(e.offsets, e.sizes, e.data);
 }

 // Accessors for the tries built from the pre-generated tables.
 // Every accessor follows the same pattern: a function-local `static immutable`
 // trie is created with asTrie from the matching `*TrieEntries` value and returned.
 @safe pure nothrow @nogc @property
 {
     import std.internal.unicode_tables; // generated file

     // It's important to use auto return here, so that the compiler
     // only runs semantic on the return type if the function gets
     // used. Also these are functions rather than templates to not
     // increase the object size of the caller.
     auto lowerCaseTrie() { static immutable res = asTrie(lowerCaseTrieEntries); return res; }
     auto upperCaseTrie() { static immutable res = asTrie(upperCaseTrieEntries); return res; }
     auto simpleCaseTrie() { static immutable res = asTrie(simpleCaseTrieEntries); return res; }
     auto fullCaseTrie() { static immutable res = asTrie(fullCaseTrieEntries); return res; }
     auto alphaTrie() { static immutable res = asTrie(alphaTrieEntries); return res; }
     auto markTrie() { static immutable res = asTrie(markTrieEntries); return res; }
     auto numberTrie() { static immutable res = asTrie(numberTrieEntries); return res; }
     auto punctuationTrie() { static immutable res = asTrie(punctuationTrieEntries); return res; }
     auto symbolTrie() { static immutable res = asTrie(symbolTrieEntries); return res; }
     auto graphicalTrie() { static immutable res = asTrie(graphicalTrieEntries); return res; }
     auto nonCharacterTrie() { static immutable res = asTrie(nonCharacterTrieEntries); return res; }

     //normalization quick-check tables
     // (each of these imports only the entry it needs from std.internal.unicode_norm)
     auto nfcQCTrie()
     {
         import std.internal.unicode_norm : nfcQCTrieEntries;
         static immutable res = asTrie(nfcQCTrieEntries);
         return res;
     }

     auto nfdQCTrie()
     {
         import std.internal.unicode_norm : nfdQCTrieEntries;
         static immutable res = asTrie(nfdQCTrieEntries);
         return res;
     }

     auto nfkcQCTrie()
     {
         import std.internal.unicode_norm : nfkcQCTrieEntries;
         static immutable res = asTrie(nfkcQCTrieEntries);
         return res;
     }

     auto nfkdQCTrie()
     {
         import std.internal.unicode_norm : nfkdQCTrieEntries;
         static immutable res = asTrie(nfkdQCTrieEntries);
         return res;
     }

     //grapheme breaking algorithm tables
     // (entries come from std.internal.unicode_grapheme)
     auto mcTrie()
     {
         import std.internal.unicode_grapheme : mcTrieEntries;
         static immutable res = asTrie(mcTrieEntries);
         return res;
     }

     auto graphemeExtendTrie()
     {
         import std.internal.unicode_grapheme : graphemeExtendTrieEntries;
         static immutable res = asTrie(graphemeExtendTrieEntries);
         return res;
     }

     auto hangLV()
     {
         import std.internal.unicode_grapheme : hangulLVTrieEntries;
         static immutable res = asTrie(hangulLVTrieEntries);
         return res;
     }

     auto hangLVT()
     {
         import std.internal.unicode_grapheme : hangulLVTTrieEntries;
         static immutable res = asTrie(hangulLVTTrieEntries);
         return res;
     }

     // tables below are used for composition/decomposition
     // (entries come from std.internal.unicode_comp and std.internal.unicode_decomp)
     auto combiningClassTrie()
     {
         import std.internal.unicode_comp : combiningClassTrieEntries;
         static immutable res = asTrie(combiningClassTrieEntries);
         return res;
     }

     auto compatMappingTrie()
     {
         import std.internal.unicode_decomp : compatMappingTrieEntries;
         static immutable res = asTrie(compatMappingTrieEntries);
         return res;
     }

     auto canonMappingTrie()
     {
         import std.internal.unicode_decomp : canonMappingTrieEntries;
         static immutable res = asTrie(canonMappingTrieEntries);
         return res;
     }

     auto compositionJumpTrie()
     {
         import std.internal.unicode_comp : compositionJumpTrieEntries;
         static immutable res = asTrie(compositionJumpTrieEntries);
         return res;
     }

     //case conversion tables
     auto toUpperIndexTrie() { static immutable res = asTrie(toUpperIndexTrieEntries); return res; }
     auto toLowerIndexTrie() { static immutable res = asTrie(toLowerIndexTrieEntries); return res; }
     auto toTitleIndexTrie() { static immutable res = asTrie(toTitleIndexTrieEntries); return res; }
     //simple case conversion tables
     auto toUpperSimpleIndexTrie() { static immutable res = asTrie(toUpperSimpleIndexTrieEntries); return res; }
     auto toLowerSimpleIndexTrie() { static immutable res = asTrie(toLowerSimpleIndexTrieEntries); return res; }
     auto toTitleSimpleIndexTrie() { static immutable res = asTrie(toTitleSimpleIndexTrieEntries); return res; }

 }

 }// version (!std_uni_bootstrap)