// libphobos/testsuite/libphobos.phobos/std_uni_package.d - gcc - Git at Google

 pure @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;

     // Variadic constructor: pairs of [begin, end) bounds, here the Latin
     // and the Cyrillic lowercase letters.
     auto letters = CodepointSet('a', 'z'+1, 'а', 'я'+1);
     foreach (cp; 'a'..'z'+1)
     {
         assert(letters[cp]);
     }
     foreach (cp; 'а'..'я'+1)
     {
         assert(letters[cp]);
     }
     // The intervals may come in any order and may intersect ...
     auto shuffled = CodepointSet('а', 'я'+1, 'a', 'd', 'b', 'z'+1);
     // ... the resulting set is the same.
     assert(equal(shuffled.byInterval, letters.byInterval));
     // Constructor taking a range of intervals: this(Range)(Range intervals)
     auto whitePieces = CodepointInterval(9812, 9818);
     auto blackPieces = CodepointInterval(9818, 9824);
     auto chess = CodepointSet([whitePieces, blackPieces]);
     foreach (cp; '♔'..'♟'+1)
     {
         assert(chess[cp]);
     }
 }

 pure @safe unittest
 {
     import std.uni;

     // Sets are looked up by name through `unicode`.
     auto gothicScript = unicode.Gothic;
     // U+10330 (Gothic letter ahsa) is a member ...
     assert(gothicScript['\U00010330']);
     // ... whereas an ASCII character such as '$' is not.
     assert(!gothicScript['$']);
 }

 pure @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;
     import std.range : iota;

     auto lowercase = unicode.LowerCase;
     auto uppercase = unicode.UpperCase;
     auto asciiSet = unicode.ASCII;

     // Intersection (&): nothing is both lowercase and uppercase.
     auto both = lowercase & uppercase;
     assert(both.empty);
     // Lowercase restricted to ASCII is exactly 'a' .. 'z'.
     auto lowerASCII = lowercase & asciiSet;
     assert(equal(lowerASCII.byCodepoint, iota('a', 'z'+1)));
     // Difference (-): ASCII with its 26 lowercase letters removed.
     auto asciiNoLower = asciiSet - lowercase;
     assert(asciiNoLower.length == 128 - 26);

     // Symmetric difference (~): members of exactly one of the two sets.
     auto exactlyOne = lowercase ~ asciiSet;
     assert(!exactlyOne['Δ']); // in neither set
     assert(exactlyOne['$']); // ASCII only
     assert(!exactlyOne['a']); // in both sets
     assert(exactlyOne['я']); // lowercase only

     // Union (|) feeding a difference: ASCII without any cased letter.
     auto cased = lowercase | uppercase;
     auto noLetters = asciiSet - cased;
     assert(noLetters.length == 128 - 26*2);
 }

 pure @safe unittest
 {
     import std.uni;

     // Membership can be tested with the `in` operator.
     auto cyrillic = unicode.Cyrillic;
     assert('я' in cyrillic);
     assert(!('z' in cyrillic));
 }

 pure @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;
     import std.range : iota;

     // byCodepoint enumerates every member of the set; for ASCII that is
     // 0 .. 0x80 (exclusive upper bound).
     auto set = unicode.ASCII;
     // The comparison has to be asserted: as a bare expression statement
     // its result was discarded and the test verified nothing.
     assert(set.byCodepoint.equal(iota(0, 0x80)));
 }

 pure @safe unittest
 {
     import std.uni;
     import std.conv : to;
     import std.format : format;
     import std.uni : unicode;

     // The Basic Latin block is used here because it is unlikely to ever
     // change. An earlier version used the Cyrillic script, which is a
     // pretty active range for changes and broke in an update.
     auto basicLatin = unicode.InBasic_latin;
     immutable asText = basicLatin.to!string;
     assert(asText == "[0..128)");

     // The '%s' and '%d' specs are equivalent to the to!string call.
     assert(format("%d", basicLatin) == asText);

     // '%#x' and '%#X' print the bounds in prefixed hexadecimal.
     assert(format("%#x", basicLatin) == "[0..0x80)");
     assert(format("%#X", basicLatin) == "[0..0X80)");
 }

 pure @safe unittest
 {
     import std.uni;

     // Build digits and uppercase ASCII letters out of three half-open
     // intervals; '0'..'5' and '5'..'9'+1 are adjacent.
     CodepointSet alnum;
     alnum.add('0', '5');
     alnum.add('A','Z'+1);
     alnum.add('5', '9'+1);
     // The interval boundaries are all members of the result.
     foreach (char probe; "059Z")
     {
         assert(alnum[probe]);
     }
 }

 pure @safe unittest
 {
     import std.uni;

     auto asciiSet = unicode.ASCII;
     auto complement = asciiSet.inverted;
     // A set united with its inverse covers every Unicode code point.
     assert((asciiSet | complement).length == 0x110000);
     // A set never intersects its inverse.
     assert((asciiSet & complement).empty);
 }

 pure @safe unittest
 {
     import std.uni;

     // A default-initialized set contains no code points.
     CodepointSet nothing;
     assert(nothing.empty);
     assert(nothing.length == 0);
 }

 // Walks a UTF-8 matcher for unicode.Number over a string, showing how
 // match, skip and test differ in what they do to their by-ref argument.
 // The statements are order-dependent: each assertion relies on the state
 // of `truth` left behind by the previous call.
 pure @safe unittest
 {
     import std.uni;

         string truth = "2² = 4";
         // matcher working on char (UTF-8) code units
         auto m = utfMatcher!char(unicode.Number);
         assert(m.match(truth)); // '2' is a number all right
         assert(truth == "² = 4"); // skips on match
         assert(m.match(truth)); // so is the superscript '2'
         assert(!m.match(truth)); // space is not a number
         assert(truth == " = 4"); // unaffected on no match
         assert(!m.skip(truth)); // same test ...
         assert(truth == "= 4"); // but skips a codepoint regardless
         assert(!m.test(truth)); // '=' is not a number
         assert(truth == "= 4"); // test never affects argument

 }

 @safe unittest
 {
     import std.uni;
     import std.exception : collectException;

     auto asciiSet = unicode.ASCII;
     assert(asciiSet['A']);
     assert(asciiSet['~']);
     assert(!asciiSet['\u00e0']);
     // Names are matched without regard to case.
     assert(asciiSet == unicode.ascII);
     assert(!asciiSet['à']);
     // Underscores, '-' and whitespace inside names are ignored as well.
     auto latin1Sup = unicode.in_latin1_Supplement;
     assert(latin1Sup['à']);
     assert(!latin1Sup['$']);
     // Latin 1 Supplement is a block, which is why the name has an "In" prefix.
     auto viaRuntime = unicode("In Latin 1 Supplement");
     assert(latin1Sup == viaRuntime);
     // A run-time lookup throws when no such set is found.
     auto failure = collectException(unicode("InCyrilliac"));
     assert(failure !is null);
 }

 @safe unittest
 {
     import std.uni;

     // `.block` names a block explicitly; the "In" prefix is the short form.
     auto viaBlock = unicode.block.Greek_and_Coptic;
     auto viaPrefix = unicode.InGreek_and_Coptic;
     assert(viaBlock == viaPrefix);
 }

 @safe unittest
 {
     import std.uni;

     // "arabic" names both a block and a script; request each explicitly.
     auto blockSet = unicode.block.arabic;
     auto scriptSet = unicode.script.arabic;
     // The two sets share members ...
     assert(blockSet['؁']);
     assert(scriptSet['؁']);
     // ... yet they are not the same set.
     assert(blockSet != scriptSet);
     // Short forms: the "in" prefix selects the block, the bare name the script.
     assert(blockSet == unicode.inArabic);
     assert(scriptSet == unicode.arabic);
 }

 @safe unittest
 {
     import std.uni;

     // "L" is a Hangul syllable type here, not the general category
     // Letter that the unicode.L short-cut refers to.
     auto typeL = unicode.hangulSyllableType("L");
     // Spot-check a run of code points that carry this syllable type.
     foreach (jamo; '\u1110'..'\u115F')
     {
         assert(typeL[jamo]);
     }
     // The compile-time spelling yields the same set.
     assert(typeL == unicode.hangulSyllableType.L);
 }

 @safe unittest
 {
     import std.uni;
     import std.uni : unicode;

     // parseSet takes the pattern by ref and consumes the set expression.
     string pattern = "[a-zA-Z0-9]hello";
     auto parsed = unicode.parseSet(pattern);
     // Spot-check one code point from each of the three ranges.
     assert(parsed['a']);
     assert(parsed['A']);
     assert(parsed['9']);
     // Only the text after the closing bracket is left.
     assert(pattern == "hello");
 }

 @safe unittest
 {
     import std.uni;

     // The second of two spaces is a grapheme of one code unit.
     assert(graphemeStride("  ", 1) == 1);
     // 'A' followed by U+030A (combining ring above)
     string city = "A\u030Arhus";
     immutable size_t stride = graphemeStride(city, 0);
     // one code unit for 'A' plus two UTF-8 code units for U+030A
     assert(stride == 3);
     assert(city[0 .. stride] == "A\u030A");
     assert(city[stride .. $] == "rhus");
 }

 @safe pure unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;
     import std.algorithm.iteration : filter;

     // The same two flags (two code points each) in all three encodings.
     string utf8 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
     wstring utf16 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
     dstring utf32 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";

     // For strings the returned length is in code units, not code points.
     assert(utf8.popGrapheme() == 8);
     assert(utf16.popGrapheme() == 4);
     assert(utf32.popGrapheme() == 2);

     // Exactly one flag has been removed from each string.
     assert(utf8 == "\U0001F1EC\U0001F1E7");
     assert(utf16 == "\U0001F1EC\U0001F1E7");
     assert(utf32 == "\U0001F1EC\U0001F1E7");

     // Non-random access ranges work as well, as long as the character
     // type is 32-bit.
     auto nonAlpha = "\r\nhello!"d.filter!(x => !x.isAlpha);
     // A Windows-style line ending is two code points in a single grapheme.
     assert(nonAlpha.popGrapheme() == 2);
     assert(nonAlpha.equal("!"d));
 }

 @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;
     import std.range : drop, take;
     import std.range.primitives : walkLength;

     // "noël" spelled with 'e' + combining diaeresis (U+0308)
     auto word = "noe\u0308l";
     assert(word.walkLength == 5); // counted in code points

     auto clusters = word.byGrapheme;
     assert(clusters.walkLength == 4); // counted in graphemes

     // The third grapheme carries the combining mark with it.
     auto head = clusters.take(3);
     auto tail = clusters.drop(3);
     assert(head.equal("noe\u0308".byGrapheme));
     assert(tail.equal("l".byGrapheme));
 }

 @safe unittest
 {
     import std.uni;
     import std.array : array;
     import std.conv : text;
     import std.range : retro;

     string original = "noe\u0308l"; // noël

     // Split into graphemes first so the combining mark stays attached
     // to its base letter when the order is reversed.
     auto clusters = original.byGrapheme.array;
     // Walk the clusters backwards, flatten them to code points and
     // convert the result to a string.
     string reversed = clusters.retro.byCodePoint.text;

     assert(reversed == "le\u0308on"); // lëon
 }

 @safe unittest
 {
     import std.uni;

     // 'A' followed by the combining mark U+0302
     auto cluster = Grapheme("A\u0302");
     assert(cluster.valid);
     assert(cluster[0] == 'A');
     // Overwrite the mark with an ASCII tilde, which is not a combining mark.
     cluster[1] = '~';
     assert(cluster[1] == '~');
     assert(!cluster.valid);
 }

 @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;

     auto cluster = Grapheme("A");
     assert(cluster.valid);
     // Appending a combining mark keeps the cluster valid.
     cluster ~= '\u0301';
     assert(cluster.valid);
     assert(equal(cluster[], "A\u0301"));
     // Appending a second base letter makes it an invalid grapheme cluster ...
     cluster ~= "B";
     assert(!cluster.valid);
     // ... although its contents can still be read.
     assert(equal(cluster[], "A\u0301B"));
 }

 @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;
     import std.algorithm.iteration : filter;
     import std.range : isRandomAccessRange;

     string input = "ku\u0308hn";

     // decodeGrapheme takes its argument by ref, so each call continues
     // where the previous one stopped.
     auto single = decodeGrapheme(input);
     assert(single[0] == 'k');
     assert(single.length == 1);

     // The next grapheme is two characters long.
     auto cluster = decodeGrapheme(input);
     assert(cluster.length == 2);
     // Slicing a grapheme yields a random-access range of dchar.
     static assert(isRandomAccessRange!(typeof(cluster[])));
     assert(equal(cluster[], "u\u0308"));

     // The usual range algorithms work on the slice.
     auto marks = cluster[].filter!isMark();
     assert(marks.equal("\u0308"));

     auto built = Grapheme("A");
     assert(built.valid);
     // Appending a combining mark keeps the cluster valid.
     built ~= '\u0301';
     assert(built.valid);
     assert(equal(built[], "A\u0301"));
     // A second base letter makes it an invalid grapheme cluster ...
     built ~= "B";
     assert(!built.valid);
     // ... although its contents can still be read.
     assert(equal(built[], "A\u0301B"));
 }

 @safe @nogc pure nothrow unittest
 {
     import std.uni;

     // Simple case-insensitive comparison handles Cyrillic ...
     assert(sicmp("Август", "авгусТ") == 0);
     // ... and Greek, as long as no 1:M mapping is involved.
     assert(sicmp("ΌΎ", "όύ") == 0);

     // Greek small letter iota with dialytika and tonos: one code point
     // versus the equivalent three code point sequence.
     string precomposed = "ΐ";
     string expanded = "\u03B9\u0308\u0301";
     // sicmp does not match these as equal ...
     assert(sicmp(precomposed, expanded) != 0);
     // ... while icmp has no problem with that.
     assert(icmp(precomposed, expanded) == 0);
     assert(icmp("ΌΎ", "όύ") == 0);
 }

 @safe @nogc pure nothrow unittest
 {
     import std.uni;

     // One character on one side matches two characters on the other.
     string oneChar = "Rußland";
     string twoChars = "Russland";
     assert(icmp(oneChar, twoChars) == 0);
     // The same holds when each side has a one-character form where the
     // other side has a two-character sequence.
     string lhs = "ᾩ -> \u1F70\u03B9";
     string rhs = "\u1F61\u03B9 -> ᾲ";
     assert(icmp(lhs, rhs) == 0);
 }

 @safe @nogc nothrow pure unittest
 {
     import std.uni;
     import std.utf : byDchar;

     // icmp also accepts ranges of dchar instead of strings.
     string oneChar = "Rußland";
     string twoChars = "Russland";
     assert(icmp(oneChar.byDchar, twoChars.byDchar) == 0);

     string lhs = "ᾩ -> \u1F70\u03B9";
     string rhs = "\u1F61\u03B9 -> ᾲ";
     assert(icmp(lhs.byDchar, rhs.byDchar) == 0);
 }

 @safe unittest
 {
     import std.uni;

     // U+0303, combining tilde
     immutable tildeClass = combiningClass('\u0303');
     assert(tildeClass == 230);
     // U+0325, combining ring below
     immutable ringBelowClass = combiningClass('\u0325');
     assert(ringBelowClass == 220);
     // The simple consequence is that a tilde should be placed after
     // a ring below in a sequence.
 }

 @safe unittest
 {
     import std.uni;

     // dchar.init is what compose returns when the pair does not compose.
     immutable dchar noComposition = dchar.init;

     assert(compose('A','\u0308') == '\u00C4');
     assert(compose('C', '\u0301') == '\u0106');
     assert(compose('A', 'B') == noComposition);
     // The starter has to come first, so the swapped pair does not compose.
     assert(compose('\u0308', 'A') == noComposition);
 }

 @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;

     // dchar.init is what compose returns when the pair does not compose.
     immutable dchar noComposition = dchar.init;

     assert(compose('A','\u0308') == '\u00C4');
     assert(compose('C', '\u0301') == '\u0106');
     assert(compose('A', 'B') == noComposition);
     // The starter has to come first, so the swapped pair does not compose.
     assert(compose('\u0308', 'A') == noComposition);

     // decompose goes the other way and returns a sliceable result.
     assert(equal(decompose('Ĉ')[], "C\u0302"));
     // A character without a decomposition comes back unchanged.
     assert(equal(decompose('D')[], "D"));
     assert(equal(decompose('\uD4DC')[], "\u1111\u1171\u11B7"));
     // The decomposition type is selected with a template argument.
     assert(equal(decompose!Compatibility('¹')[], "1"));
 }

 @safe unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;

     // One Hangul syllable decomposes into three code points.
     auto parts = decomposeHangul('\uD4DB');
     assert(equal(parts[], "\u1111\u1171\u11B6"));
 }

 @safe unittest
 {
     import std.uni;

     immutable dchar lead = '\u1111';
     immutable dchar vowel = '\u1171';
     immutable dchar trail = '\u11B6';

     // All three jamo given: a full syllable.
     assert(composeJamo(lead, vowel, trail) == '\uD4DB');
     // Leaving out the third argument, or passing any code point that is
     // not a trailing consonant, composes an LV-syllable.
     assert(composeJamo(lead, vowel) == '\uD4CC');
     assert(composeJamo(lead, vowel, ' ') == '\uD4CC');
     // A non-jamo in either of the first two positions gives dchar.init.
     assert(composeJamo(lead, 'A') == dchar.init);
     assert(composeJamo('A', vowel) == dchar.init);
 }

 @safe pure unittest
 {
     import std.uni;

     // Any encoding works; here the result is the very same slice as the input.
     wstring greet = "Hello world";
     auto normalized = normalize(greet);
     assert(normalized is greet);

     // A character whose four normalization forms all differ:
     // Greek upsilon with acute and hook symbol (code point 0x03D3)
     string upsilonHook = "ϓ";
     assert(normalize!NFC(upsilonHook) == "\u03D3");
     assert(normalize!NFD(upsilonHook) == "\u03D2\u0301");
     assert(normalize!NFKC(upsilonHook) == "\u038E");
     assert(normalize!NFKD(upsilonHook) == "\u03A5\u0301");
 }

 @safe unittest
 {
     import std.uni;

     // This Cyrillic letter is allowed in every normalization form ...
     immutable dchar cyrillicYa = 'я';
     assert(allowedIn!NFC(cyrillicYa));
     assert(allowedIn!NFD(cyrillicYa));
     assert(allowedIn!NFKC(cyrillicYa));
     assert(allowedIn!NFKD(cyrillicYa));
     // ... and so is ASCII.
     assert(allowedIn!NFC('Z'));
 }

 @safe pure unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;

     // asUpperCase returns a range, compared here element by element.
     auto shouting = "hEllo".asUpperCase;
     assert(equal(shouting, "HELLO"));
 }

 @safe pure unittest
 {
     import std.uni;
     import std.algorithm.comparison : equal;

     // First letter uppercased, the remaining letters lowercased.
     auto capitalized = "hEllo".asCapitalized;
     assert(equal(capitalized, "Hello"));
 }

 @safe unittest
 {
     import std.uni;
     import std.algorithm.iteration : map;
     import std.algorithm.mutation : copy;
     import std.array : appender;

     // Uppercase character by character and collect the output in an appender.
     auto sink = appender!(char[])();
     auto uppered = "hello".map!toUpper;
     uppered.copy(sink);
     assert(sink.data == "HELLO");
 }
	pure @safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;

		// Variadic constructor: pairs of [begin, end) bounds, here the Latin
		// and the Cyrillic lowercase letters.
		auto letters = CodepointSet('a', 'z'+1, 'а', 'я'+1);
		foreach (cp; 'a'..'z'+1)
		{
			assert(letters[cp]);
		}
		foreach (cp; 'а'..'я'+1)
		{
			assert(letters[cp]);
		}
		// The intervals may come in any order and may intersect ...
		auto shuffled = CodepointSet('а', 'я'+1, 'a', 'd', 'b', 'z'+1);
		// ... the resulting set is the same.
		assert(equal(shuffled.byInterval, letters.byInterval));
		// Constructor taking a range of intervals: this(Range)(Range intervals)
		auto whitePieces = CodepointInterval(9812, 9818);
		auto blackPieces = CodepointInterval(9818, 9824);
		auto chess = CodepointSet([whitePieces, blackPieces]);
		foreach (cp; '♔'..'♟'+1)
		{
			assert(chess[cp]);
		}
	}

	pure @safe unittest
	{
		import std.uni;

		// Sets are looked up by name through `unicode`.
		auto gothicScript = unicode.Gothic;
		// U+10330 (Gothic letter ahsa) is a member ...
		assert(gothicScript['\U00010330']);
		// ... whereas an ASCII character such as '$' is not.
		assert(!gothicScript['$']);
	}

	pure @safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;
		import std.range : iota;

		// Exercises the set operators: & (intersection), - (difference),
		// ~ (symmetric difference) and | (union).
		auto lower = unicode.LowerCase;
		auto upper = unicode.UpperCase;
		auto ascii = unicode.ASCII;

		assert((lower & upper).empty); // no intersection
		auto lowerASCII = lower & ascii;
		assert(lowerASCII.byCodepoint.equal(iota('a', 'z'+1)));
		// throw away all of the lowercase ASCII
		assert((ascii - lower).length == 128 - 26);

		auto onlyOneOf = lower ~ ascii;
		assert(!onlyOneOf['Δ']); // not ASCII and not lowercase
		assert(onlyOneOf['$']); // ASCII and not lowercase
		assert(!onlyOneOf['a']); // ASCII and lowercase
		assert(onlyOneOf['я']); // not ASCII but lowercase

		// throw away all cased letters from ASCII
		// (the union operator is a plain `|`; a backslash in front of it
		// is not valid D)
		auto noLetters = ascii - (lower | upper);
		assert(noLetters.length == 128 - 26*2);
	}

	pure @safe unittest
	{
		import std.uni;

		// Membership can be tested with the `in` operator.
		auto cyrillic = unicode.Cyrillic;
		assert('я' in cyrillic);
		assert(!('z' in cyrillic));
	}

	pure @safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;
		import std.range : iota;

		// byCodepoint enumerates every member of the set; for ASCII that is
		// 0 .. 0x80 (exclusive upper bound).
		auto set = unicode.ASCII;
		// The comparison has to be asserted: as a bare expression statement
		// its result was discarded and the test verified nothing.
		assert(set.byCodepoint.equal(iota(0, 0x80)));
	}

	pure @safe unittest
	{
		import std.uni;
		import std.conv : to;
		import std.format : format;
		import std.uni : unicode;

		// The Basic Latin block is used here because it is unlikely to ever
		// change. An earlier version used the Cyrillic script, which is a
		// pretty active range for changes and broke in an update.
		auto basicLatin = unicode.InBasic_latin;
		immutable asText = basicLatin.to!string;
		assert(asText == "[0..128)");

		// The '%s' and '%d' specs are equivalent to the to!string call.
		assert(format("%d", basicLatin) == asText);

		// '%#x' and '%#X' print the bounds in prefixed hexadecimal.
		assert(format("%#x", basicLatin) == "[0..0x80)");
		assert(format("%#X", basicLatin) == "[0..0X80)");
	}

	pure @safe unittest
	{
		import std.uni;

		// Build digits and uppercase ASCII letters out of three half-open
		// intervals; '0'..'5' and '5'..'9'+1 are adjacent.
		CodepointSet alnum;
		alnum.add('0', '5');
		alnum.add('A','Z'+1);
		alnum.add('5', '9'+1);
		// The interval boundaries are all members of the result.
		foreach (char probe; "059Z")
		{
			assert(alnum[probe]);
		}
	}

	pure @safe unittest
	{
		import std.uni;

		// A set and its inverse partition the whole code point space.
		auto set = unicode.ASCII;
		// union with the inverse gets all of the code points in the Unicode
		// (the union operator is a plain `|`; a backslash in front of it
		// is not valid D)
		assert((set | set.inverted).length == 0x110000);
		// no intersection with the inverse
		assert((set & set.inverted).empty);
	}

	pure @safe unittest
	{
		import std.uni;

		// A default-initialized set contains no code points.
		CodepointSet nothing;
		assert(nothing.empty);
		assert(nothing.length == 0);
	}

	// Walks a UTF-8 matcher for unicode.Number over a string, showing how
	// match, skip and test differ in what they do to their by-ref argument.
	// The statements are order-dependent: each assertion relies on the state
	// of `truth` left behind by the previous call.
	pure @safe unittest
	{
	import std.uni;

	string truth = "2² = 4";
	// matcher working on char (UTF-8) code units
	auto m = utfMatcher!char(unicode.Number);
	assert(m.match(truth)); // '2' is a number all right
	assert(truth == "² = 4"); // skips on match
	assert(m.match(truth)); // so is the superscript '2'
	assert(!m.match(truth)); // space is not a number
	assert(truth == " = 4"); // unaffected on no match
	assert(!m.skip(truth)); // same test ...
	assert(truth == "= 4"); // but skips a codepoint regardless
	assert(!m.test(truth)); // '=' is not a number
	assert(truth == "= 4"); // test never affects argument

	}

	@safe unittest
	{
		import std.uni;
		import std.exception : collectException;

		auto asciiSet = unicode.ASCII;
		assert(asciiSet['A']);
		assert(asciiSet['~']);
		assert(!asciiSet['\u00e0']);
		// Names are matched without regard to case.
		assert(asciiSet == unicode.ascII);
		assert(!asciiSet['à']);
		// Underscores, '-' and whitespace inside names are ignored as well.
		auto latin1Sup = unicode.in_latin1_Supplement;
		assert(latin1Sup['à']);
		assert(!latin1Sup['$']);
		// Latin 1 Supplement is a block, which is why the name has an "In" prefix.
		auto viaRuntime = unicode("In Latin 1 Supplement");
		assert(latin1Sup == viaRuntime);
		// A run-time lookup throws when no such set is found.
		auto failure = collectException(unicode("InCyrilliac"));
		assert(failure !is null);
	}

	@safe unittest
	{
		import std.uni;

		// `.block` names a block explicitly; the "In" prefix is the short form.
		auto viaBlock = unicode.block.Greek_and_Coptic;
		auto viaPrefix = unicode.InGreek_and_Coptic;
		assert(viaBlock == viaPrefix);
	}

	@safe unittest
	{
		import std.uni;

		// "arabic" names both a block and a script; request each explicitly.
		auto blockSet = unicode.block.arabic;
		auto scriptSet = unicode.script.arabic;
		// The two sets share members ...
		assert(blockSet['؁']);
		assert(scriptSet['؁']);
		// ... yet they are not the same set.
		assert(blockSet != scriptSet);
		// Short forms: the "in" prefix selects the block, the bare name the script.
		assert(blockSet == unicode.inArabic);
		assert(scriptSet == unicode.arabic);
	}

	@safe unittest
	{
		import std.uni;

		// "L" is a Hangul syllable type here, not the general category
		// Letter that the unicode.L short-cut refers to.
		auto typeL = unicode.hangulSyllableType("L");
		// Spot-check a run of code points that carry this syllable type.
		foreach (jamo; '\u1110'..'\u115F')
		{
			assert(typeL[jamo]);
		}
		// The compile-time spelling yields the same set.
		assert(typeL == unicode.hangulSyllableType.L);
	}

	@safe unittest
	{
		import std.uni;
		import std.uni : unicode;

		// parseSet takes the pattern by ref and consumes the set expression.
		string pattern = "[a-zA-Z0-9]hello";
		auto parsed = unicode.parseSet(pattern);
		// Spot-check one code point from each of the three ranges.
		assert(parsed['a']);
		assert(parsed['A']);
		assert(parsed['9']);
		// Only the text after the closing bracket is left.
		assert(pattern == "hello");
	}

	@safe unittest
	{
		import std.uni;

		// graphemeStride(input, index) gives the length, in code units, of
		// the grapheme cluster starting at `index`.
		// The input needs two spaces: with a single space, index 1 is the
		// end of the string and no grapheme of length 1 starts there.
		assert(graphemeStride("  ", 1) == 1);
		// A + combining ring above
		string city = "A\u030Arhus";
		size_t first = graphemeStride(city, 0);
		assert(first == 3); //\u030A has 2 UTF-8 code units
		assert(city[0 .. first] == "A\u030A");
		assert(city[first..$] == "rhus");
	}

	@safe pure unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;
		import std.algorithm.iteration : filter;

		// The same two flags (two code points each) in all three encodings.
		string utf8 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
		wstring utf16 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
		dstring utf32 = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";

		// For strings the returned length is in code units, not code points.
		assert(utf8.popGrapheme() == 8);
		assert(utf16.popGrapheme() == 4);
		assert(utf32.popGrapheme() == 2);

		// Exactly one flag has been removed from each string.
		assert(utf8 == "\U0001F1EC\U0001F1E7");
		assert(utf16 == "\U0001F1EC\U0001F1E7");
		assert(utf32 == "\U0001F1EC\U0001F1E7");

		// Non-random access ranges work as well, as long as the character
		// type is 32-bit.
		auto nonAlpha = "\r\nhello!"d.filter!(x => !x.isAlpha);
		// A Windows-style line ending is two code points in a single grapheme.
		assert(nonAlpha.popGrapheme() == 2);
		assert(nonAlpha.equal("!"d));
	}

	@safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;
		import std.range : drop, take;
		import std.range.primitives : walkLength;

		// "noël" spelled with 'e' + combining diaeresis (U+0308)
		auto word = "noe\u0308l";
		assert(word.walkLength == 5); // counted in code points

		auto clusters = word.byGrapheme;
		assert(clusters.walkLength == 4); // counted in graphemes

		// The third grapheme carries the combining mark with it.
		auto head = clusters.take(3);
		auto tail = clusters.drop(3);
		assert(head.equal("noe\u0308".byGrapheme));
		assert(tail.equal("l".byGrapheme));
	}

	@safe unittest
	{
		import std.uni;
		import std.array : array;
		import std.conv : text;
		import std.range : retro;

		string original = "noe\u0308l"; // noël

		// Split into graphemes first so the combining mark stays attached
		// to its base letter when the order is reversed.
		auto clusters = original.byGrapheme.array;
		// Walk the clusters backwards, flatten them to code points and
		// convert the result to a string.
		string reversed = clusters.retro.byCodePoint.text;

		assert(reversed == "le\u0308on"); // lëon
	}

	@safe unittest
	{
		import std.uni;

		// 'A' followed by the combining mark U+0302
		auto cluster = Grapheme("A\u0302");
		assert(cluster.valid);
		assert(cluster[0] == 'A');
		// Overwrite the mark with an ASCII tilde, which is not a combining mark.
		cluster[1] = '~';
		assert(cluster[1] == '~');
		assert(!cluster.valid);
	}

	@safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;

		auto cluster = Grapheme("A");
		assert(cluster.valid);
		// Appending a combining mark keeps the cluster valid.
		cluster ~= '\u0301';
		assert(cluster.valid);
		assert(equal(cluster[], "A\u0301"));
		// Appending a second base letter makes it an invalid grapheme cluster ...
		cluster ~= "B";
		assert(!cluster.valid);
		// ... although its contents can still be read.
		assert(equal(cluster[], "A\u0301B"));
	}

	@safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;
		import std.algorithm.iteration : filter;
		import std.range : isRandomAccessRange;

		string input = "ku\u0308hn";

		// decodeGrapheme takes its argument by ref, so each call continues
		// where the previous one stopped.
		auto single = decodeGrapheme(input);
		assert(single[0] == 'k');
		assert(single.length == 1);

		// The next grapheme is two characters long.
		auto cluster = decodeGrapheme(input);
		assert(cluster.length == 2);
		// Slicing a grapheme yields a random-access range of dchar.
		static assert(isRandomAccessRange!(typeof(cluster[])));
		assert(equal(cluster[], "u\u0308"));

		// The usual range algorithms work on the slice.
		auto marks = cluster[].filter!isMark();
		assert(marks.equal("\u0308"));

		auto built = Grapheme("A");
		assert(built.valid);
		// Appending a combining mark keeps the cluster valid.
		built ~= '\u0301';
		assert(built.valid);
		assert(equal(built[], "A\u0301"));
		// A second base letter makes it an invalid grapheme cluster ...
		built ~= "B";
		assert(!built.valid);
		// ... although its contents can still be read.
		assert(equal(built[], "A\u0301B"));
	}

	@safe @nogc pure nothrow unittest
	{
		import std.uni;

		// Simple case-insensitive comparison handles Cyrillic ...
		assert(sicmp("Август", "авгусТ") == 0);
		// ... and Greek, as long as no 1:M mapping is involved.
		assert(sicmp("ΌΎ", "όύ") == 0);

		// Greek small letter iota with dialytika and tonos: one code point
		// versus the equivalent three code point sequence.
		string precomposed = "ΐ";
		string expanded = "\u03B9\u0308\u0301";
		// sicmp does not match these as equal ...
		assert(sicmp(precomposed, expanded) != 0);
		// ... while icmp has no problem with that.
		assert(icmp(precomposed, expanded) == 0);
		assert(icmp("ΌΎ", "όύ") == 0);
	}

	@safe @nogc pure nothrow unittest
	{
		import std.uni;

		// One character on one side matches two characters on the other.
		string oneChar = "Rußland";
		string twoChars = "Russland";
		assert(icmp(oneChar, twoChars) == 0);
		// The same holds when each side has a one-character form where the
		// other side has a two-character sequence.
		string lhs = "ᾩ -> \u1F70\u03B9";
		string rhs = "\u1F61\u03B9 -> ᾲ";
		assert(icmp(lhs, rhs) == 0);
	}

	@safe @nogc nothrow pure unittest
	{
		import std.uni;
		import std.utf : byDchar;

		// icmp also accepts ranges of dchar instead of strings.
		string oneChar = "Rußland";
		string twoChars = "Russland";
		assert(icmp(oneChar.byDchar, twoChars.byDchar) == 0);

		string lhs = "ᾩ -> \u1F70\u03B9";
		string rhs = "\u1F61\u03B9 -> ᾲ";
		assert(icmp(lhs.byDchar, rhs.byDchar) == 0);
	}

	@safe unittest
	{
		import std.uni;

		// U+0303, combining tilde
		immutable tildeClass = combiningClass('\u0303');
		assert(tildeClass == 230);
		// U+0325, combining ring below
		immutable ringBelowClass = combiningClass('\u0325');
		assert(ringBelowClass == 220);
		// The simple consequence is that a tilde should be placed after
		// a ring below in a sequence.
	}

	@safe unittest
	{
		import std.uni;

		// dchar.init is what compose returns when the pair does not compose.
		immutable dchar noComposition = dchar.init;

		assert(compose('A','\u0308') == '\u00C4');
		assert(compose('C', '\u0301') == '\u0106');
		assert(compose('A', 'B') == noComposition);
		// The starter has to come first, so the swapped pair does not compose.
		assert(compose('\u0308', 'A') == noComposition);
	}

	@safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;

		// dchar.init is what compose returns when the pair does not compose.
		immutable dchar noComposition = dchar.init;

		assert(compose('A','\u0308') == '\u00C4');
		assert(compose('C', '\u0301') == '\u0106');
		assert(compose('A', 'B') == noComposition);
		// The starter has to come first, so the swapped pair does not compose.
		assert(compose('\u0308', 'A') == noComposition);

		// decompose goes the other way and returns a sliceable result.
		assert(equal(decompose('Ĉ')[], "C\u0302"));
		// A character without a decomposition comes back unchanged.
		assert(equal(decompose('D')[], "D"));
		assert(equal(decompose('\uD4DC')[], "\u1111\u1171\u11B7"));
		// The decomposition type is selected with a template argument.
		assert(equal(decompose!Compatibility('¹')[], "1"));
	}

	@safe unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;

		// One Hangul syllable decomposes into three code points.
		auto parts = decomposeHangul('\uD4DB');
		assert(equal(parts[], "\u1111\u1171\u11B6"));
	}

	@safe unittest
	{
		import std.uni;

		immutable dchar lead = '\u1111';
		immutable dchar vowel = '\u1171';
		immutable dchar trail = '\u11B6';

		// All three jamo given: a full syllable.
		assert(composeJamo(lead, vowel, trail) == '\uD4DB');
		// Leaving out the third argument, or passing any code point that is
		// not a trailing consonant, composes an LV-syllable.
		assert(composeJamo(lead, vowel) == '\uD4CC');
		assert(composeJamo(lead, vowel, ' ') == '\uD4CC');
		// A non-jamo in either of the first two positions gives dchar.init.
		assert(composeJamo(lead, 'A') == dchar.init);
		assert(composeJamo('A', vowel) == dchar.init);
	}

	@safe pure unittest
	{
		import std.uni;

		// Any encoding works; here the result is the very same slice as the input.
		wstring greet = "Hello world";
		auto normalized = normalize(greet);
		assert(normalized is greet);

		// A character whose four normalization forms all differ:
		// Greek upsilon with acute and hook symbol (code point 0x03D3)
		string upsilonHook = "ϓ";
		assert(normalize!NFC(upsilonHook) == "\u03D3");
		assert(normalize!NFD(upsilonHook) == "\u03D2\u0301");
		assert(normalize!NFKC(upsilonHook) == "\u038E");
		assert(normalize!NFKD(upsilonHook) == "\u03A5\u0301");
	}

	@safe unittest
	{
		import std.uni;

		// This Cyrillic letter is allowed in every normalization form ...
		immutable dchar cyrillicYa = 'я';
		assert(allowedIn!NFC(cyrillicYa));
		assert(allowedIn!NFD(cyrillicYa));
		assert(allowedIn!NFKC(cyrillicYa));
		assert(allowedIn!NFKD(cyrillicYa));
		// ... and so is ASCII.
		assert(allowedIn!NFC('Z'));
	}

	@safe pure unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;

		// asUpperCase returns a range, compared here element by element.
		auto shouting = "hEllo".asUpperCase;
		assert(equal(shouting, "HELLO"));
	}

	@safe pure unittest
	{
		import std.uni;
		import std.algorithm.comparison : equal;

		// First letter uppercased, the remaining letters lowercased.
		auto capitalized = "hEllo".asCapitalized;
		assert(equal(capitalized, "Hello"));
	}

	@safe unittest
	{
		import std.uni;
		import std.algorithm.iteration : map;
		import std.algorithm.mutation : copy;
		import std.array : appender;

		// Uppercase character by character and collect the output in an appender.
		auto sink = appender!(char[])();
		auto uppered = "hello".map!toUpper;
		uppered.copy(sink);
		assert(sink.data == "HELLO");
	}