// libphobos/testsuite/libphobos.phobos/std_utf.d - gcc - Git at Google

 @safe unittest
 {
     import std.exception : assertThrown;
     import std.utf;

     // Values that encode() must refuse: the boundaries of the two surrogate
     // ranges and the first value past U+10FFFF.
     const dchar[5] rejected = [
         cast(dchar) 0xD800,
         cast(dchar) 0xDBFF,
         cast(dchar) 0xDC00,
         cast(dchar) 0xDFFF,
         cast(dchar) 0x110000,
     ];

     char[4] scratch;
     foreach (dchar candidate; rejected)
         assertThrown!UTFException(encode(scratch, candidate));
 }

 @safe @nogc pure nothrow unittest
 {
     import std.utf;

     // Ordinary values, NUL included, are accepted.
     immutable dchar latinA = cast(dchar) 0x41;
     immutable dchar nul = cast(dchar) 0x00;
     assert(latinA.isValidDchar);
     assert(nul.isValidDchar);

     // A surrogate and a value beyond U+10FFFF are rejected.
     immutable dchar highSurrogate = cast(dchar) 0xD800;
     immutable dchar tooLarge = cast(dchar) 0x11FFFF;
     assert(!highSurrogate.isValidDchar);
     assert(!tooLarge.isValidDchar);
 }

 @safe pure nothrow unittest
 {
     import std.utf;

     // One accepted and one rejected value for each character width.
     assert( (cast(char) 0x40).isValidCodepoint);
     assert(!(cast(char) 0x80).isValidCodepoint);

     assert( (cast(wchar) 0x1234).isValidCodepoint);
     assert(!(cast(wchar) 0xD800).isValidCodepoint);

     assert( (cast(dchar) 0x0010FFFF).isValidCodepoint);
     assert(!(cast(dchar) 0x12345678).isValidCodepoint);
 }

 @safe unittest
 {
     import std.utf;

     // Without an index, stride() measures the sequence at the front.
     assert(stride("a") == 1);
     assert(stride("λ") == 2);
     assert(stride("aλ") == 1);
     // With an index, it measures the sequence starting at that code unit.
     assert(stride("aλ", 1) == 2);
     assert(stride("𐐷") == 4);
 }

 @safe unittest
 {
     import std.utf;

     // Without an index, strideBack() measures the last sequence.
     assert(strideBack("a") == 1);
     assert(strideBack("λ") == 2);
     assert(strideBack("aλ") == 2);
     // With an index, it measures the sequence that ends just before it.
     assert(strideBack("aλ", 1) == 1);
     assert(strideBack("𐐷") == 4);
 }

 @safe unittest
 {
     import std.utf;

     // ASCII only: code-unit index and code-point index agree in every encoding.
     assert(`hello world`.toUCSindex(7) == 7);
     assert(`hello world`w.toUCSindex(7) == 7);
     assert(`hello world`d.toUCSindex(7) == 7);

     // One two-unit UTF-8 sequence precedes code unit 7.
     assert(`Ma Chérie`.toUCSindex(7) == 6);
     assert(`Ma Chérie`w.toUCSindex(7) == 7);
     assert(`Ma Chérie`d.toUCSindex(7) == 7);

     // In UTF-8, code unit 9 corresponds to code point 3 of this text.
     assert(`さいごの果実 / ミツバチと科学者`.toUCSindex(9) == 3);
     assert(`さいごの果実 / ミツバチと科学者`w.toUCSindex(9) == 9);
     assert(`さいごの果実 / ミツバチと科学者`d.toUCSindex(9) == 9);
 }

 @safe unittest
 {
     import std.utf;

     // ASCII only: code-point index and code-unit index agree in every encoding.
     assert(`hello world`.toUTFindex(7) == 7);
     assert(`hello world`w.toUTFindex(7) == 7);
     assert(`hello world`d.toUTFindex(7) == 7);

     // Code point 6 starts at UTF-8 code unit 7 because of the two-unit 'é'.
     assert(`Ma Chérie`.toUTFindex(6) == 7);
     assert(`Ma Chérie`w.toUTFindex(7) == 7);
     assert(`Ma Chérie`d.toUTFindex(7) == 7);

     // In UTF-8, code point 3 of this text starts at code unit 9.
     assert(`さいごの果実 / ミツバチと科学者`.toUTFindex(3) == 9);
     assert(`さいごの果実 / ミツバチと科学者`w.toUTFindex(9) == 9);
     assert(`さいごの果実 / ミツバチと科学者`d.toUTFindex(9) == 9);
 }

 @safe pure unittest
 {
     import std.utf;

     // decode() returns the code point at `pos` and advances `pos` past it.
     size_t pos = 0;

     assert(decode("a", pos) == 'a');
     assert(pos == 1);

     pos = 0;
     assert(decode("å", pos) == 'å');
     assert(pos == 2);

     pos = 1;
     assert(decode("aå", pos) == 'å');
     assert(pos == 3);

     pos = 0;
     assert(decode("å"w, pos) == 'å');
     assert(pos == 1);

     // ë as a multi-code point grapheme
     pos = 0;
     assert(decode("e\u0308", pos) == 'e');
     assert(pos == 1);

     // ë as a single code point grapheme
     pos = 0;
     assert(decode("ë", pos) == 'ë');
     assert(pos == 2);

     pos = 0;
     assert(decode("ë"w, pos) == 'ë');
     assert(pos == 1);
 }

 @safe pure unittest
 {
     import std.range.primitives;
     import std.utf;

     // decodeFront() returns the first code point and removes it from the string.
     string text = "Hello, World!";
     assert(decodeFront(text) == 'H');
     assert(text == "ello, World!");

     text = "å";
     assert(decodeFront(text) == 'å');
     assert(text.empty);

     // The two-argument form also reports how many code units were consumed.
     text = "å";
     size_t consumed;
     assert(decodeFront(text, consumed) == 'å');
     assert(consumed == 2);
     assert(text.empty);
 }

 @system pure unittest
 {
     import std.range.primitives;
     import std.utf;

     // decodeBack() returns the last code point and removes it from the string.
     string text = "Hello, World!";
     assert(decodeBack(text) == '!');
     assert(text == "Hello, World");

     text = "å";
     assert(decodeBack(text) == 'å');
     assert(text.empty);

     // The two-argument form also reports how many code units were consumed.
     text = "å";
     size_t consumed;
     assert(decodeBack(text, consumed) == 'å');
     assert(consumed == 2);
     assert(text.empty);
 }

 @safe unittest
 {
     import std.exception : assertThrown;
     import std.typecons : Yes;
     import std.utf;

     char[4] units;

     // encode() returns the number of code units written into the buffer.
     assert(encode(units, '\u0000') == 1);
     assert(units[0 .. 1] == "\u0000");

     assert(encode(units, '\u007F') == 1);
     assert(units[0 .. 1] == "\u007F");

     assert(encode(units, '\u0080') == 2);
     assert(units[0 .. 2] == "\u0080");

     assert(encode(units, '\uE000') == 3);
     assert(units[0 .. 3] == "\uE000");

     assert(encode(units, 0xFFFE) == 3);
     assert(units[0 .. 3] == "\xEF\xBF\xBE");

     // By default an out-of-range value throws ...
     assertThrown!UTFException(encode(units, cast(dchar) 0x110000));

     // ... while Yes.useReplacementDchar writes the replacement character.
     encode!(Yes.useReplacementDchar)(units, cast(dchar) 0x110000);
     auto view = units[];
     assert(decodeFront(view) == replacementDchar);
 }

 @safe unittest
 {
     import std.exception : assertThrown;
     import std.typecons : Yes;
     import std.utf;

     wchar[2] units;

     // Values on either side of the surrogate range take one code unit.
     assert(encode(units, '\u0000') == 1);
     assert(units[0 .. 1] == "\u0000");

     assert(encode(units, '\uD7FF') == 1);
     assert(units[0 .. 1] == "\uD7FF");

     assert(encode(units, '\uE000') == 1);
     assert(units[0 .. 1] == "\uE000");

     // Values from U+10000 upwards take two.
     assert(encode(units, '\U00010000') == 2);
     assert(units[0 .. 2] == "\U00010000");

     assert(encode(units, '\U0010FFFF') == 2);
     assert(units[0 .. 2] == "\U0010FFFF");

     // By default a surrogate value throws ...
     assertThrown!UTFException(encode(units, cast(dchar) 0xD800));

     // ... while Yes.useReplacementDchar writes the replacement character.
     encode!(Yes.useReplacementDchar)(units, cast(dchar) 0x110000);
     auto view = units[];
     assert(decodeFront(view) == replacementDchar);
 }

 @safe unittest
 {
     import std.exception : assertThrown;
     import std.typecons : Yes;
     import std.utf;

     dchar[1] unit;

     // Every accepted value occupies exactly one UTF-32 code unit.
     assert(encode(unit, '\u0000') == 1);
     assert(unit[0] == '\u0000');

     assert(encode(unit, '\uD7FF') == 1);
     assert(unit[0] == '\uD7FF');

     assert(encode(unit, '\uE000') == 1);
     assert(unit[0] == '\uE000');

     assert(encode(unit, '\U0010FFFF') == 1);
     assert(unit[0] == '\U0010FFFF');

     // By default a surrogate value throws ...
     assertThrown!UTFException(encode(unit, cast(dchar) 0xD800));

     // ... while Yes.useReplacementDchar stores the replacement character.
     encode!(Yes.useReplacementDchar)(unit, cast(dchar) 0x110000);
     assert(unit[0] == replacementDchar);
 }

 @safe unittest
 {
     import std.utf;

     // This encode() overload appends to a dynamic array.
     char[] text = "abcd".dup;
     immutable dchar ascii = 'a';
     immutable dchar nonAscii = 'ø';

     text.encode(ascii);
     assert(text.length == 5);
     assert(text == "abcda");

     text.encode(nonAscii);
     assert(text.length == 7);
     assert(text == "abcdaø");
 }

 @safe pure nothrow @nogc unittest
 {
     import std.utf;

     // An ASCII character is one code unit in every encoding.
     enum dchar ascii = 'a';
     assert(codeLength!char(ascii) == 1);
     assert(codeLength!wchar(ascii) == 1);
     assert(codeLength!dchar(ascii) == 1);

     // U+10FFFF needs four, two and one code units respectively.
     enum dchar highest = '\U0010FFFF';
     assert(codeLength!char(highest) == 4);
     assert(codeLength!wchar(highest) == 2);
     assert(codeLength!dchar(highest) == 1);
 }

 @safe unittest
 {
     import std.utf;

     // codeLength!C(str) equals the length of the same text encoded with C.
     assert("hello world".codeLength!char == "hello world".length);
     assert("hello world".codeLength!wchar == "hello world"w.length);
     assert("hello world".codeLength!dchar == "hello world"d.length);

     assert(`プログラミング`.codeLength!char == `プログラミング`.length);
     assert(`プログラミング`.codeLength!wchar == `プログラミング`w.length);
     assert(`プログラミング`.codeLength!dchar == `プログラミング`d.length);

     // The UTF-8 length of a UTF-16 needle can be used to slice a UTF-8 haystack.
     string haystack = `Être sans la verité, ça, ce ne serait pas bien.`;
     wstring needle = `Être sans la verité`;
     immutable prefixUnits = needle.codeLength!char;
     assert(haystack[prefixUnits .. $] == `, ça, ce ne serait pas bien.`);
 }

 @safe unittest
 {
     import std.exception : assertThrown;
     import std.utf;

     // 0xA7 is a UTF-8 continuation byte, so it cannot start a sequence.
     char[] malformed = [0xA7, 0x85, 0xAF];
     assertThrown!UTFException(validate(malformed));
 }

 @safe pure unittest
 {
     // toUTF8 applied to UTF-16 and UTF-32 input; the expected values are
     // the resulting UTF-8 code units.
     import std.utf;

     import std.algorithm.comparison : equal;

     // The ø is represented by two UTF-8 code units
     assert("Hellø"w.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8]));

     // 𐐷 is four code units in UTF-8
     assert("𐐷"d.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7]));
 }

 @safe pure unittest
 {
     import std.algorithm.comparison : equal;
     import std.utf;

     // Each of these is a single UTF-32 code unit ...
     assert("𤭢"d.length == 1);
     assert("𐐷"d.length == 1);

     // ... and becomes a surrogate pair when converted to UTF-16.
     assert(equal(toUTF16("𤭢"d), [0xD852, 0xDF62]));
     assert(equal(toUTF16("𐐷"d), [0xD801, 0xDC37]));
 }

 @safe pure unittest
 {
     import std.algorithm.comparison : equal;
     import std.utf;

     // Each of these occupies two UTF-16 code units ...
     assert("𤭢"w.length == 2);
     assert("𐐷"w.length == 2);

     // ... and becomes a single code unit when converted to UTF-32.
     assert(equal(toUTF32("𤭢"w), [0x00024B62]));
     assert(equal(toUTF32("𐐷"w), [0x00010437]));
 }

 @safe pure unittest
 {
     // Instantiates toUTFz for several pointer target types and source
     // encodings. The returned pointers are not inspected; this block only
     // checks that each call compiles and runs as @safe pure code.
     import std.utf;

     // char-pointer targets with mutable, const and immutable pointees
     auto p1 = toUTFz!(char*)("hello world");
     auto p2 = toUTFz!(const(char)*)("hello world");
     auto p3 = toUTFz!(immutable(char)*)("hello world");
     // targets whose character type differs from the source's
     auto p4 = toUTFz!(char*)("hello world"d);
     auto p5 = toUTFz!(const(wchar)*)("hello world");
     auto p6 = toUTFz!(immutable(dchar)*)("hello world"w);
 }

 @system unittest
 {
     import std.utf;

     // The text is ASCII, so the UTF-16 form has as many code units as the
     // UTF-8 source and the terminator sits at that index.
     string text = "Hello, World!";
     immutable terminatorIndex = text.length;
     const(wchar)* zeroTerminated = toUTF16z(text);
     assert(zeroTerminated[terminatorIndex] == '\0');
 }

 @safe pure nothrow @nogc unittest
 {
     import std.utf;

     // count() yields the number of code points, not code units.
     assert("".count == 0);
     assert("a".count == 1);
     assert("abc".count == 3);
     // One three-unit sequence followed by three ASCII digits.
     assert("\u20AC100".count == 4);
 }

 @safe unittest
 {
     // Compile-time comparison of the range capabilities of a byCodeUnit
     // range with those of a plain string.
     import std.utf;

     import std.range.primitives;
     import std.traits : isAutodecodableString;

     // byCodeUnit yields a random-access range over the string's code units.
     auto r = "Hello, World!".byCodeUnit();
     static assert(hasLength!(typeof(r)));
     static assert(hasSlicing!(typeof(r)));
     static assert(isRandomAccessRange!(typeof(r)));
     static assert(isBidirectionalRange!(typeof(r)));
     static assert(is(ElementType!(typeof(r)) == immutable char));

     // contrast with the range capabilities of standard strings (with or
     // without autodecoding enabled).
     auto s = "Hello, World!";
     // The string itself is bidirectional in both configurations; the
     // original block only ever checked `r` here.
     static assert(isBidirectionalRange!(typeof(s)));
     static if (isAutodecodableString!(typeof(s)))
     {
         // with autodecoding enabled, strings are non-random-access ranges of
         // dchar.
         static assert(is(ElementType!(typeof(s)) == dchar));
         static assert(!isRandomAccessRange!(typeof(s)));
         static assert(!hasSlicing!(typeof(s)));
         static assert(!hasLength!(typeof(s)));
     }
     else
     {
         // without autodecoding, strings are normal arrays.
         static assert(is(ElementType!(typeof(s)) == immutable char));
         static assert(isRandomAccessRange!(typeof(s)));
         static assert(hasSlicing!(typeof(s)));
         static assert(hasLength!(typeof(s)));
     }
 }

 @safe unittest
 {
     import std.utf;

     // noël using e + combining diaeresis: index 2 is the plain 'e'.
     string decomposed = "noe\u0308l";
     auto decomposedUnits = decomposed.byCodeUnit;
     assert(decomposedUnits[2] != 'ë');
     assert(decomposedUnits[2] == 'e');

     // noël using a precomposed ë character. Because string is UTF-8, the
     // code unit at index 2 is just the first of a sequence that encodes 'ë'.
     string precomposed = "no\u00EBl";
     auto precomposedUnits = precomposed.byCodeUnit;
     assert(precomposedUnits[2] != 'ë');
 }

 @safe unittest
 {
     // Checks when a byCodeUnit result exposes a `.source` member giving
     // back the underlying string.
     import std.utf;

     import std.algorithm.comparison : equal;
     import std.range : popFrontN;
     import std.traits : isAutodecodableString;
     {
         auto range = byCodeUnit("hello world");
         // Drop the first three code units ("hel").
         range.popFrontN(3);
         assert(equal(range.save, "lo world"));
         static if (isAutodecodableString!string) // only enabled with autodecoding
         {
             // `.source` reflects the popped state, not the original string.
             string str = range.source;
             assert(str == "lo world");
         }
     }
     // source only exists if the range was wrapped
     {
         auto range = byCodeUnit("hello world"d);
         static assert(!__traits(compiles, range.source));
     }
 }

 @safe pure nothrow unittest
 {
     import std.algorithm.comparison : equal;
     import std.utf;

     // hellö as a range of `char`s, which are UTF-8
     auto asUtf8 = "hell\u00F6".byUTF!char();
     assert(asUtf8.equal(['h', 'e', 'l', 'l', 0xC3, 0xB6]));

     // `wchar`s are able to hold the ö in a single element (UTF-16 code unit)
     auto asUtf16 = "hell\u00F6".byUTF!wchar();
     assert(asUtf16.equal(['h', 'e', 'l', 'l', 'ö']));

     // 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32
     auto fourUnits = "𐐷".byUTF!char();
     auto twoUnits = "𐐷".byUTF!wchar();
     auto oneUnit = "𐐷".byUTF!dchar();
     assert(fourUnits.equal([0xF0, 0x90, 0x90, 0xB7]));
     assert(twoUnits.equal([0xD801, 0xDC37]));
     assert(oneUnit.equal([0x00010437]));
 }

 @safe unittest
 {
     import std.algorithm.comparison : equal;
     import std.exception : assertThrown;
     import std.utf;

     // A lone 0xF0 lead byte makes this text invalid UTF-8.
     string malformed = "hello\xF0betty";

     // With replacement enabled, decoding yields U+FFFD instead of failing.
     auto lenient = malformed.byChar.byUTF!(dchar, UseReplacementDchar.yes);
     assert(lenient.equal("hello\uFFFDetty"));

     // With replacement disabled, walking the range throws.
     assertThrown!UTFException(
         malformed.byChar.byUTF!(dchar, UseReplacementDchar.no).equal("hello betty"));
 }

 @safe pure nothrow unittest
 {
     // Walks byUTF ranges from the back to show they are bidirectional for
     // char, wchar and dchar targets.
     import std.utf;

     import std.range.primitives;
     wchar[] s = ['ă', 'î'];

     // As UTF-8 the text is C4 83 (ă) C3 AE (î); consume it in reverse.
     auto rc = s.byUTF!char;
     static assert(isBidirectionalRange!(typeof(rc)));
     assert(rc.back == 0xae);
     rc.popBack;
     assert(rc.back == 0xc3);
     rc.popBack;
     assert(rc.back == 0x83);
     rc.popBack;
     assert(rc.back == 0xc4);

     // As UTF-16 each character is a single code unit.
     auto rw = s.byUTF!wchar;
     static assert(isBidirectionalRange!(typeof(rw)));
     assert(rw.back == 'î');
     rw.popBack;
     assert(rw.back == 'ă');

     // As UTF-32 each character is a single code unit as well.
     auto rd = s.byUTF!dchar;
     static assert(isBidirectionalRange!(typeof(rd)));
     assert(rd.back == 'î');
     rd.popBack;
     assert(rd.back == 'ă');
 }
	@safe unittest
	{
		import std.exception : assertThrown;
		import std.utf;

		// Values that encode() must refuse: the boundaries of the two surrogate
		// ranges and the first value past U+10FFFF.
		const dchar[5] rejected = [
			cast(dchar) 0xD800,
			cast(dchar) 0xDBFF,
			cast(dchar) 0xDC00,
			cast(dchar) 0xDFFF,
			cast(dchar) 0x110000,
		];

		char[4] scratch;
		foreach (dchar candidate; rejected)
			assertThrown!UTFException(encode(scratch, candidate));
	}

	@safe @nogc pure nothrow unittest
	{
		import std.utf;

		// Ordinary values, NUL included, are accepted.
		immutable dchar latinA = cast(dchar) 0x41;
		immutable dchar nul = cast(dchar) 0x00;
		assert(latinA.isValidDchar);
		assert(nul.isValidDchar);

		// A surrogate and a value beyond U+10FFFF are rejected.
		immutable dchar highSurrogate = cast(dchar) 0xD800;
		immutable dchar tooLarge = cast(dchar) 0x11FFFF;
		assert(!highSurrogate.isValidDchar);
		assert(!tooLarge.isValidDchar);
	}

	@safe pure nothrow unittest
	{
		import std.utf;

		// One accepted and one rejected value for each character width.
		assert( (cast(char) 0x40).isValidCodepoint);
		assert(!(cast(char) 0x80).isValidCodepoint);

		assert( (cast(wchar) 0x1234).isValidCodepoint);
		assert(!(cast(wchar) 0xD800).isValidCodepoint);

		assert( (cast(dchar) 0x0010FFFF).isValidCodepoint);
		assert(!(cast(dchar) 0x12345678).isValidCodepoint);
	}

	@safe unittest
	{
		import std.utf;

		// Without an index, stride() measures the sequence at the front.
		assert(stride("a") == 1);
		assert(stride("λ") == 2);
		assert(stride("aλ") == 1);
		// With an index, it measures the sequence starting at that code unit.
		assert(stride("aλ", 1) == 2);
		assert(stride("𐐷") == 4);
	}

	@safe unittest
	{
		import std.utf;

		// Without an index, strideBack() measures the last sequence.
		assert(strideBack("a") == 1);
		assert(strideBack("λ") == 2);
		assert(strideBack("aλ") == 2);
		// With an index, it measures the sequence that ends just before it.
		assert(strideBack("aλ", 1) == 1);
		assert(strideBack("𐐷") == 4);
	}

	@safe unittest
	{
		import std.utf;

		// ASCII only: code-unit index and code-point index agree in every encoding.
		assert(`hello world`.toUCSindex(7) == 7);
		assert(`hello world`w.toUCSindex(7) == 7);
		assert(`hello world`d.toUCSindex(7) == 7);

		// One two-unit UTF-8 sequence precedes code unit 7.
		assert(`Ma Chérie`.toUCSindex(7) == 6);
		assert(`Ma Chérie`w.toUCSindex(7) == 7);
		assert(`Ma Chérie`d.toUCSindex(7) == 7);

		// In UTF-8, code unit 9 corresponds to code point 3 of this text.
		assert(`さいごの果実 / ミツバチと科学者`.toUCSindex(9) == 3);
		assert(`さいごの果実 / ミツバチと科学者`w.toUCSindex(9) == 9);
		assert(`さいごの果実 / ミツバチと科学者`d.toUCSindex(9) == 9);
	}

	@safe unittest
	{
		import std.utf;

		// ASCII only: code-point index and code-unit index agree in every encoding.
		assert(`hello world`.toUTFindex(7) == 7);
		assert(`hello world`w.toUTFindex(7) == 7);
		assert(`hello world`d.toUTFindex(7) == 7);

		// Code point 6 starts at UTF-8 code unit 7 because of the two-unit 'é'.
		assert(`Ma Chérie`.toUTFindex(6) == 7);
		assert(`Ma Chérie`w.toUTFindex(7) == 7);
		assert(`Ma Chérie`d.toUTFindex(7) == 7);

		// In UTF-8, code point 3 of this text starts at code unit 9.
		assert(`さいごの果実 / ミツバチと科学者`.toUTFindex(3) == 9);
		assert(`さいごの果実 / ミツバチと科学者`w.toUTFindex(9) == 9);
		assert(`さいごの果実 / ミツバチと科学者`d.toUTFindex(9) == 9);
	}

	@safe pure unittest
	{
		import std.utf;

		// decode() returns the code point at `pos` and advances `pos` past it.
		size_t pos = 0;

		assert(decode("a", pos) == 'a');
		assert(pos == 1);

		pos = 0;
		assert(decode("å", pos) == 'å');
		assert(pos == 2);

		pos = 1;
		assert(decode("aå", pos) == 'å');
		assert(pos == 3);

		pos = 0;
		assert(decode("å"w, pos) == 'å');
		assert(pos == 1);

		// ë as a multi-code point grapheme
		pos = 0;
		assert(decode("e\u0308", pos) == 'e');
		assert(pos == 1);

		// ë as a single code point grapheme
		pos = 0;
		assert(decode("ë", pos) == 'ë');
		assert(pos == 2);

		pos = 0;
		assert(decode("ë"w, pos) == 'ë');
		assert(pos == 1);
	}

	@safe pure unittest
	{
		import std.range.primitives;
		import std.utf;

		// decodeFront() returns the first code point and removes it from the string.
		string text = "Hello, World!";
		assert(decodeFront(text) == 'H');
		assert(text == "ello, World!");

		text = "å";
		assert(decodeFront(text) == 'å');
		assert(text.empty);

		// The two-argument form also reports how many code units were consumed.
		text = "å";
		size_t consumed;
		assert(decodeFront(text, consumed) == 'å');
		assert(consumed == 2);
		assert(text.empty);
	}

	@system pure unittest
	{
		import std.range.primitives;
		import std.utf;

		// decodeBack() returns the last code point and removes it from the string.
		string text = "Hello, World!";
		assert(decodeBack(text) == '!');
		assert(text == "Hello, World");

		text = "å";
		assert(decodeBack(text) == 'å');
		assert(text.empty);

		// The two-argument form also reports how many code units were consumed.
		text = "å";
		size_t consumed;
		assert(decodeBack(text, consumed) == 'å');
		assert(consumed == 2);
		assert(text.empty);
	}

	@safe unittest
	{
		import std.exception : assertThrown;
		import std.typecons : Yes;
		import std.utf;

		char[4] units;

		// encode() returns the number of code units written into the buffer.
		assert(encode(units, '\u0000') == 1);
		assert(units[0 .. 1] == "\u0000");

		assert(encode(units, '\u007F') == 1);
		assert(units[0 .. 1] == "\u007F");

		assert(encode(units, '\u0080') == 2);
		assert(units[0 .. 2] == "\u0080");

		assert(encode(units, '\uE000') == 3);
		assert(units[0 .. 3] == "\uE000");

		assert(encode(units, 0xFFFE) == 3);
		assert(units[0 .. 3] == "\xEF\xBF\xBE");

		// By default an out-of-range value throws ...
		assertThrown!UTFException(encode(units, cast(dchar) 0x110000));

		// ... while Yes.useReplacementDchar writes the replacement character.
		encode!(Yes.useReplacementDchar)(units, cast(dchar) 0x110000);
		auto view = units[];
		assert(decodeFront(view) == replacementDchar);
	}

	@safe unittest
	{
		import std.exception : assertThrown;
		import std.typecons : Yes;
		import std.utf;

		wchar[2] units;

		// Values on either side of the surrogate range take one code unit.
		assert(encode(units, '\u0000') == 1);
		assert(units[0 .. 1] == "\u0000");

		assert(encode(units, '\uD7FF') == 1);
		assert(units[0 .. 1] == "\uD7FF");

		assert(encode(units, '\uE000') == 1);
		assert(units[0 .. 1] == "\uE000");

		// Values from U+10000 upwards take two.
		assert(encode(units, '\U00010000') == 2);
		assert(units[0 .. 2] == "\U00010000");

		assert(encode(units, '\U0010FFFF') == 2);
		assert(units[0 .. 2] == "\U0010FFFF");

		// By default a surrogate value throws ...
		assertThrown!UTFException(encode(units, cast(dchar) 0xD800));

		// ... while Yes.useReplacementDchar writes the replacement character.
		encode!(Yes.useReplacementDchar)(units, cast(dchar) 0x110000);
		auto view = units[];
		assert(decodeFront(view) == replacementDchar);
	}

	@safe unittest
	{
		import std.exception : assertThrown;
		import std.typecons : Yes;
		import std.utf;

		dchar[1] unit;

		// Every accepted value occupies exactly one UTF-32 code unit.
		assert(encode(unit, '\u0000') == 1);
		assert(unit[0] == '\u0000');

		assert(encode(unit, '\uD7FF') == 1);
		assert(unit[0] == '\uD7FF');

		assert(encode(unit, '\uE000') == 1);
		assert(unit[0] == '\uE000');

		assert(encode(unit, '\U0010FFFF') == 1);
		assert(unit[0] == '\U0010FFFF');

		// By default a surrogate value throws ...
		assertThrown!UTFException(encode(unit, cast(dchar) 0xD800));

		// ... while Yes.useReplacementDchar stores the replacement character.
		encode!(Yes.useReplacementDchar)(unit, cast(dchar) 0x110000);
		assert(unit[0] == replacementDchar);
	}

	@safe unittest
	{
		import std.utf;

		// This encode() overload appends to a dynamic array.
		char[] text = "abcd".dup;
		immutable dchar ascii = 'a';
		immutable dchar nonAscii = 'ø';

		text.encode(ascii);
		assert(text.length == 5);
		assert(text == "abcda");

		text.encode(nonAscii);
		assert(text.length == 7);
		assert(text == "abcdaø");
	}

	@safe pure nothrow @nogc unittest
	{
		import std.utf;

		// An ASCII character is one code unit in every encoding.
		enum dchar ascii = 'a';
		assert(codeLength!char(ascii) == 1);
		assert(codeLength!wchar(ascii) == 1);
		assert(codeLength!dchar(ascii) == 1);

		// U+10FFFF needs four, two and one code units respectively.
		enum dchar highest = '\U0010FFFF';
		assert(codeLength!char(highest) == 4);
		assert(codeLength!wchar(highest) == 2);
		assert(codeLength!dchar(highest) == 1);
	}

	@safe unittest
	{
		import std.utf;

		// codeLength!C(str) equals the length of the same text encoded with C.
		assert("hello world".codeLength!char == "hello world".length);
		assert("hello world".codeLength!wchar == "hello world"w.length);
		assert("hello world".codeLength!dchar == "hello world"d.length);

		assert(`プログラミング`.codeLength!char == `プログラミング`.length);
		assert(`プログラミング`.codeLength!wchar == `プログラミング`w.length);
		assert(`プログラミング`.codeLength!dchar == `プログラミング`d.length);

		// The UTF-8 length of a UTF-16 needle can be used to slice a UTF-8 haystack.
		string haystack = `Être sans la verité, ça, ce ne serait pas bien.`;
		wstring needle = `Être sans la verité`;
		immutable prefixUnits = needle.codeLength!char;
		assert(haystack[prefixUnits .. $] == `, ça, ce ne serait pas bien.`);
	}

	@safe unittest
	{
		import std.exception : assertThrown;
		import std.utf;

		// 0xA7 is a UTF-8 continuation byte, so it cannot start a sequence.
		char[] malformed = [0xA7, 0x85, 0xAF];
		assertThrown!UTFException(validate(malformed));
	}

	@safe pure unittest
	{
	// toUTF8 applied to UTF-16 and UTF-32 input; the expected values are
	// the resulting UTF-8 code units.
	import std.utf;

	import std.algorithm.comparison : equal;

	// The ø is represented by two UTF-8 code units
	assert("Hellø"w.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8]));

	// 𐐷 is four code units in UTF-8
	assert("𐐷"d.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7]));
	}

	@safe pure unittest
	{
		import std.algorithm.comparison : equal;
		import std.utf;

		// Each of these is a single UTF-32 code unit ...
		assert("𤭢"d.length == 1);
		assert("𐐷"d.length == 1);

		// ... and becomes a surrogate pair when converted to UTF-16.
		assert(equal(toUTF16("𤭢"d), [0xD852, 0xDF62]));
		assert(equal(toUTF16("𐐷"d), [0xD801, 0xDC37]));
	}

	@safe pure unittest
	{
		import std.algorithm.comparison : equal;
		import std.utf;

		// Each of these occupies two UTF-16 code units ...
		assert("𤭢"w.length == 2);
		assert("𐐷"w.length == 2);

		// ... and becomes a single code unit when converted to UTF-32.
		assert(equal(toUTF32("𤭢"w), [0x00024B62]));
		assert(equal(toUTF32("𐐷"w), [0x00010437]));
	}

	@safe pure unittest
	{
	// Instantiates toUTFz for several pointer target types and source
	// encodings. The returned pointers are not inspected; this block only
	// checks that each call compiles and runs as @safe pure code.
	import std.utf;

	// char-pointer targets with mutable, const and immutable pointees
	auto p1 = toUTFz!(char*)("hello world");
	auto p2 = toUTFz!(const(char)*)("hello world");
	auto p3 = toUTFz!(immutable(char)*)("hello world");
	// targets whose character type differs from the source's
	auto p4 = toUTFz!(char*)("hello world"d);
	auto p5 = toUTFz!(const(wchar)*)("hello world");
	auto p6 = toUTFz!(immutable(dchar)*)("hello world"w);
	}

	@system unittest
	{
		import std.utf;

		// The text is ASCII, so the UTF-16 form has as many code units as the
		// UTF-8 source and the terminator sits at that index.
		string text = "Hello, World!";
		immutable terminatorIndex = text.length;
		const(wchar)* zeroTerminated = toUTF16z(text);
		assert(zeroTerminated[terminatorIndex] == '\0');
	}

	@safe pure nothrow @nogc unittest
	{
		import std.utf;

		// count() yields the number of code points, not code units.
		assert("".count == 0);
		assert("a".count == 1);
		assert("abc".count == 3);
		// One three-unit sequence followed by three ASCII digits.
		assert("\u20AC100".count == 4);
	}

	@safe unittest
	{
		// Compile-time comparison of the range capabilities of a byCodeUnit
		// range with those of a plain string.
		import std.utf;

		import std.range.primitives;
		import std.traits : isAutodecodableString;

		// byCodeUnit yields a random-access range over the string's code units.
		auto r = "Hello, World!".byCodeUnit();
		static assert(hasLength!(typeof(r)));
		static assert(hasSlicing!(typeof(r)));
		static assert(isRandomAccessRange!(typeof(r)));
		static assert(isBidirectionalRange!(typeof(r)));
		static assert(is(ElementType!(typeof(r)) == immutable char));

		// contrast with the range capabilities of standard strings (with or
		// without autodecoding enabled).
		auto s = "Hello, World!";
		// The string itself is bidirectional in both configurations; the
		// original block only ever checked `r` here.
		static assert(isBidirectionalRange!(typeof(s)));
		static if (isAutodecodableString!(typeof(s)))
		{
			// with autodecoding enabled, strings are non-random-access ranges of
			// dchar.
			static assert(is(ElementType!(typeof(s)) == dchar));
			static assert(!isRandomAccessRange!(typeof(s)));
			static assert(!hasSlicing!(typeof(s)));
			static assert(!hasLength!(typeof(s)));
		}
		else
		{
			// without autodecoding, strings are normal arrays.
			static assert(is(ElementType!(typeof(s)) == immutable char));
			static assert(isRandomAccessRange!(typeof(s)));
			static assert(hasSlicing!(typeof(s)));
			static assert(hasLength!(typeof(s)));
		}
	}

	@safe unittest
	{
		import std.utf;

		// noël using e + combining diaeresis: index 2 is the plain 'e'.
		string decomposed = "noe\u0308l";
		auto decomposedUnits = decomposed.byCodeUnit;
		assert(decomposedUnits[2] != 'ë');
		assert(decomposedUnits[2] == 'e');

		// noël using a precomposed ë character. Because string is UTF-8, the
		// code unit at index 2 is just the first of a sequence that encodes 'ë'.
		string precomposed = "no\u00EBl";
		auto precomposedUnits = precomposed.byCodeUnit;
		assert(precomposedUnits[2] != 'ë');
	}

	@safe unittest
	{
	// Checks when a byCodeUnit result exposes a `.source` member giving
	// back the underlying string.
	import std.utf;

	import std.algorithm.comparison : equal;
	import std.range : popFrontN;
	import std.traits : isAutodecodableString;
	{
	auto range = byCodeUnit("hello world");
	// Drop the first three code units ("hel").
	range.popFrontN(3);
	assert(equal(range.save, "lo world"));
	static if (isAutodecodableString!string) // only enabled with autodecoding
	{
	// `.source` reflects the popped state, not the original string.
	string str = range.source;
	assert(str == "lo world");
	}
	}
	// source only exists if the range was wrapped
	{
	auto range = byCodeUnit("hello world"d);
	static assert(!__traits(compiles, range.source));
	}
	}

	@safe pure nothrow unittest
	{
		import std.algorithm.comparison : equal;
		import std.utf;

		// hellö as a range of `char`s, which are UTF-8
		auto asUtf8 = "hell\u00F6".byUTF!char();
		assert(asUtf8.equal(['h', 'e', 'l', 'l', 0xC3, 0xB6]));

		// `wchar`s are able to hold the ö in a single element (UTF-16 code unit)
		auto asUtf16 = "hell\u00F6".byUTF!wchar();
		assert(asUtf16.equal(['h', 'e', 'l', 'l', 'ö']));

		// 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32
		auto fourUnits = "𐐷".byUTF!char();
		auto twoUnits = "𐐷".byUTF!wchar();
		auto oneUnit = "𐐷".byUTF!dchar();
		assert(fourUnits.equal([0xF0, 0x90, 0x90, 0xB7]));
		assert(twoUnits.equal([0xD801, 0xDC37]));
		assert(oneUnit.equal([0x00010437]));
	}

	@safe unittest
	{
		import std.algorithm.comparison : equal;
		import std.exception : assertThrown;
		import std.utf;

		// A lone 0xF0 lead byte makes this text invalid UTF-8.
		string malformed = "hello\xF0betty";

		// With replacement enabled, decoding yields U+FFFD instead of failing.
		auto lenient = malformed.byChar.byUTF!(dchar, UseReplacementDchar.yes);
		assert(lenient.equal("hello\uFFFDetty"));

		// With replacement disabled, walking the range throws.
		assertThrown!UTFException(
			malformed.byChar.byUTF!(dchar, UseReplacementDchar.no).equal("hello betty"));
	}

	@safe pure nothrow unittest
	{
	// Walks byUTF ranges from the back to show they are bidirectional for
	// char, wchar and dchar targets.
	import std.utf;

	import std.range.primitives;
	wchar[] s = ['ă', 'î'];

	// As UTF-8 the text is C4 83 (ă) C3 AE (î); consume it in reverse.
	auto rc = s.byUTF!char;
	static assert(isBidirectionalRange!(typeof(rc)));
	assert(rc.back == 0xae);
	rc.popBack;
	assert(rc.back == 0xc3);
	rc.popBack;
	assert(rc.back == 0x83);
	rc.popBack;
	assert(rc.back == 0xc4);

	// As UTF-16 each character is a single code unit.
	auto rw = s.byUTF!wchar;
	static assert(isBidirectionalRange!(typeof(rw)));
	assert(rw.back == 'î');
	rw.popBack;
	assert(rw.back == 'ă');

	// As UTF-32 each character is a single code unit as well.
	auto rd = s.byUTF!dchar;
	static assert(isBidirectionalRange!(typeof(rd)));
	assert(rd.back == 'î');
	rd.popBack;
	assert(rd.back == 'ă');
	}