| @safe unittest |
| { |
| import std.utf; |
| |
| import std.exception : assertThrown; |
| |
| char[4] buf; |
| assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); |
| assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); |
| assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); |
| assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); |
| assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); |
| } |
| |
| @safe @nogc pure nothrow unittest |
| { |
| import std.utf; |
| |
| assert( isValidDchar(cast(dchar) 0x41)); |
| assert( isValidDchar(cast(dchar) 0x00)); |
| assert(!isValidDchar(cast(dchar) 0xD800)); |
| assert(!isValidDchar(cast(dchar) 0x11FFFF)); |
| } |
| |
| @safe pure nothrow unittest |
| { |
| import std.utf; |
| |
| assert( isValidCodepoint(cast(char) 0x40)); |
| assert(!isValidCodepoint(cast(char) 0x80)); |
| assert( isValidCodepoint(cast(wchar) 0x1234)); |
| assert(!isValidCodepoint(cast(wchar) 0xD800)); |
| assert( isValidCodepoint(cast(dchar) 0x0010FFFF)); |
| assert(!isValidCodepoint(cast(dchar) 0x12345678)); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| assert("a".stride == 1); |
| assert("λ".stride == 2); |
| assert("aλ".stride == 1); |
| assert("aλ".stride(1) == 2); |
| assert("𐐷".stride == 4); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| assert("a".strideBack == 1); |
| assert("λ".strideBack == 2); |
| assert("aλ".strideBack == 2); |
| assert("aλ".strideBack(1) == 1); |
| assert("𐐷".strideBack == 4); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| assert(toUCSindex(`hello world`, 7) == 7); |
| assert(toUCSindex(`hello world`w, 7) == 7); |
| assert(toUCSindex(`hello world`d, 7) == 7); |
| |
| assert(toUCSindex(`Ma Chérie`, 7) == 6); |
| assert(toUCSindex(`Ma Chérie`w, 7) == 7); |
| assert(toUCSindex(`Ma Chérie`d, 7) == 7); |
| |
| assert(toUCSindex(`さいごの果実 / ミツバチと科学者`, 9) == 3); |
| assert(toUCSindex(`さいごの果実 / ミツバチと科学者`w, 9) == 9); |
| assert(toUCSindex(`さいごの果実 / ミツバチと科学者`d, 9) == 9); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| assert(toUTFindex(`hello world`, 7) == 7); |
| assert(toUTFindex(`hello world`w, 7) == 7); |
| assert(toUTFindex(`hello world`d, 7) == 7); |
| |
| assert(toUTFindex(`Ma Chérie`, 6) == 7); |
| assert(toUTFindex(`Ma Chérie`w, 7) == 7); |
| assert(toUTFindex(`Ma Chérie`d, 7) == 7); |
| |
| assert(toUTFindex(`さいごの果実 / ミツバチと科学者`, 3) == 9); |
| assert(toUTFindex(`さいごの果実 / ミツバチと科学者`w, 9) == 9); |
| assert(toUTFindex(`さいごの果実 / ミツバチと科学者`d, 9) == 9); |
| } |
| |
| @safe pure unittest |
| { |
| import std.utf; |
| |
| size_t i; |
| |
| assert("a".decode(i) == 'a' && i == 1); |
| i = 0; |
| assert("å".decode(i) == 'å' && i == 2); |
| i = 1; |
| assert("aå".decode(i) == 'å' && i == 3); |
| i = 0; |
| assert("å"w.decode(i) == 'å' && i == 1); |
| |
| // ë as a multi-code point grapheme |
| i = 0; |
| assert("e\u0308".decode(i) == 'e' && i == 1); |
| // ë as a single code point grapheme |
| i = 0; |
| assert("ë".decode(i) == 'ë' && i == 2); |
| i = 0; |
| assert("ë"w.decode(i) == 'ë' && i == 1); |
| } |
| |
| @safe pure unittest |
| { |
| import std.utf; |
| |
| import std.range.primitives; |
| string str = "Hello, World!"; |
| |
| assert(str.decodeFront == 'H' && str == "ello, World!"); |
| str = "å"; |
| assert(str.decodeFront == 'å' && str.empty); |
| str = "å"; |
| size_t i; |
| assert(str.decodeFront(i) == 'å' && i == 2 && str.empty); |
| } |
| |
| @system pure unittest |
| { |
| import std.utf; |
| |
| import std.range.primitives; |
| string str = "Hello, World!"; |
| |
| assert(str.decodeBack == '!' && str == "Hello, World"); |
| str = "å"; |
| assert(str.decodeBack == 'å' && str.empty); |
| str = "å"; |
| size_t i; |
| assert(str.decodeBack(i) == 'å' && i == 2 && str.empty); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| import std.exception : assertThrown; |
| import std.typecons : Yes; |
| |
| char[4] buf; |
| |
| assert(encode(buf, '\u0000') == 1 && buf[0 .. 1] == "\u0000"); |
| assert(encode(buf, '\u007F') == 1 && buf[0 .. 1] == "\u007F"); |
| assert(encode(buf, '\u0080') == 2 && buf[0 .. 2] == "\u0080"); |
| assert(encode(buf, '\uE000') == 3 && buf[0 .. 3] == "\uE000"); |
| assert(encode(buf, 0xFFFE) == 3 && buf[0 .. 3] == "\xEF\xBF\xBE"); |
| assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); |
| |
| encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); |
| auto slice = buf[]; |
| assert(slice.decodeFront == replacementDchar); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| import std.exception : assertThrown; |
| import std.typecons : Yes; |
| |
| wchar[2] buf; |
| |
| assert(encode(buf, '\u0000') == 1 && buf[0 .. 1] == "\u0000"); |
| assert(encode(buf, '\uD7FF') == 1 && buf[0 .. 1] == "\uD7FF"); |
| assert(encode(buf, '\uE000') == 1 && buf[0 .. 1] == "\uE000"); |
| assert(encode(buf, '\U00010000') == 2 && buf[0 .. 2] == "\U00010000"); |
| assert(encode(buf, '\U0010FFFF') == 2 && buf[0 .. 2] == "\U0010FFFF"); |
| assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); |
| |
| encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); |
| auto slice = buf[]; |
| assert(slice.decodeFront == replacementDchar); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| import std.exception : assertThrown; |
| import std.typecons : Yes; |
| |
| dchar[1] buf; |
| |
| assert(encode(buf, '\u0000') == 1 && buf[0] == '\u0000'); |
| assert(encode(buf, '\uD7FF') == 1 && buf[0] == '\uD7FF'); |
| assert(encode(buf, '\uE000') == 1 && buf[0] == '\uE000'); |
| assert(encode(buf, '\U0010FFFF') == 1 && buf[0] == '\U0010FFFF'); |
| assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); |
| |
| encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); |
| assert(buf[0] == replacementDchar); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| char[] s = "abcd".dup; |
| dchar d1 = 'a'; |
| dchar d2 = 'ø'; |
| |
| encode(s, d1); |
| assert(s.length == 5); |
| assert(s == "abcda"); |
| encode(s, d2); |
| assert(s.length == 7); |
| assert(s == "abcdaø"); |
| } |
| |
| @safe pure nothrow @nogc unittest |
| { |
| import std.utf; |
| |
| assert(codeLength!char('a') == 1); |
| assert(codeLength!wchar('a') == 1); |
| assert(codeLength!dchar('a') == 1); |
| |
| assert(codeLength!char('\U0010FFFF') == 4); |
| assert(codeLength!wchar('\U0010FFFF') == 2); |
| assert(codeLength!dchar('\U0010FFFF') == 1); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| assert(codeLength!char("hello world") == |
| "hello world".length); |
| assert(codeLength!wchar("hello world") == |
| "hello world"w.length); |
| assert(codeLength!dchar("hello world") == |
| "hello world"d.length); |
| |
| assert(codeLength!char(`プログラミング`) == |
| `プログラミング`.length); |
| assert(codeLength!wchar(`プログラミング`) == |
| `プログラミング`w.length); |
| assert(codeLength!dchar(`プログラミング`) == |
| `プログラミング`d.length); |
| |
| string haystack = `Être sans la verité, ça, ce ne serait pas bien.`; |
| wstring needle = `Être sans la verité`; |
| assert(haystack[codeLength!char(needle) .. $] == |
| `, ça, ce ne serait pas bien.`); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| import std.exception : assertThrown; |
| char[] a = [167, 133, 175]; |
| assertThrown!UTFException(validate(a)); |
| } |
| |
| @safe pure unittest |
| { |
| import std.utf; |
| |
| import std.algorithm.comparison : equal; |
| |
| // The ö is represented by two UTF-8 code units |
| assert("Hellø"w.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8])); |
| |
| // 𐐷 is four code units in UTF-8 |
| assert("𐐷"d.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7])); |
| } |
| |
| @safe pure unittest |
| { |
| import std.utf; |
| |
| import std.algorithm.comparison : equal; |
| |
| // these graphemes are two code units in UTF-16 and one in UTF-32 |
| assert("𤭢"d.length == 1); |
| assert("𐐷"d.length == 1); |
| |
| assert("𤭢"d.toUTF16.equal([0xD852, 0xDF62])); |
| assert("𐐷"d.toUTF16.equal([0xD801, 0xDC37])); |
| } |
| |
| @safe pure unittest |
| { |
| import std.utf; |
| |
| import std.algorithm.comparison : equal; |
| |
| // these graphemes are two code units in UTF-16 and one in UTF-32 |
| assert("𤭢"w.length == 2); |
| assert("𐐷"w.length == 2); |
| |
| assert("𤭢"w.toUTF32.equal([0x00024B62])); |
| assert("𐐷"w.toUTF32.equal([0x00010437])); |
| } |
| |
| @safe pure unittest |
| { |
| import std.utf; |
| |
| auto p1 = toUTFz!(char*)("hello world"); |
| auto p2 = toUTFz!(const(char)*)("hello world"); |
| auto p3 = toUTFz!(immutable(char)*)("hello world"); |
| auto p4 = toUTFz!(char*)("hello world"d); |
| auto p5 = toUTFz!(const(wchar)*)("hello world"); |
| auto p6 = toUTFz!(immutable(dchar)*)("hello world"w); |
| } |
| |
| @system unittest |
| { |
| import std.utf; |
| |
| string str = "Hello, World!"; |
| const(wchar)* p = str.toUTF16z; |
| assert(p[str.length] == '\0'); |
| } |
| |
| @safe pure nothrow @nogc unittest |
| { |
| import std.utf; |
| |
| assert(count("") == 0); |
| assert(count("a") == 1); |
| assert(count("abc") == 3); |
| assert(count("\u20AC100") == 4); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| import std.range.primitives; |
| import std.traits : isAutodecodableString; |
| |
| auto r = "Hello, World!".byCodeUnit(); |
| static assert(hasLength!(typeof(r))); |
| static assert(hasSlicing!(typeof(r))); |
| static assert(isRandomAccessRange!(typeof(r))); |
| static assert(is(ElementType!(typeof(r)) == immutable char)); |
| |
| // contrast with the range capabilities of standard strings (with or |
| // without autodecoding enabled). |
| auto s = "Hello, World!"; |
| static assert(isBidirectionalRange!(typeof(r))); |
| static if (isAutodecodableString!(typeof(s))) |
| { |
| // with autodecoding enabled, strings are non-random-access ranges of |
| // dchar. |
| static assert(is(ElementType!(typeof(s)) == dchar)); |
| static assert(!isRandomAccessRange!(typeof(s))); |
| static assert(!hasSlicing!(typeof(s))); |
| static assert(!hasLength!(typeof(s))); |
| } |
| else |
| { |
| // without autodecoding, strings are normal arrays. |
| static assert(is(ElementType!(typeof(s)) == immutable char)); |
| static assert(isRandomAccessRange!(typeof(s))); |
| static assert(hasSlicing!(typeof(s))); |
| static assert(hasLength!(typeof(s))); |
| } |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| string noel1 = "noe\u0308l"; // noël using e + combining diaeresis |
| assert(noel1.byCodeUnit[2] != 'ë'); |
| assert(noel1.byCodeUnit[2] == 'e'); |
| |
| string noel2 = "no\u00EBl"; // noël using a precomposed ë character |
| // Because string is UTF-8, the code unit at index 2 is just |
| // the first of a sequence that encodes 'ë' |
| assert(noel2.byCodeUnit[2] != 'ë'); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| import std.algorithm.comparison : equal; |
| import std.range : popFrontN; |
| import std.traits : isAutodecodableString; |
| { |
| auto range = byCodeUnit("hello world"); |
| range.popFrontN(3); |
| assert(equal(range.save, "lo world")); |
| static if (isAutodecodableString!string) // only enabled with autodecoding |
| { |
| string str = range.source; |
| assert(str == "lo world"); |
| } |
| } |
| // source only exists if the range was wrapped |
| { |
| auto range = byCodeUnit("hello world"d); |
| static assert(!__traits(compiles, range.source)); |
| } |
| } |
| |
| @safe pure nothrow unittest |
| { |
| import std.utf; |
| |
| import std.algorithm.comparison : equal; |
| |
| // hellö as a range of `char`s, which are UTF-8 |
| assert("hell\u00F6".byUTF!char().equal(['h', 'e', 'l', 'l', 0xC3, 0xB6])); |
| |
| // `wchar`s are able to hold the ö in a single element (UTF-16 code unit) |
| assert("hell\u00F6".byUTF!wchar().equal(['h', 'e', 'l', 'l', 'ö'])); |
| |
| // 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32 |
| assert("𐐷".byUTF!char().equal([0xF0, 0x90, 0x90, 0xB7])); |
| assert("𐐷".byUTF!wchar().equal([0xD801, 0xDC37])); |
| assert("𐐷".byUTF!dchar().equal([0x00010437])); |
| } |
| |
| @safe unittest |
| { |
| import std.utf; |
| |
| import std.algorithm.comparison : equal; |
| import std.exception : assertThrown; |
| |
| assert("hello\xF0betty".byChar.byUTF!(dchar, UseReplacementDchar.yes).equal("hello\uFFFDetty")); |
| assertThrown!UTFException("hello\xF0betty".byChar.byUTF!(dchar, UseReplacementDchar.no).equal("hello betty")); |
| } |
| |
| @safe pure nothrow unittest |
| { |
| import std.utf; |
| |
| import std.range.primitives; |
| wchar[] s = ['ă', 'î']; |
| |
| auto rc = s.byUTF!char; |
| static assert(isBidirectionalRange!(typeof(rc))); |
| assert(rc.back == 0xae); |
| rc.popBack; |
| assert(rc.back == 0xc3); |
| rc.popBack; |
| assert(rc.back == 0x83); |
| rc.popBack; |
| assert(rc.back == 0xc4); |
| |
| auto rw = s.byUTF!wchar; |
| static assert(isBidirectionalRange!(typeof(rw))); |
| assert(rw.back == 'î'); |
| rw.popBack; |
| assert(rw.back == 'ă'); |
| |
| auto rd = s.byUTF!dchar; |
| static assert(isBidirectionalRange!(typeof(rd))); |
| assert(rd.back == 'î'); |
| rd.popBack; |
| assert(rd.back == 'ă'); |
| } |
| |