// blob: e758d488ae018e93735b308d119934470fff6129 [file] [log] [blame]
@safe unittest
{
    import std.exception : assertThrown;
    import std.utf;

    // Both ends of the high- and low-surrogate ranges, followed by the first
    // value past U+10FFFF; encode must reject every one of them.
    static immutable dchar[5] rejected = [
        cast(dchar) 0xD800,
        cast(dchar) 0xDBFF,
        cast(dchar) 0xDC00,
        cast(dchar) 0xDFFF,
        cast(dchar) 0x110000,
    ];

    char[4] scratch;
    foreach (cp; rejected)
        assertThrown!UTFException(encode(scratch, cp));
}
@safe @nogc pure nothrow unittest
{
    import std.utf;

    // Ordinary scalar values, NUL included, are accepted.
    immutable dchar latinA = cast(dchar) 0x41;
    immutable dchar nul = cast(dchar) 0x00;
    assert(isValidDchar(latinA));
    assert(isValidDchar(nul));

    // A surrogate and a value above U+10FFFF are rejected.
    immutable dchar surrogate = cast(dchar) 0xD800;
    immutable dchar beyondRange = cast(dchar) 0x11FFFF;
    assert(!isValidDchar(surrogate));
    assert(!isValidDchar(beyondRange));
}
@safe pure nothrow unittest
{
    import std.utf;

    // char: only values that stand alone as a code point are accepted.
    assert((cast(char) 0x40).isValidCodepoint);
    assert(!(cast(char) 0x80).isValidCodepoint);

    // wchar: a BMP value is accepted, a surrogate is not.
    assert((cast(wchar) 0x1234).isValidCodepoint);
    assert(!(cast(wchar) 0xD800).isValidCodepoint);

    // dchar: U+10FFFF is accepted, a far larger value is not.
    assert((cast(dchar) 0x0010FFFF).isValidCodepoint);
    assert(!(cast(dchar) 0x12345678).isValidCodepoint);
}
@safe unittest
{
    import std.utf;

    // Width, in UTF-8 code units, of the first code point of each string.
    assert(stride("a") == 1);
    assert(stride("λ") == 2);
    assert(stride("𐐷") == 4);

    // Without an index the leading 'a' is measured; index 1 selects 'λ'.
    assert(stride("aλ") == 1);
    assert(stride("aλ", 1) == 2);
}
@safe unittest
{
    import std.utf;

    // Width, in UTF-8 code units, of the last code point of each string.
    assert(strideBack("a") == 1);
    assert(strideBack("λ") == 2);
    assert(strideBack("𐐷") == 4);

    // Without an index the trailing 'λ' is measured; with index 1 it is the
    // code point that ends just before that index, i.e. 'a'.
    assert(strideBack("aλ") == 2);
    assert(strideBack("aλ", 1) == 1);
}
@safe unittest
{
    import std.utf;

    // Pure ASCII: code unit index and code point index coincide everywhere.
    assert(`hello world`.toUCSindex(7) == 7);
    assert(`hello world`w.toUCSindex(7) == 7);
    assert(`hello world`d.toUCSindex(7) == 7);

    // 'é' takes two UTF-8 code units, so only the UTF-8 result shifts.
    assert(`Ma Chérie`.toUCSindex(7) == 6);
    assert(`Ma Chérie`w.toUCSindex(7) == 7);
    assert(`Ma Chérie`d.toUCSindex(7) == 7);

    // Each of the leading kana takes three UTF-8 code units.
    assert(`さいごの果実 / ミツバチと科学者`.toUCSindex(9) == 3);
    assert(`さいごの果実 / ミツバチと科学者`w.toUCSindex(9) == 9);
    assert(`さいごの果実 / ミツバチと科学者`d.toUCSindex(9) == 9);
}
@safe unittest
{
    import std.utf;

    // Pure ASCII: code point index and code unit index coincide everywhere.
    assert(`hello world`.toUTFindex(7) == 7);
    assert(`hello world`w.toUTFindex(7) == 7);
    assert(`hello world`d.toUTFindex(7) == 7);

    // 'é' takes two UTF-8 code units, so only the UTF-8 result shifts.
    assert(`Ma Chérie`.toUTFindex(6) == 7);
    assert(`Ma Chérie`w.toUTFindex(7) == 7);
    assert(`Ma Chérie`d.toUTFindex(7) == 7);

    // Each of the leading kana takes three UTF-8 code units.
    assert(`さいごの果実 / ミツバチと科学者`.toUTFindex(3) == 9);
    assert(`さいごの果実 / ミツバチと科学者`w.toUTFindex(9) == 9);
    assert(`さいごの果実 / ミツバチと科学者`d.toUTFindex(9) == 9);
}
@safe pure unittest
{
    import std.utf;

    // decode returns the code point at `pos` and advances `pos` past it.
    size_t pos = 0;
    assert(decode("a", pos) == 'a');
    assert(pos == 1);

    pos = 0;
    assert(decode("å", pos) == 'å');
    assert(pos == 2);

    // Starting in the middle of the string, right after the 'a'.
    pos = 1;
    assert(decode("aå", pos) == 'å');
    assert(pos == 3);

    // The same character is a single code unit in UTF-16.
    pos = 0;
    assert(decode("å"w, pos) == 'å');
    assert(pos == 1);

    // ë as a multi-code point grapheme: only the base letter is decoded.
    pos = 0;
    assert(decode("e\u0308", pos) == 'e');
    assert(pos == 1);

    // ë as a single code point grapheme.
    pos = 0;
    assert(decode("ë", pos) == 'ë');
    assert(pos == 2);

    pos = 0;
    assert(decode("ë"w, pos) == 'ë');
    assert(pos == 1);
}
@safe pure unittest
{
    import std.range.primitives;
    import std.utf;

    // decodeFront returns the first code point and removes it from the string.
    string text = "Hello, World!";
    assert(decodeFront(text) == 'H');
    assert(text == "ello, World!");

    text = "å";
    assert(decodeFront(text) == 'å');
    assert(text.empty);

    // The two-argument form also reports how many code units were consumed.
    text = "å";
    size_t consumed;
    assert(decodeFront(text, consumed) == 'å');
    assert(consumed == 2);
    assert(text.empty);
}
@system pure unittest
{
    import std.range.primitives;
    import std.utf;

    // decodeBack returns the last code point and removes it from the string.
    string text = "Hello, World!";
    assert(decodeBack(text) == '!');
    assert(text == "Hello, World");

    text = "å";
    assert(decodeBack(text) == 'å');
    assert(text.empty);

    // The two-argument form also reports how many code units were consumed.
    text = "å";
    size_t consumed;
    assert(decodeBack(text, consumed) == 'å');
    assert(consumed == 2);
    assert(text.empty);
}
@safe unittest
{
    import std.exception : assertThrown;
    import std.typecons : Yes;
    import std.utf;

    char[4] units;
    size_t written;

    // One, two and three code-unit encodings.
    written = encode(units, '\u0000');
    assert(written == 1 && units[0 .. 1] == "\u0000");
    written = encode(units, '\u007F');
    assert(written == 1 && units[0 .. 1] == "\u007F");
    written = encode(units, '\u0080');
    assert(written == 2 && units[0 .. 2] == "\u0080");
    written = encode(units, '\uE000');
    assert(written == 3 && units[0 .. 3] == "\uE000");
    written = encode(units, 0xFFFE);
    assert(written == 3 && units[0 .. 3] == "\xEF\xBF\xBE");

    // A value past U+10FFFF throws by default...
    assertThrown!UTFException(encode(units, cast(dchar) 0x110000));

    // ...but is replaced by replacementDchar when that is requested.
    encode!(Yes.useReplacementDchar)(units, cast(dchar) 0x110000);
    auto view = units[];
    assert(decodeFront(view) == replacementDchar);
}
@safe unittest
{
    import std.exception : assertThrown;
    import std.typecons : Yes;
    import std.utf;

    wchar[2] units;
    size_t written;

    // Code points outside the surrogate range in the BMP need one code unit.
    written = encode(units, '\u0000');
    assert(written == 1 && units[0 .. 1] == "\u0000");
    written = encode(units, '\uD7FF');
    assert(written == 1 && units[0 .. 1] == "\uD7FF");
    written = encode(units, '\uE000');
    assert(written == 1 && units[0 .. 1] == "\uE000");

    // Supplementary code points need a surrogate pair.
    written = encode(units, '\U00010000');
    assert(written == 2 && units[0 .. 2] == "\U00010000");
    written = encode(units, '\U0010FFFF');
    assert(written == 2 && units[0 .. 2] == "\U0010FFFF");

    // A lone surrogate throws by default.
    assertThrown!UTFException(encode(units, cast(dchar) 0xD800));

    // With replacement requested, an invalid value becomes replacementDchar.
    encode!(Yes.useReplacementDchar)(units, cast(dchar) 0x110000);
    auto view = units[];
    assert(decodeFront(view) == replacementDchar);
}
@safe unittest
{
    import std.exception : assertThrown;
    import std.typecons : Yes;
    import std.utf;

    dchar[1] unit;
    size_t written;

    // Every valid code point is stored as exactly one UTF-32 code unit.
    written = encode(unit, '\u0000');
    assert(written == 1 && unit[0] == '\u0000');
    written = encode(unit, '\uD7FF');
    assert(written == 1 && unit[0] == '\uD7FF');
    written = encode(unit, '\uE000');
    assert(written == 1 && unit[0] == '\uE000');
    written = encode(unit, '\U0010FFFF');
    assert(written == 1 && unit[0] == '\U0010FFFF');

    // A lone surrogate throws by default.
    assertThrown!UTFException(encode(unit, cast(dchar) 0xD800));

    // With replacement requested, an invalid value becomes replacementDchar.
    encode!(Yes.useReplacementDchar)(unit, cast(dchar) 0x110000);
    assert(unit[0] == replacementDchar);
}
@safe unittest
{
    import std.utf;

    // With a dynamic array, encode appends the encoded code point.
    char[] text = "abcd".dup;
    dchar oneUnit = 'a';
    dchar twoUnits = 'ø';

    encode(text, oneUnit);
    assert(text == "abcda");
    assert(text.length == 5);

    encode(text, twoUnits);
    assert(text == "abcdaø");
    assert(text.length == 7);
}
@safe pure nothrow @nogc unittest
{
    import std.utf;

    // An ASCII letter is a single code unit in every encoding.
    enum dchar ascii = 'a';
    assert(codeLength!char(ascii) == 1);
    assert(codeLength!wchar(ascii) == 1);
    assert(codeLength!dchar(ascii) == 1);

    // The highest code point needs 4, 2 and 1 code units respectively.
    enum dchar highest = '\U0010FFFF';
    assert(codeLength!char(highest) == 4);
    assert(codeLength!wchar(highest) == 2);
    assert(codeLength!dchar(highest) == 1);
}
@safe unittest
{
    import std.utf;

    // codeLength!C(s) matches the length of s once converted to encoding C.
    string ascii = "hello world";
    assert(codeLength!char(ascii) == "hello world".length);
    assert(codeLength!wchar(ascii) == "hello world"w.length);
    assert(codeLength!dchar(ascii) == "hello world"d.length);

    string kana = `プログラミング`;
    assert(codeLength!char(kana) == `プログラミング`.length);
    assert(codeLength!wchar(kana) == `プログラミング`w.length);
    assert(codeLength!dchar(kana) == `プログラミング`d.length);

    // Use the UTF-8 length of a UTF-16 prefix to slice a UTF-8 string.
    string sentence = `Être sans la verité, ça, ce ne serait pas bien.`;
    wstring prefix = `Être sans la verité`;
    const skip = codeLength!char(prefix);
    assert(sentence[skip .. $] == `, ça, ce ne serait pas bien.`);
}
@safe unittest
{
    import std.exception : assertThrown;
    import std.utf;

    // Three code units that validate rejects as UTF-8.
    char[] malformed = [0xA7, 0x85, 0xAF];
    assertThrown!UTFException(validate(malformed));
}
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.utf;

    // 'ø' is represented by two UTF-8 code units (0xC3 0xB8).
    assert(equal(toUTF8("Hellø"w), ['H', 'e', 'l', 'l', 0xC3, 0xB8]));

    // 𐐷 takes four UTF-8 code units.
    assert(equal(toUTF8("𐐷"d), [0xF0, 0x90, 0x90, 0xB7]));
}
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.utf;

    // Each of these is a single UTF-32 code unit...
    dstring han = "𤭢"d;
    dstring deseret = "𐐷"d;
    assert(han.length == 1);
    assert(deseret.length == 1);

    // ...and a surrogate pair once converted to UTF-16.
    assert(equal(toUTF16(han), [0xD852, 0xDF62]));
    assert(equal(toUTF16(deseret), [0xD801, 0xDC37]));
}
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.utf;

    // Each of these is a surrogate pair in UTF-16...
    wstring han = "𤭢"w;
    wstring deseret = "𐐷"w;
    assert(han.length == 2);
    assert(deseret.length == 2);

    // ...and a single code unit once converted to UTF-32.
    assert(equal(toUTF32(han), [0x00024B62]));
    assert(equal(toUTF32(deseret), [0x00010437]));
}
@safe pure unittest
{
    import std.utf;

    // Same encoding as the input, with each pointer mutability.
    char* mutableZ = toUTFz!(char*)("hello world");
    const(char)* constZ = toUTFz!(const(char)*)("hello world");
    immutable(char)* immutableZ = toUTFz!(immutable(char)*)("hello world");

    // The target character type may differ from the source encoding.
    char* fromUtf32 = toUTFz!(char*)("hello world"d);
    const(wchar)* toUtf16 = toUTFz!(const(wchar)*)("hello world");
    immutable(dchar)* toUtf32 = toUTFz!(immutable(dchar)*)("hello world"w);
}
@system unittest
{
    import std.utf;

    string text = "Hello, World!";
    auto wideZ = toUTF16z(text);

    // The input is pure ASCII, so its UTF-16 form has the same number of code
    // units and the terminator sits at index text.length.
    assert('\0' == wideZ[text.length]);
}
@safe pure nothrow @nogc unittest
{
    import std.utf;

    // count reports code points, not code units.
    assert("".count == 0);
    assert("a".count == 1);
    assert("abc".count == 3);

    // The euro sign is several UTF-8 code units but one code point.
    assert("\u20AC100".count == 4);
}
@safe unittest
{
    import std.utf;
    import std.range.primitives;
    import std.traits : isAutodecodableString;

    // byCodeUnit exposes the string as a random-access range of code units.
    auto r = "Hello, World!".byCodeUnit();
    static assert(hasLength!(typeof(r)));
    static assert(hasSlicing!(typeof(r)));
    static assert(isRandomAccessRange!(typeof(r)));
    static assert(is(ElementType!(typeof(r)) == immutable char));

    // contrast with the range capabilities of standard strings (with or
    // without autodecoding enabled).
    auto s = "Hello, World!";
    // A plain string is a bidirectional range in both configurations. This
    // checks `s`, the plain string, rather than re-checking `r`.
    static assert(isBidirectionalRange!(typeof(s)));
    static if (isAutodecodableString!(typeof(s)))
    {
        // with autodecoding enabled, strings are non-random-access ranges of
        // dchar.
        static assert(is(ElementType!(typeof(s)) == dchar));
        static assert(!isRandomAccessRange!(typeof(s)));
        static assert(!hasSlicing!(typeof(s)));
        static assert(!hasLength!(typeof(s)));
    }
    else
    {
        // without autodecoding, strings are normal arrays.
        static assert(is(ElementType!(typeof(s)) == immutable char));
        static assert(isRandomAccessRange!(typeof(s)));
        static assert(hasSlicing!(typeof(s)));
        static assert(hasLength!(typeof(s)));
    }
}
@safe unittest
{
    import std.utf;

    // "noël" written as 'e' followed by a combining diaeresis: the code unit
    // at index 2 is the plain 'e'.
    string decomposed = "noe\u0308l";
    auto decomposedUnits = decomposed.byCodeUnit;
    assert(decomposedUnits[2] == 'e');
    assert(decomposedUnits[2] != 'ë');

    // "noël" written with the precomposed 'ë': because string is UTF-8, the
    // code unit at index 2 is only the first of the sequence encoding 'ë'.
    string precomposed = "no\u00EBl";
    auto precomposedUnits = precomposed.byCodeUnit;
    assert(precomposedUnits[2] != 'ë');
}
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.range : popFrontN;
    import std.traits : isAutodecodableString;
    import std.utf;

    {
        auto units = "hello world".byCodeUnit;
        popFrontN(units, 3);
        assert(units.save.equal("lo world"));
        static if (isAutodecodableString!string) // only enabled with autodecoding
        {
            // `source` hands back the remaining part of the wrapped string.
            string remainder = units.source;
            assert(remainder == "lo world");
        }
    }
    // source only exists if the range was wrapped
    {
        auto wide = "hello world"d.byCodeUnit;
        static assert(!__traits(compiles, wide.source));
    }
}
@safe pure nothrow unittest
{
    import std.algorithm.comparison : equal;
    import std.utf;

    // hellö as a range of `char`s (UTF-8): the ö becomes two code units.
    assert(equal(byUTF!char("hell\u00F6"), ['h', 'e', 'l', 'l', 0xC3, 0xB6]));
    // A `wchar` (UTF-16 code unit) holds the ö in a single element.
    assert(equal(byUTF!wchar("hell\u00F6"), ['h', 'e', 'l', 'l', 'ö']));

    // 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32.
    assert(equal(byUTF!char("𐐷"), [0xF0, 0x90, 0x90, 0xB7]));
    assert(equal(byUTF!wchar("𐐷"), [0xD801, 0xDC37]));
    assert(equal(byUTF!dchar("𐐷"), [0x00010437]));
}
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.exception : assertThrown;
    import std.utf;

    // The \xF0 code unit makes this string malformed UTF-8.
    string malformed = "hello\xF0betty";

    // With replacement enabled, U+FFFD appears in the decoded output.
    auto lenient = malformed.byChar.byUTF!(dchar, UseReplacementDchar.yes);
    assert(lenient.equal("hello\uFFFDetty"));

    // With replacement disabled, consuming the range throws instead.
    assertThrown!UTFException(
        malformed.byChar.byUTF!(dchar, UseReplacementDchar.no).equal("hello betty"));
}
@safe pure nothrow unittest
{
    import std.range.primitives;
    import std.utf;

    wchar[] wide = ['ă', 'î'];

    // Walked backwards as UTF-8, the code units come out in reverse:
    // 'î' is C3 AE and 'ă' is C4 83.
    auto asChars = wide.byUTF!char;
    static assert(isBidirectionalRange!(typeof(asChars)));
    assert(asChars.back == 0xae);
    asChars.popBack();
    assert(asChars.back == 0xc3);
    asChars.popBack();
    assert(asChars.back == 0x83);
    asChars.popBack();
    assert(asChars.back == 0xc4);

    // As UTF-16 each character is a single element.
    auto asWchars = wide.byUTF!wchar;
    static assert(isBidirectionalRange!(typeof(asWchars)));
    assert(asWchars.back == 'î');
    asWchars.popBack();
    assert(asWchars.back == 'ă');

    // Likewise as UTF-32.
    auto asDchars = wide.byUTF!dchar;
    static assert(isBidirectionalRange!(typeof(asDchars)));
    assert(asDchars.back == 'î');
    asDchars.popBack();
    assert(asDchars.back == 'ă');
}