blob: 8b204ab4cad5eefd7405313557e03fd5d93c1b12 [file] [log] [blame]
/**
* Contains various string related functions.
*
* Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
* Authors: Walter Bright, https://www.digitalmars.com
* License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/string.d, root/_string.d)
* Documentation: https://dlang.org/phobos/dmd_root_string.html
* Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/string.d
*/
module dmd.root.string;
/**
 * Views a `\0`-terminated C string as a D slice over the same memory.
 *
 * The length is measured with `strlen`, so the returned slice ends just
 * before the terminator. No characters are copied.
 *
 * Params:
 *   s = pointer to a `\0`-terminated string, or `null`
 * Returns:
 *   the characters preceding the terminator, or `null` when `s` is `null`
 */
inout(char)[] toDString (inout(char)* s) pure nothrow @nogc
{
    import core.stdc.string : strlen;

    // A null pointer maps onto the null slice instead of being dereferenced
    if (s is null)
        return null;

    const count = strlen(s);
    return s[0 .. count];
}
/**
Case-insensitive equality test for two slices.

Every `char` is compared on its own (no UTF-8 decoding takes place) and
case folding is delegated to C's `toupper`, so the result is only
really meaningful for plain ASCII strings.

Params:
  s1 = first string to compare
  s2 = second string to compare
Returns:
  `true` if both slices have the same length and each pair of characters
  is equal, either directly or after upper-casing; `false` otherwise
*/
extern(D) static bool iequals(const(char)[] s1, const(char)[] s2) pure nothrow @nogc
{
    import core.stdc.ctype : toupper;

    const count = s1.length;
    if (count != s2.length)
        return false;

    for (size_t i = 0; i < count; ++i)
    {
        const lhs = s1[i];
        // The lengths were just checked to be equal, so `i` is in range
        // for `s2` as well and the bounds check can be skipped.
        const rhs = s2.ptr[i];

        // Identical bytes need no case folding
        if (lhs == rhs)
            continue;

        if (toupper(lhs) != toupper(rhs))
            return false;
    }
    return true;
}
/**
Copy the content of `src` into a C-string ('\0' terminated) then call `dg`

The intent of this function is to provide an allocation-less
way to call a C function using a D slice.
The function internally allocates a buffer if needed, but frees it on exit.

The slice handed to `dg` has length `src.length + 1`: it holds a copy of
`src` followed by the added `'\0'` terminator.

Note:
The argument to `dg` is `scope`. To keep the data around after `dg` exits,
one has to copy it.

Params:
  src = Slice to use to call the C function
  dg = Delegate to call afterwards

Returns:
  Whatever `dg` returns
*/
auto toCStringThen(alias dg)(const(char)[] src) nothrow
{
    import dmd.common.string : SmallBuffer;

    // One extra byte for the terminator
    const len = src.length + 1;
    // Backing storage offered to SmallBuffer; left uninitialized because
    // every byte of the used part is written below before `dg` sees it.
    char[512] small = void;
    auto sb = SmallBuffer!char(len, small[]);
    scope ptr = sb[];
    ptr[0 .. src.length] = src[];
    ptr[src.length] = '\0';
    return dg(ptr);
}
unittest
{
    // Plain input: the callback sees the text plus exactly one added terminator
    const plain = toCStringThen!(buf => buf == "Hello world\0")("Hello world");
    assert(plain);

    // Input that already ends in `\0` still gets another terminator appended
    const alreadyTerminated = toCStringThen!(buf => buf == "Hello world\0\0")("Hello world\0");
    assert(alreadyTerminated);

    // A null slice yields a buffer containing only the terminator
    const fromNull = toCStringThen!(buf => buf == "\0")(null);
    assert(fromNull);
}
/**
 * Removes a single line terminator from the front of `str`, if there is one.
 *
 * The following are what the Unicode standard considers as line terminators:
 *
 * | Name                | D Escape Sequence | Unicode Code Point |
 * |---------------------|-------------------|--------------------|
 * | Line feed           | `\n`              | `U+000A`           |
 * | Line tabulation     | `\v`              | `U+000B`           |
 * | Form feed           | `\f`              | `U+000C`           |
 * | Carriage return     | `\r`              | `U+000D`           |
 * | Next line           |                   | `U+0085`           |
 * | Line separator      |                   | `U+2028`           |
 * | Paragraph separator |                   | `U+2029`           |
 *
 * The two-character sequence `\r\n` is treated as one terminator and is
 * removed as a whole.
 *
 * Params:
 *   str = the string to strip
 * Returns:
 *   `str` without its leading line terminator, or `str` unchanged if it
 *   does not start with one
 */
string stripLeadingLineTerminator(string str) pure nothrow @nogc @safe
{
    // UTF-8 encodings of the terminators that need more than one byte
    enum nel = "\xC2\x85";      // U+0085
    enum ls = "\xE2\x80\xA8";   // U+2028
    enum ps = "\xE2\x80\xA9";   // U+2029
    static assert(ls.length == ps.length);

    // `true` if `text` begins with the bytes of `prefix`
    static bool beginsWith(string text, string prefix) pure nothrow @nogc @safe
    {
        return text.length >= prefix.length && text[0 .. prefix.length] == prefix;
    }

    if (str.length == 0)
        return str;

    const lead = str[0];

    if (lead == '\r')
    {
        // `\r\n` counts as one terminator; a lone `\r` is stripped by itself
        const crlf = str.length >= 2 && str[1] == '\n';
        return crlf ? str[2 .. $] : str[1 .. $];
    }

    if (lead == '\n' || lead == '\v' || lead == '\f')
        return str[1 .. $];

    if (beginsWith(str, nel))
        return str[nel.length .. $];

    // Both separators have the same encoded length (checked above)
    if (beginsWith(str, ls) || beginsWith(str, ps))
        return str[ls.length .. $];

    return str;
}
unittest
{
    // Each entry is [input, expected result]
    static immutable string[2][] cases = [
        // nothing to strip
        ["", ""],
        ["foo", "foo"],
        // lead bytes of multi-byte terminators without the rest of the sequence
        ["\xC2foo", "\xC2foo"],
        ["\xE2foo", "\xE2foo"],
        // single-byte terminators
        ["\nfoo", "foo"],
        ["\vfoo", "foo"],
        ["\ffoo", "foo"],
        ["\rfoo", "foo"],
        // multi-byte terminators
        ["\u0085foo", "foo"],
        ["\u2028foo", "foo"],
        ["\u2029foo", "foo"],
        // only `\r\n` is treated as a pair; `\n\r` loses just the `\n`
        ["\n\rfoo", "\rfoo"],
        ["\r\nfoo", "foo"],
    ];

    foreach (pair; cases)
        assert(stripLeadingLineTerminator(pair[0]) == pair[1]);
}
/**
 * A string comparison function that orders its arguments like `strcmp`.
 *
 * The common prefix of both strings is compared `char` by `char`; if it is
 * identical, the shorter string orders first.
 *
 * Note: Strings are compared based on their raw `char` values, no UTF-8
 * decoding takes place.
 *
 * Some C functions (e.g. `qsort`) require an `int` result for comparison.
 *
 * Params:
 *   s1 = left-hand string
 *   s2 = right-hand string
 * Returns:
 *   a negative value if `s1` orders before `s2`, `0` if both are equal,
 *   a positive value if `s1` orders after `s2`
 * See_Also: Druntime's `core.internal.string`
 */
int dstrcmp()( scope const char[] s1, scope const char[] s2 ) @trusted
{
    // Number of characters present in both strings
    immutable common = s2.length < s1.length ? s2.length : s1.length;

    if (!__ctfe)
    {
        import core.stdc.string : memcmp;

        const diff = memcmp( s1.ptr, s2.ptr, common );
        if ( diff != 0 )
            return diff;
    }
    else
    {
        // `memcmp` cannot be evaluated at compile time: compare by hand
        for (size_t i = 0; i < common; ++i)
        {
            const lhs = s1[i];
            const rhs = s2[i];
            if (lhs != rhs)
                return lhs > rhs ? 1 : -1;
        }
    }

    // Common prefix is identical: the length decides
    if (s1.length == s2.length)
        return 0;
    return s1.length < s2.length ? -1 : 1;
}
// Checks `dstrcmp` both at run time and during compile-time evaluation
unittest
{
    // Run-time path
    const same = dstrcmp("Fraise", "Fraise");
    const before = dstrcmp("Baguette", "Croissant");
    const after = dstrcmp("Croissant", "Baguette");
    assert(same == 0);
    assert(before < 0);
    assert(after > 0);

    // Compile-time path
    enum ctBefore = dstrcmp("Baguette", "Croissant");
    static assert(ctBefore < 0);

    // Non-ASCII (multi-byte UTF-8) input, at run time and at compile time
    assert(dstrcmp("안녕하세요!", "안녕하세요!") == 0);
    enum ctSame = dstrcmp("안녕하세요!", "안녕하세요!");
    static assert(ctSame == 0);
}
/**
 * Turns a string literal of inferred length `N` into a static array of
 * length `N + 1` holding the same characters followed by a null character.
 *
 * Params:
 *      literal = string literal
 *
 * Returns:
 *      a `char[N + 1]` whose first `N` elements are copied from `literal`
 *      and whose last element is `0`
 *
 * Notes:
 *  - LDC produces quite optimal code for short strings:
 *    - https://d.godbolt.org/z/M69Z1g
 *    - https://gist.github.com/PetarKirov/338e4ab9292b6b2b311a3070572a07fb (backup URL)
 */
char[N + 1] toStaticArray(size_t N)(scope const(char)[N] literal)
{
    // Left uninitialized: every element is assigned right below
    char[N + 1] terminated = void;
    terminated[0 .. N] = literal[];
    terminated[N] = '\0';
    return terminated;
}
///
@safe pure nothrow @nogc
unittest
{
    // The result can initialize variables of any mutability...
    auto mutableCopy = toStaticArray("123");
    const constCopy = toStaticArray("123");
    immutable immutableCopy = toStaticArray("123");
    assert(mutableCopy == "123\0");
    assert(constCopy == "123\0");
    assert(immutableCopy == "123\0");

    // ...and can be computed at compile time
    enum ctCopy = toStaticArray("123");
    static assert(ctCopy == "123\0");

    // An empty literal yields just the terminator
    const empty = toStaticArray("");
    static assert(empty.length == 1);
    static assert(empty[0] == '\0');
}
/**
 * Checks if C string `p` starts with `needle`.
 *
 * Both `p` and `needle.ptr` must be non-null (checked by the `in` contract),
 * and `needle` must not contain a `\0` character (asserted per character).
 *
 * Params:
 *     p = the C string to check
 *     needle = the string to look for
 * Returns:
 *    `true` if `p` starts with `needle`
 */
@system pure nothrow @nogc
bool startsWith(scope const(char)* p, scope const(char)[] needle)
in { assert(p && needle.ptr); }
do
{
    for (size_t i = 0; i < needle.length; ++i)
    {
        const expected = needle[i];
        // Because `expected` is never `\0`, reaching the terminator of `p`
        // shows up as a mismatch and the scan stops there.
        assert(expected);
        if (p[i] != expected)
            return false;
    }
    return true;
}
///
@system pure nothrow @nogc
unittest
{
    // Build a `\0`-terminated buffer and take a C-style pointer to it
    const storage = toStaticArray("123");
    const cstr = &storage[0];

    // Every prefix of the string matches, including the empty one
    assert(startsWith(cstr, ""));
    assert(startsWith(cstr, "1"));
    assert(startsWith(cstr, "12"));
    assert(startsWith(cstr, "123"));

    // A needle longer than the string does not match
    assert(!startsWith(cstr, "1234"));
}