// gcc/d/dmd/root/string.d - gcc - Git at Google

 /**
  * Contains various string related functions.
  *
  * Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
  * Authors:   Walter Bright, https://www.digitalmars.com
  * License:   $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
  * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/string.d, root/_string.d)
  * Documentation:  https://dlang.org/phobos/dmd_root_string.html
  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/string.d
  */
 module dmd.root.string;

 /// Returns a slice over the `\0`-terminated C-string `s` without its
 /// terminator, or `null` when `s` itself is `null`.
 inout(char)[] toDString (inout(char)* s) pure nothrow @nogc
 {
     import core.stdc.string : strlen;

     if (s is null)
         return null;

     const count = strlen(s);
     return s[0 .. count];
 }

 /**
 Case-insensitive equality test for two slices

 The comparison works on individual `char` values and performs no decoding,
 so it is only really accurate for plain ASCII strings.

 Params:
 s1 = string to compare
 s2 = string to compare

 Returns:
 `true` if `s1 == s2` regardless of case
 */
 extern(D) static bool iequals(const(char)[] s1, const(char)[] s2) pure nothrow @nogc
 {
     import core.stdc.ctype : toupper;

     const count = s1.length;
     if (count != s2.length)
         return false;

     // Both slices have `count` elements, so raw pointer indexing stays in range.
     for (size_t i = 0; i < count; ++i)
     {
         const a = s1.ptr[i];
         const b = s2.ptr[i];
         // Only fall back to case folding when the raw characters differ.
         if (a != b && toupper(a) != toupper(b))
             return false;
     }
     return true;
 }

 /**
 Copy the content of `src` into a C-string ('\0' terminated) then call `dg`

 The intent of this function is to provide an allocation-less
 way to call a C function using a D slice.
 The function internally allocates a buffer if needed, but frees it on exit.

 Note:
 The argument to `dg` is `scope`. To keep the data around after `dg` exits,
 one has to copy it.

 Params:
 src = Slice to use to call the C function
 dg  = Callable invoked with the terminated copy (terminator included in the slice)

 Returns:
 The return value of `dg`
 */
 auto toCStringThen(alias dg)(const(char)[] src) nothrow
 {
     import dmd.common.string : SmallBuffer;

     // Room for the content plus the terminator.
     const len = src.length + 1;
     // Stack storage offered to SmallBuffer.
     // NOTE(review): presumably SmallBuffer only allocates when `len` exceeds
     // this array - confirm against dmd.common.string.
     char[512] small = void;
     auto sb = SmallBuffer!char(len, small[]);
     scope ptr = sb[];
     ptr[0 .. src.length] = src[];
     ptr[src.length] = '\0';
     return dg(ptr);
 }

 unittest
 {
     // A plain slice is passed on with exactly one terminator appended.
     const plain = "Hello world".toCStringThen!(buf => buf == "Hello world\0");
     assert(plain);

     // A terminator already present in the input is kept and another is added.
     const terminated = "Hello world\0".toCStringThen!(buf => buf == "Hello world\0\0");
     assert(terminated);

     // A null slice produces only the terminator.
     const empty = null.toCStringThen!(buf => buf == "\0");
     assert(empty);
 }

 /**
  * Removes a single line terminator from the start of the given string.
  *
  * The following are what the Unicode standard considers as line terminators:
  *
  * | Name                | D Escape Sequence | Unicode Code Point |
  * |---------------------|-------------------|--------------------|
  * | Line feed           | `\n`              | `U+000A`           |
  * | Line tabulation     | `\v`              | `U+000B`           |
  * | Form feed           | `\f`              | `U+000C`           |
  * | Carriage return     | `\r`              | `U+000D`           |
  * | Next line           |                   | `U+0085`           |
  * | Line separator      |                   | `U+2028`           |
  * | Paragraph separator |                   | `U+2029`           |
  *
  * The two-character sequence `\r\n` is stripped as one terminator.
  * A string that does not start with a terminator is returned unchanged.
  */
 string stripLeadingLineTerminator(string str) pure nothrow @nogc @safe
 {
     // UTF-8 encodings of the multi-byte terminators.
     enum nextLine = "\xC2\x85";
     enum lineSeparator = "\xE2\x80\xA8";
     enum paragraphSeparator = "\xE2\x80\xA9";

     static assert(lineSeparator.length == paragraphSeparator.length);

     if (str.length == 0)
         return str;

     const first = str[0];

     if (first == '\r')
     {
         // Treat CR LF as one terminator, a lone CR as a one-byte terminator.
         const isCrLf = str.length >= 2 && str[1] == '\n';
         return isCrLf ? str[2 .. $] : str[1 .. $];
     }

     if (first == '\v' || first == '\f' || first == '\n')
         return str[1 .. $];

     if (first == nextLine[0])
     {
         const matches = str.length >= 2 && str[0 .. 2] == nextLine;
         return matches ? str[2 .. $] : str;
     }

     // Line separator and paragraph separator share their first byte and length.
     if (first == lineSeparator[0] && str.length >= lineSeparator.length)
     {
         const head = str[0 .. lineSeparator.length];

         if (head == lineSeparator || head == paragraphSeparator)
             return str[lineSeparator.length .. $];
     }

     return str;
 }

 unittest
 {
     // Each row is [input, expected result].
     static immutable string[2][] table = [
         ["", ""],
         ["foo", "foo"],
         ["\xC2foo", "\xC2foo"],
         ["\xE2foo", "\xE2foo"],
         ["\nfoo", "foo"],
         ["\vfoo", "foo"],
         ["\ffoo", "foo"],
         ["\rfoo", "foo"],
         ["\u0085foo", "foo"],
         ["\u2028foo", "foo"],
         ["\u2029foo", "foo"],
         ["\n\rfoo", "\rfoo"],
         ["\r\nfoo", "foo"],
     ];

     foreach (row; table)
         assert(row[0].stripLeadingLineTerminator == row[1]);
 }

 /**
  * A string comparison function that orders its arguments the way strcmp does
  *
  * Note: Strings are compared based on their `char` values, no UTF-8 decoding.
  * When one string is a prefix of the other, the shorter one sorts first.
  *
  * Some C functions (e.g. `qsort`) require an `int` result for comparison.
  * See_Also: Druntime's `core.internal.string`
  */
 int dstrcmp()( scope const char[] s1, scope const char[] s2 ) @trusted
 {
     import core.stdc.string : memcmp;

     // Only the part both strings have in common is compared element-wise.
     const shared_ = s1.length <= s2.length ? s1.length : s2.length;

     if (!__ctfe)
     {
         const diff = memcmp( s1.ptr, s2.ptr, shared_ );
         if ( diff != 0 )
             return diff;
     }
     else
     {
         // Plain loop for compile-time evaluation.
         for (size_t i = 0; i < shared_; ++i)
         {
             const a = s1[i];
             const b = s2[i];
             if (a != b)
                 return a > b ? 1 : -1;
         }
     }

     // Common part is identical: the length decides.
     if (s1.length < s2.length)
         return -1;
     return s1.length > s2.length ? 1 : 0;
 }

 //
 unittest
 {
     enum first = "Baguette";
     enum second = "Croissant";
     enum greeting = "안녕하세요!";

     // Run-time comparisons.
     assert(dstrcmp("Fraise", "Fraise") == 0);
     assert(dstrcmp(first, second) < 0);
     assert(dstrcmp(second, first) > 0);

     // Same ordering when evaluated at compile time.
     static assert(dstrcmp(first, second) < 0);

     // Multi-byte UTF-8 content compares equal to itself, at run time and compile time.
     assert(dstrcmp(greeting, greeting) == 0);
     static assert(dstrcmp(greeting, greeting) == 0);
 }

 /**
  * Takes a string literal of inferred length `N` as a static array and returns
  * a static array of length `N + 1` holding the same characters followed by a
  * null character.
  *
  * Params:
  *  literal = string literal
  *
  * Notes:
  *  - LDC produces quite optimal code for short strings:
  *    - https://d.godbolt.org/z/M69Z1g
  *    - https://gist.github.com/PetarKirov/338e4ab9292b6b2b311a3070572a07fb (backup URL)
 */
 char[N + 1] toStaticArray(size_t N)(scope const(char)[N] literal)
 {
     // Every element is written below, so default initialization is skipped.
     char[N + 1] terminated = void;

     foreach (i, ch; literal)
         terminated[i] = ch;
     terminated[N] = '\0';

     return terminated;
 }

 ///
 @safe pure nothrow @nogc
 unittest
 {
     enum expected = "123\0";

     // The result can initialize variables of any mutability, and an enum.
     auto mutableCopy = "123".toStaticArray;
     const constCopy = "123".toStaticArray;
     immutable immutableCopy = "123".toStaticArray;
     enum ctfeCopy = "123".toStaticArray;

     assert(mutableCopy == expected);
     assert(constCopy == expected);
     assert(immutableCopy == expected);
     static assert(ctfeCopy == expected);

     // An empty literal still yields the terminator.
     const blank = "".toStaticArray;
     static assert(blank.length == 1);
     static assert(blank[0] == '\0');
 }

 /**
  * Tests whether the C string `p` begins with `needle`.
  *
  * Both `p` and `needle.ptr` must be non-null, and `needle` must not contain
  * a `\0` character; both conditions are asserted.
  *
  * Params:
  *     p = the C string to check
  *     needle = the string to look for
  * Returns:
  *    `true` if `p` starts with `needle`
  */
 @system pure nothrow @nogc
 bool startsWith(scope const(char)* p, scope const(char)[] needle)
 in { assert(p && needle.ptr); }
 do
 {
     for (size_t i = 0; i < needle.length; ++i)
     {
         const wanted = needle[i];
         assert(wanted);
         // Reaching the terminator of `p` is a mismatch too, since `wanted`
         // is never `\0`; the scan therefore stops there.
         if (p[i] != wanted)
             return false;
     }
     return true;
 }

 ///
 @system pure nothrow @nogc
 unittest
 {
     const digits = "123".toStaticArray;
     const(char)* cstr = digits.ptr;

     // Every prefix matches, the empty one included.
     assert(startsWith(cstr, ""));
     assert(startsWith(cstr, "1"));
     assert(startsWith(cstr, "12"));
     assert(startsWith(cstr, "123"));

     // A needle longer than the C string does not match.
     assert(!startsWith(cstr, "1234"));
 }
	/**
	* Contains various string related functions.
	*
	* Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
	* Authors: Walter Bright, https://www.digitalmars.com
	* License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
	* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/string.d, root/_string.d)
	* Documentation: https://dlang.org/phobos/dmd_root_string.html
	* Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/string.d
	*/
	module dmd.root.string;

	/// Returns a slice over the `\0`-terminated C-string `s` without its
	/// terminator, or `null` when `s` itself is `null`.
	inout(char)[] toDString (inout(char)* s) pure nothrow @nogc
	{
	    import core.stdc.string : strlen;

	    if (s is null)
	        return null;

	    const count = strlen(s);
	    return s[0 .. count];
	}

	/**
	Case-insensitive equality test for two slices

	The comparison works on individual `char` values and performs no decoding,
	so it is only really accurate for plain ASCII strings.

	Params:
	s1 = string to compare
	s2 = string to compare

	Returns:
	`true` if `s1 == s2` regardless of case
	*/
	extern(D) static bool iequals(const(char)[] s1, const(char)[] s2) pure nothrow @nogc
	{
	    import core.stdc.ctype : toupper;

	    const count = s1.length;
	    if (count != s2.length)
	        return false;

	    // Both slices have `count` elements, so raw pointer indexing stays in range.
	    for (size_t i = 0; i < count; ++i)
	    {
	        const a = s1.ptr[i];
	        const b = s2.ptr[i];
	        // Only fall back to case folding when the raw characters differ.
	        if (a != b && toupper(a) != toupper(b))
	            return false;
	    }
	    return true;
	}

	/**
	Copy the content of `src` into a C-string ('\0' terminated) then call `dg`

	The intent of this function is to provide an allocation-less
	way to call a C function using a D slice.
	The function internally allocates a buffer if needed, but frees it on exit.

	Note:
	The argument to `dg` is `scope`. To keep the data around after `dg` exits,
	one has to copy it.

	Params:
	src = Slice to use to call the C function
	dg = Callable invoked with the terminated copy (terminator included in the slice)

	Returns:
	The return value of `dg`
	*/
	auto toCStringThen(alias dg)(const(char)[] src) nothrow
	{
	    import dmd.common.string : SmallBuffer;

	    // Room for the content plus the terminator.
	    const len = src.length + 1;
	    // Stack storage offered to SmallBuffer.
	    // NOTE(review): presumably SmallBuffer only allocates when `len` exceeds
	    // this array - confirm against dmd.common.string.
	    char[512] small = void;
	    auto sb = SmallBuffer!char(len, small[]);
	    scope ptr = sb[];
	    ptr[0 .. src.length] = src[];
	    ptr[src.length] = '\0';
	    return dg(ptr);
	}

	unittest
	{
	    // A plain slice is passed on with exactly one terminator appended.
	    const plain = "Hello world".toCStringThen!(buf => buf == "Hello world\0");
	    assert(plain);

	    // A terminator already present in the input is kept and another is added.
	    const terminated = "Hello world\0".toCStringThen!(buf => buf == "Hello world\0\0");
	    assert(terminated);

	    // A null slice produces only the terminator.
	    const empty = null.toCStringThen!(buf => buf == "\0");
	    assert(empty);
	}

	/**
	 * Strips one leading line terminator of the given string.
	 *
	 * The following are what the Unicode standard considers as line terminators:
	 *
	 * | Name                | D Escape Sequence | Unicode Code Point |
	 * |---------------------|-------------------|--------------------|
	 * | Line feed           | `\n`              | `U+000A`           |
	 * | Line tabulation     | `\v`              | `U+000B`           |
	 * | Form feed           | `\f`              | `U+000C`           |
	 * | Carriage return     | `\r`              | `U+000D`           |
	 * | Next line           |                   | `U+0085`           |
	 * | Line separator      |                   | `U+2028`           |
	 * | Paragraph separator |                   | `U+2029`           |
	 *
	 * This function will also strip `\r\n`.
	 * A string that does not start with a terminator is returned unchanged.
	 */
	string stripLeadingLineTerminator(string str) pure nothrow @nogc @safe
	{
	    // UTF-8 encodings of the multi-byte terminators.
	    enum nextLine = "\xC2\x85";
	    enum lineSeparator = "\xE2\x80\xA8";
	    enum paragraphSeparator = "\xE2\x80\xA9";

	    static assert(lineSeparator.length == paragraphSeparator.length);

	    if (str.length == 0)
	        return str;

	    switch (str[0])
	    {
	        case '\r':
	        {
	            // CR LF counts as a single terminator.
	            if (str.length >= 2 && str[1] == '\n')
	                return str[2 .. $];
	            // A lone CR is handled like the other one-byte terminators.
	            goto case;
	        }
	        case '\v', '\f', '\n': return str[1 .. $];

	        case nextLine[0]:
	        {
	            if (str.length >= 2 && str[0 .. 2] == nextLine)
	                return str[2 .. $];

	            return str;
	        }

	        // Line separator and paragraph separator share their first byte.
	        case lineSeparator[0]:
	        {
	            if (str.length >= lineSeparator.length)
	            {
	                const prefix = str[0 .. lineSeparator.length];

	                if (prefix == lineSeparator || prefix == paragraphSeparator)
	                    return str[lineSeparator.length .. $];
	            }

	            return str;
	        }

	        default: return str;
	    }
	}

	unittest
	{
	    // Each row is [input, expected result].
	    static immutable string[2][] table = [
	        ["", ""],
	        ["foo", "foo"],
	        ["\xC2foo", "\xC2foo"],
	        ["\xE2foo", "\xE2foo"],
	        ["\nfoo", "foo"],
	        ["\vfoo", "foo"],
	        ["\ffoo", "foo"],
	        ["\rfoo", "foo"],
	        ["\u0085foo", "foo"],
	        ["\u2028foo", "foo"],
	        ["\u2029foo", "foo"],
	        ["\n\rfoo", "\rfoo"],
	        ["\r\nfoo", "foo"],
	    ];

	    foreach (row; table)
	        assert(row[0].stripLeadingLineTerminator == row[1]);
	}

	/**
	 * A string comparison function that orders its arguments the way strcmp does
	 *
	 * Note: Strings are compared based on their `char` values, no UTF-8 decoding.
	 * When one string is a prefix of the other, the shorter one sorts first.
	 *
	 * Some C functions (e.g. `qsort`) require an `int` result for comparison.
	 * See_Also: Druntime's `core.internal.string`
	 */
	int dstrcmp()( scope const char[] s1, scope const char[] s2 ) @trusted
	{
	    import core.stdc.string : memcmp;

	    // Only the part both strings have in common is compared element-wise.
	    const shared_ = s1.length <= s2.length ? s1.length : s2.length;

	    if (!__ctfe)
	    {
	        const diff = memcmp( s1.ptr, s2.ptr, shared_ );
	        if ( diff != 0 )
	            return diff;
	    }
	    else
	    {
	        // Plain loop for compile-time evaluation.
	        for (size_t i = 0; i < shared_; ++i)
	        {
	            const a = s1[i];
	            const b = s2[i];
	            if (a != b)
	                return a > b ? 1 : -1;
	        }
	    }

	    // Common part is identical: the length decides.
	    if (s1.length < s2.length)
	        return -1;
	    return s1.length > s2.length ? 1 : 0;
	}

	//
	unittest
	{
	    enum first = "Baguette";
	    enum second = "Croissant";
	    enum greeting = "안녕하세요!";

	    // Run-time comparisons.
	    assert(dstrcmp("Fraise", "Fraise") == 0);
	    assert(dstrcmp(first, second) < 0);
	    assert(dstrcmp(second, first) > 0);

	    // Same ordering when evaluated at compile time.
	    static assert(dstrcmp(first, second) < 0);

	    // Multi-byte UTF-8 content compares equal to itself, at run time and compile time.
	    assert(dstrcmp(greeting, greeting) == 0);
	    static assert(dstrcmp(greeting, greeting) == 0);
	}

	/**
	 * Takes a string literal of inferred length `N` as a static array and returns
	 * a static array of length `N + 1` holding the same characters followed by a
	 * null character.
	 *
	 * Params:
	 *  literal = string literal
	 *
	 * Notes:
	 *  - LDC produces quite optimal code for short strings:
	 *    - https://d.godbolt.org/z/M69Z1g
	 *    - https://gist.github.com/PetarKirov/338e4ab9292b6b2b311a3070572a07fb (backup URL)
	 */
	char[N + 1] toStaticArray(size_t N)(scope const(char)[N] literal)
	{
	    // Every element is written below, so default initialization is skipped.
	    char[N + 1] terminated = void;

	    foreach (i, ch; literal)
	        terminated[i] = ch;
	    terminated[N] = '\0';

	    return terminated;
	}

	///
	@safe pure nothrow @nogc
	unittest
	{
	    enum expected = "123\0";

	    // The result can initialize variables of any mutability, and an enum.
	    auto mutableCopy = "123".toStaticArray;
	    const constCopy = "123".toStaticArray;
	    immutable immutableCopy = "123".toStaticArray;
	    enum ctfeCopy = "123".toStaticArray;

	    assert(mutableCopy == expected);
	    assert(constCopy == expected);
	    assert(immutableCopy == expected);
	    static assert(ctfeCopy == expected);

	    // An empty literal still yields the terminator.
	    const blank = "".toStaticArray;
	    static assert(blank.length == 1);
	    static assert(blank[0] == '\0');
	}

	/**
	 * Tests whether the C string `p` begins with `needle`.
	 *
	 * Both `p` and `needle.ptr` must be non-null, and `needle` must not contain
	 * a `\0` character; both conditions are asserted.
	 *
	 * Params:
	 *     p = the C string to check
	 *     needle = the string to look for
	 * Returns:
	 *    `true` if `p` starts with `needle`
	 */
	@system pure nothrow @nogc
	bool startsWith(scope const(char)* p, scope const(char)[] needle)
	in { assert(p && needle.ptr); }
	do
	{
	    for (size_t i = 0; i < needle.length; ++i)
	    {
	        const wanted = needle[i];
	        assert(wanted);
	        // Reaching the terminator of `p` is a mismatch too, since `wanted`
	        // is never `\0`; the scan therefore stops there.
	        if (p[i] != wanted)
	            return false;
	    }
	    return true;
	}

	///
	@system pure nothrow @nogc
	unittest
	{
	    const digits = "123".toStaticArray;
	    const(char)* cstr = digits.ptr;

	    // Every prefix matches, the empty one included.
	    assert(startsWith(cstr, ""));
	    assert(startsWith(cstr, "1"));
	    assert(startsWith(cstr, "12"));
	    assert(startsWith(cstr, "123"));

	    // A needle longer than the C string does not match.
	    assert(!startsWith(cstr, "1234"));
	}