blob: b430114e516f7e5de693283be7aee0e8752db946 [file] [log] [blame]
// Written in the D programming language.
/++
Functions which operate on ASCII characters.
All of the functions in std._ascii accept Unicode characters but
effectively ignore them if they're not ASCII. All $(D isX) functions return
$(D false) for non-ASCII characters, and all $(D toX) functions do nothing
to non-ASCII characters.
For functions which operate on Unicode characters, see
$(MREF std, uni).
$(SCRIPT inhibitQuickIndex = 1;)
$(DIVC quickindex,
$(BOOKTABLE,
$(TR $(TH Category) $(TH Functions))
$(TR $(TD Validation) $(TD
$(LREF isAlpha)
$(LREF isAlphaNum)
$(LREF isASCII)
$(LREF isControl)
$(LREF isDigit)
$(LREF isGraphical)
$(LREF isHexDigit)
$(LREF isLower)
$(LREF isOctalDigit)
$(LREF isPrintable)
$(LREF isPunctuation)
$(LREF isUpper)
$(LREF isWhite)
))
$(TR $(TD Conversions) $(TD
$(LREF toLower)
$(LREF toUpper)
))
$(TR $(TD Constants) $(TD
$(LREF digits)
$(LREF fullHexDigits)
$(LREF hexDigits)
$(LREF letters)
$(LREF lowercase)
$(LREF lowerHexDigits)
$(LREF newline)
$(LREF octalDigits)
$(LREF uppercase)
$(LREF whitespace)
))
$(TR $(TD Enums) $(TD
$(LREF LetterCase)
))
))
References:
$(LINK2 http://www.digitalmars.com/d/ascii-table.html, ASCII Table),
$(HTTP en.wikipedia.org/wiki/Ascii, Wikipedia)
License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: $(HTTP digitalmars.com, Walter Bright) and Jonathan M Davis
Source: $(PHOBOSSRC std/_ascii.d)
+/
module std.ascii;
version (unittest)
{
// FIXME: When dmd bug #314 is fixed, make these selective.
import std.meta; // : AliasSeq;
import std.range; // : chain;
import std.traits; // : functionAttributes, FunctionAttribute, isSafe;
}
/// Every hexadecimal digit in both letter cases: 0 .. 9, A .. F, a .. f
immutable string fullHexDigits = "0123456789ABCDEFabcdef";
/// Hexadecimal digits using uppercase letters: 0 .. 9, A .. F
immutable string hexDigits = fullHexDigits[0 .. 16];
/// Hexadecimal digits using lowercase letters: 0 .. 9, a .. f
immutable string lowerHexDigits = "0123456789abcdef";
/// Decimal _digits: 0 .. 9
immutable string digits = hexDigits[0 .. 10];
/// Octal digits: 0 .. 7
immutable string octalDigits = digits[0 .. 8];
/// All ASCII _letters, uppercase first: A .. Z, a .. z
immutable string letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
/// Uppercase ASCII letters: A .. Z
immutable string uppercase = letters[0 .. 26];
/// Lowercase ASCII letters: a .. z
immutable string lowercase = letters[26 .. 52];
/// ASCII _whitespace
immutable string whitespace = " \t\v\r\n\f";
/++
    Letter case specifier: selects between upper case and lower case letters.

    The enum is backed by $(D bool); $(D upper) is the initial value.
+/
enum LetterCase : bool
{
    upper = false, /// Upper case letters
    lower = true, /// Lower case letters
}
///
@safe unittest
{
    import std.conv : to;

    // 42 is 2A in base 16; the letter case argument decides how the 'A' digit is written.
    auto upperHex = to!string(42, 16, LetterCase.upper);
    auto lowerHex = to!string(42, 16, LetterCase.lower);
    assert(upperHex == "2A");
    assert(lowerHex == "2a");
}
///
@system unittest
{
    import std.digest.digest : toHexString;
    import std.digest.hmac : hmac;
    import std.digest.sha : SHA1;
    import std.string : representation;

    // LetterCase is also accepted as a template argument by toHexString.
    auto message = "A very long phrase".representation;
    auto key = "secret".representation;
    const sha1HMAC = message.hmac!SHA1(key).toHexString!(LetterCase.lower);
    assert(sha1HMAC == "49f2073c7bf58577e8c9ae59fe8cfd37c9ab94e5");
}
/// Newline sequence for this system: "\r\n" on Windows, "\n" on Posix.
version (Windows)
    immutable string newline = "\r\n";
else version (Posix)
    immutable string newline = "\n";
else
    static assert(0, "Unsupported OS");
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is an ASCII letter or digit
             (0 .. 9, a .. z, A .. Z). Always $(D false) for non-ASCII input.
+/
bool isAlphaNum(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
        case '0': .. case '9':
        case 'A': .. case 'Z':
        case 'a': .. case 'z':
            return true;
        default:
            return false;
    }
}
///
@safe pure nothrow @nogc unittest
{
    // Letters and digits qualify; punctuation does not.
    assert(isAlphaNum('A'));
    assert(isAlphaNum('1'));
    assert(!isAlphaNum('#'));

    // N.B.: does not return true for non-ASCII Unicode alphanumerics:
    assert(!isAlphaNum('á'));
}
@safe unittest
{
    foreach (accepted; chain(digits, octalDigits, fullHexDigits, letters, lowercase, uppercase))
        assert(isAlphaNum(accepted));

    foreach (rejected; whitespace)
        assert(!isAlphaNum(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is an ASCII letter (A .. Z, a .. z).
+/
bool isAlpha(dchar c) @safe pure nothrow @nogc
{
    // Setting bit 5 maps 'A' .. 'Z' onto 'a' .. 'z' and leaves 'a' .. 'z'
    // unchanged, so one range check covers both cases.
    const uint folded = c | 0x20;
    return 'a' <= folded && folded <= 'z';
}
///
@safe pure nothrow @nogc unittest
{
    // Only letters qualify; digits and punctuation do not.
    assert(isAlpha('A'));
    assert(!isAlpha('1'));
    assert(!isAlpha('#'));

    // N.B.: does not return true for non-ASCII Unicode alphabetic characters:
    assert(!isAlpha('á'));
}
@safe unittest
{
    foreach (accepted; chain(letters, lowercase, uppercase))
        assert(isAlpha(accepted));

    foreach (rejected; chain(digits, octalDigits, whitespace))
        assert(!isAlpha(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a lowercase ASCII letter (a .. z).
+/
bool isLower(dchar c) @safe pure nothrow @nogc
{
    if (c < 'a')
        return false;
    return c <= 'z';
}
///
@safe pure nothrow @nogc unittest
{
    // Only lowercase letters qualify.
    assert(isLower('a'));
    assert(!isLower('A'));
    assert(!isLower('#'));

    // N.B.: does not return true for non-ASCII Unicode lowercase letters
    assert(!isLower('á'));
    assert(!isLower('Á'));
}
@safe unittest
{
    foreach (accepted; lowercase)
        assert(isLower(accepted));

    foreach (rejected; chain(digits, uppercase, whitespace))
        assert(!isLower(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is an uppercase ASCII letter (A .. Z).
+/
bool isUpper(dchar c) @safe pure nothrow @nogc
{
    // Reject everything outside the closed range 'A' .. 'Z'.
    return !(c < 'A' || c > 'Z');
}
///
@safe pure nothrow @nogc unittest
{
    // Only uppercase letters qualify.
    assert(isUpper('A'));
    assert(!isUpper('a'));
    assert(!isUpper('#'));

    // N.B.: does not return true for non-ASCII Unicode uppercase letters
    assert(!isUpper('á'));
    assert(!isUpper('Á'));
}
@safe unittest
{
    foreach (accepted; uppercase)
        assert(isUpper(accepted));

    foreach (rejected; chain(digits, lowercase, whitespace))
        assert(!isUpper(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a decimal digit (0 .. 9).
+/
bool isDigit(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
        case '0': .. case '9':
            return true;
        default:
            return false;
    }
}
///
@safe pure nothrow @nogc unittest
{
    assert( isDigit('3'));
    assert( isDigit('8'));
    assert(!isDigit('B'));
    assert(!isDigit('#'));

    // N.B.: does not return true for non-ASCII Unicode numbers.
    // The full-width digits are written as escapes so that they cannot be
    // silently turned into their ASCII look-alikes, which would make these
    // assertions fail.
    assert(!isDigit('\uFF10')); // full-width digit zero (U+FF10)
    assert(!isDigit('\uFF14')); // full-width digit four (U+FF14)
}
@safe unittest
{
    // Every ASCII digit is accepted.
    foreach (c; digits)
        assert(isDigit(c));

    // Letters and whitespace are rejected.
    foreach (c; chain(letters, whitespace))
        assert(!isDigit(c));
}
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit in base 8 (0 .. 7).
+/
bool isOctalDigit(dchar c) @safe pure nothrow @nogc
{
    if (c < '0' || c > '7')
        return false;
    return true;
}
///
@safe pure nothrow @nogc unittest
{
    // Both ends of the octal range are accepted.
    assert(isOctalDigit('0'));
    assert(isOctalDigit('7'));

    // 8 is a decimal digit but not an octal one.
    assert(!isOctalDigit('8'));
    assert(!isOctalDigit('A'));
    assert(!isOctalDigit('#'));
}
@safe unittest
{
    foreach (accepted; octalDigits)
        assert(isOctalDigit(accepted));

    foreach (rejected; chain(letters, ['8', '9'], whitespace))
        assert(!isOctalDigit(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a digit in base 16 (0 .. 9, A .. F, a .. f).
+/
bool isHexDigit(dchar c) @safe pure nothrow @nogc
{
    const bool decimal = '0' <= c && c <= '9';
    const bool upperHex = 'A' <= c && c <= 'F';
    const bool lowerHex = 'a' <= c && c <= 'f';
    return decimal || upperHex || lowerHex;
}
///
@safe pure nothrow @nogc unittest
{
    assert(isHexDigit('0'));
    assert(isHexDigit('A'));
    assert(isHexDigit('f')); // lowercase hex digits are accepted

    // Letters past F/f are not hexadecimal digits.
    assert(!isHexDigit('g'));
    assert(!isHexDigit('G'));
    assert(!isHexDigit('#'));
}
@safe unittest
{
    foreach (accepted; fullHexDigits)
        assert(isHexDigit(accepted));

    // Skip the first six letters (a .. f / A .. F) of each alphabet.
    foreach (rejected; chain(lowercase[6 .. $], uppercase[6 .. $], whitespace))
        assert(!isHexDigit(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a whitespace character. That includes
             the space, tab, vertical tab, form feed, carriage return, and
             linefeed characters.
+/
bool isWhite(dchar c) @safe pure nothrow @nogc
{
    switch (c)
    {
        // '\t' .. '\r' are the contiguous code points 0x09 .. 0x0D.
        case ' ', '\t', '\n', '\v', '\f', '\r':
            return true;
        default:
            return false;
    }
}
///
@safe pure nothrow @nogc unittest
{
    // Space, tab and newline are whitespace.
    assert(isWhite(' '));
    assert(isWhite('\t'));
    assert(isWhite('\n'));

    // Digits, letters and punctuation are not.
    assert(!isWhite('1'));
    assert(!isWhite('a'));
    assert(!isWhite('#'));

    // N.B.: Does not return true for non-ASCII Unicode whitespace characters.
    static import std.uni;
    assert(std.uni.isWhite('\u00A0'));
    assert(!isWhite('\u00A0')); // std.ascii.isWhite
}
@safe unittest
{
    foreach (accepted; whitespace)
        assert(isWhite(accepted));

    foreach (rejected; chain(digits, letters))
        assert(!isWhite(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether $(D c) is a control character, i.e. a code point below
             0x20 or the code point 0x7F.
+/
bool isControl(dchar c) @safe pure nothrow @nogc
{
    if (c == 0x7F)
        return true;
    return c < 0x20;
}
///
@safe pure nothrow @nogc unittest
{
    assert(isControl('\0'));
    assert(isControl('\022'));
    assert(isControl('\n')); // newline is both whitespace and control

    // Printable characters, including the space, are not control characters.
    assert(!isControl(' '));
    assert(!isControl('1'));
    assert(!isControl('a'));
    assert(!isControl('#'));

    // N.B.: non-ASCII Unicode control characters are not recognized:
    assert(!isControl('\u0080'));
    assert(!isControl('\u2028'));
    assert(!isControl('\u2029'));
}
@safe unittest
{
    foreach (dchar code; 0 .. 32)
        assert(isControl(code));
    assert(isControl(127));

    foreach (rejected; chain(digits, letters, [' ']))
        assert(!isControl(rejected));
}
/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a punctuation character. That includes
             all ASCII characters which are not control characters, letters,
             digits, or whitespace.
+/
bool isPunctuation(dchar c) @safe pure nothrow @nogc
{
    // Anything outside '!' .. '~' is a control character, the space, or non-ASCII.
    if (c < '!' || c > '~')
        return false;
    return !isAlphaNum(c);
}
///
@safe pure nothrow @nogc unittest
{
    assert( isPunctuation('.'));
    assert( isPunctuation(','));
    assert( isPunctuation(':'));
    assert( isPunctuation('!'));
    assert( isPunctuation('#'));
    assert( isPunctuation('~'));
    assert( isPunctuation('+'));
    assert( isPunctuation('_'));

    assert(!isPunctuation('1'));
    assert(!isPunctuation('a'));
    assert(!isPunctuation(' '));
    assert(!isPunctuation('\n'));
    assert(!isPunctuation('\0'));

    // N.B.: Non-ASCII Unicode punctuation characters are not recognized.
    assert(!isPunctuation('\u2012')); // (U+2012 = figure dash)
}
@safe unittest
{
    // Across the whole ASCII range, punctuation is exactly what is left after
    // removing control characters, letters, digits and the space.
    foreach (dchar c; 0 .. 128)
    {
        if (isControl(c) || isAlphaNum(c) || c == ' ')
            assert(!isPunctuation(c));
        else
            assert(isPunctuation(c));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a printable character other than the
             space character.
+/
bool isGraphical(dchar c) @safe pure nothrow @nogc
{
    // Strictly between the space (0x20) and DEL (0x7F), i.e. '!' .. '~'.
    return ' ' < c && c < 0x7F;
}
///
@safe pure nothrow @nogc unittest
{
    assert(isGraphical('1'));
    assert(isGraphical('a'));
    assert(isGraphical('#'));

    assert(!isGraphical(' ')); // whitespace is not graphical
    assert(!isGraphical('\n'));
    assert(!isGraphical('\0'));

    // N.B.: Unicode graphical characters are not regarded as such.
    assert(!isGraphical('á'));
}
@safe unittest
{
    // Within ASCII, everything except control characters and the space is graphical.
    foreach (dchar code; 0 .. 128)
    {
        if (isControl(code) || code == ' ')
            assert(!isGraphical(code));
        else
            assert(isGraphical(code));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is a printable character - including the
             space character.
+/
bool isPrintable(dchar c) @safe pure nothrow @nogc
{
    // Reject everything outside the closed range ' ' .. '~'.
    return !(c < ' ' || '~' < c);
}
///
@safe pure nothrow @nogc unittest
{
    assert(isPrintable(' ')); // whitespace is printable
    assert(isPrintable('1'));
    assert(isPrintable('a'));
    assert(isPrintable('#'));

    assert(!isPrintable('\0')); // control characters are not printable

    // N.B.: Printable non-ASCII Unicode characters are not recognized.
    assert(!isPrintable('á'));
}
@safe unittest
{
    // Within ASCII, printable is exactly the complement of control.
    foreach (dchar code; 0 .. 128)
    {
        if (isControl(code))
            assert(!isPrintable(code));
        else
            assert(isPrintable(code));
    }
}
/++
    Params: c = The character to test.
    Returns: Whether or not $(D c) is in the ASCII character set - i.e. in the
             range 0 .. 0x7F.
+/
pragma(inline, true)
bool isASCII(dchar c) @safe pure nothrow @nogc
{
    // 0x80 is the first code point past the 7-bit ASCII range.
    return c < 0x80;
}
///
@safe pure nothrow @nogc unittest
{
    // A plain letter is ASCII; an accented one is not.
    assert(isASCII('a'));
    assert(!isASCII('á'));
}
@safe unittest
{
    // 0 .. 127 are ASCII; 128 is the first value that is not.
    foreach (dchar code; 0 .. 128)
        assert(isASCII(code));
    assert(!isASCII(128));
}
/++
    Converts an ASCII letter to lowercase.

    Params: c = A character of any type that implicitly converts to $(D dchar).
                In the case where it's a built-in type, or an enum of a
                built-in type, $(D Unqual!(OriginalType!C)) is returned,
                whereas if it's a user-defined type, $(D dchar) is returned.

    Returns: The corresponding lowercase letter, if $(D c) is an uppercase
             ASCII character, otherwise $(D c) itself.
+/
auto toLower(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Pick the result type: dchar for aggregates, otherwise the unqualified
    // underlying built-in type.
    alias Base = OriginalType!C;
    static if (isAggregateType!Base)
        alias Result = dchar;
    else
        alias Result = Unqual!Base;

    Result value = cast(Result) c;
    if (isUpper(c))
        value = cast(Result)(value + 'a' - 'A');
    return value;
}
///
@safe pure nothrow @nogc unittest
{
// Lowercase letters and non-letters come back unchanged; uppercase is converted.
assert(toLower('a') == 'a');
assert(toLower('A') == 'a');
assert(toLower('#') == '#');
// N.B.: Non-ASCII Unicode uppercase letters are not converted.
assert(toLower('Á') == 'Á');
}
@safe pure nothrow unittest
{
// The body below is instantiated once for each character-like type C.
foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
{
// Each uppercase letter maps to the lowercase letter at the same index.
foreach (i, c; uppercase)
assert(toLower(cast(C) c) == lowercase[i]);
// Within ASCII, only 'A' .. 'Z' are changed.
foreach (C c; 0 .. 128)
{
if (c < 'A' || c > 'Z')
assert(toLower(c) == c);
else
assert(toLower(c) != c);
}
// Values from 128 up to (but not including) C.max are returned unchanged.
foreach (C c; 128 .. C.max)
assert(toLower(c) == c);
//CTFE
static assert(toLower(cast(C)'a') == 'a');
static assert(toLower(cast(C)'A') == 'a');
}
}
/++
    Converts an ASCII letter to uppercase.

    Params: c = Any type which implicitly converts to $(D dchar). In the case
                where it's a built-in type, or an enum of a built-in type,
                $(D Unqual!(OriginalType!C)) is returned, whereas if it's a
                user-defined type, $(D dchar) is returned.

    Returns: The corresponding uppercase letter, if $(D c) is a lowercase ASCII
             character, otherwise $(D c) itself.
+/
auto toUpper(C)(C c)
if (is(C : dchar))
{
    import std.traits : isAggregateType, OriginalType, Unqual;

    // Pick the result type: dchar for aggregates, otherwise the unqualified
    // underlying built-in type.
    alias Base = OriginalType!C;
    static if (isAggregateType!Base)
        alias Result = dchar;
    else
        alias Result = Unqual!Base;

    Result value = cast(Result) c;
    if (isLower(c))
        value = cast(Result)(value - ('a' - 'A'));
    return value;
}
///
@safe pure nothrow @nogc unittest
{
// Uppercase letters and non-letters come back unchanged; lowercase is converted.
assert(toUpper('a') == 'A');
assert(toUpper('A') == 'A');
assert(toUpper('#') == '#');
// N.B.: Non-ASCII Unicode lowercase letters are not converted.
assert(toUpper('á') == 'á');
}
@safe pure nothrow unittest
{
// The body below is instantiated once for each character-like type C.
foreach (C; AliasSeq!(char, wchar, dchar, immutable char, ubyte))
{
// Each lowercase letter maps to the uppercase letter at the same index.
foreach (i, c; lowercase)
assert(toUpper(cast(C) c) == uppercase[i]);
// Within ASCII, only 'a' .. 'z' are changed.
foreach (C c; 0 .. 128)
{
if (c < 'a' || c > 'z')
assert(toUpper(c) == c);
else
assert(toUpper(c) != c);
}
// Values from 128 up to (but not including) C.max are returned unchanged.
foreach (C c; 128 .. C.max)
assert(toUpper(c) == c);
//CTFE
static assert(toUpper(cast(C)'a') == 'A');
static assert(toUpper(cast(C)'A') == 'A');
}
}
@safe unittest //Test both toUpper and toLower with non-builtin
{
// Structs that wrap a character and convert to it implicitly via alias this.
//User Defined [Char|Wchar|Dchar]
static struct UDC { char c; alias c this; }
static struct UDW { wchar c; alias c this; }
static struct UDD { dchar c; alias c this; }
// Enums whose base type is a built-in character type.
//[Char|Wchar|Dchar] Enum
enum CE : char {a = 'a', A = 'A'}
enum WE : wchar {a = 'a', A = 'A'}
enum DE : dchar {a = 'a', A = 'A'}
// Enums whose base type is one of the wrapper structs above.
//User Defined [Char|Wchar|Dchar] Enum
enum UDCE : UDC {a = UDC('a'), A = UDC('A')}
enum UDWE : UDW {a = UDW('a'), A = UDW('A')}
enum UDDE : UDD {a = UDD('a'), A = UDD('A')}
//User defined types with implicit cast to dchar test.
foreach (Char; AliasSeq!(UDC, UDW, UDD))
{
// Run-time checks (toLower only) ...
assert(toLower(Char('a')) == 'a');
assert(toLower(Char('A')) == 'a');
// ... and compile-time (CTFE) checks for both directions.
static assert(toLower(Char('a')) == 'a');
static assert(toLower(Char('A')) == 'a');
static assert(toUpper(Char('a')) == 'A');
static assert(toUpper(Char('A')) == 'A');
}
//Various enum tests.
foreach (Enum; AliasSeq!(CE, WE, DE, UDCE, UDWE, UDDE))
{
// Run-time checks.
assert(toLower(Enum.a) == 'a');
assert(toLower(Enum.A) == 'a');
assert(toUpper(Enum.a) == 'A');
assert(toUpper(Enum.A) == 'A');
// The same checks evaluated at compile time.
static assert(toLower(Enum.a) == 'a');
static assert(toLower(Enum.A) == 'a');
static assert(toUpper(Enum.a) == 'A');
static assert(toUpper(Enum.A) == 'A');
}
//Return value type tests for enum of non-UDT. These should be the original type.
foreach (T; AliasSeq!(CE, WE, DE))
{
alias C = OriginalType!T;
static assert(is(typeof(toLower(T.init)) == C));
static assert(is(typeof(toUpper(T.init)) == C));
}
//Return value tests for UDT and enum of UDT. These should be dchar
foreach (T; AliasSeq!(UDC, UDW, UDD, UDCE, UDWE, UDDE))
{
static assert(is(typeof(toLower(T.init)) == dchar));
static assert(is(typeof(toUpper(T.init)) == dchar));
}
}