| /** |
| * Character tables related to identifiers. |
| * |
| * Supports UAX31, C99, C11 and least restrictive (All). |
| * |
| * Copyright: Copyright (C) 1999-2025 by The D Language Foundation, All Rights Reserved |
| * Authors: $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole) |
| * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) |
| * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/compiler/src/dmd/common/charactertables.d, common/charactertables.d) |
| * Documentation: https://dlang.org/phobos/dmd_common_charactertables.html |
| * Coverage: https://codecov.io/gh/dlang/dmd/src/master/compiler/src/dmd/common/charactertables.d |
| */ |
| module dmd.common.charactertables; |
| |
| @safe nothrow @nogc pure: |
| |
| extern(C++): |
| |
/// Names the set of identifier character ranges a lookup should use.
enum IdentifierTable
{
    UAX31, /// UAX31 ranges
    C99, /// C99 ranges
    C11, /// C11 ranges
    LR, /// Least Restrictive aka All
}
| |
/// A pair of predicates that classify identifier characters for one
/// `IdentifierTable`.
struct IdentifierCharLookup
{
@safe nothrow @nogc pure:

    /// Tests a character against the selected table's Start ranges.
    extern(C++) bool function(dchar) isStart;
    /// Tests a character against the selected table's Continue ranges.
    extern(C++) bool function(dchar) isContinue;

    /**
    Builds the lookup for a given table.

    Params:
        table = which identifier table the predicates should consult
    Returns: a lookup whose `isStart` and `isContinue` search the Start and
        Continue ranges of `table`
    */
    extern(C++) static IdentifierCharLookup forTable(IdentifierTable table)
    {
        import dmd.common.identifiertables;

        // The wrapping lambdas are ugly but deliberate: taking isInRange
        // directly runs into extern(C++) ABI issues, and then it can't
        // access the tables.
        final switch (table)
        {
            case IdentifierTable.UAX31:
                return IdentifierCharLookup(
                    c => isInRange!UAX31_Start(c),
                    c => isInRange!UAX31_Continue(c));

            case IdentifierTable.C99:
                return IdentifierCharLookup(
                    c => isInRange!FixedTable_C99_Start(c),
                    c => isInRange!FixedTable_C99_Continue(c));

            case IdentifierTable.C11:
                return IdentifierCharLookup(
                    c => isInRange!FixedTable_C11_Start(c),
                    c => isInRange!FixedTable_C11_Continue(c));

            case IdentifierTable.LR:
                return IdentifierCharLookup(
                    c => isInRange!LeastRestrictive_Start(c),
                    c => isInRange!LeastRestrictive_Continue(c));
        }
    }
}
| |
/**
Convenience check for callers that care neither which identifier table is
in effect nor whether the character is a start or a continue character.

Params:
    c = the character to classify
Returns: `true` if `c` falls within `LeastRestrictive_OfAll`, the least
    restrictive table of all
*/
bool isAnyIdentifierCharacter(dchar c)
{
    import dmd.common.identifiertables : LeastRestrictive_OfAll;

    const bool found = isInRange!LeastRestrictive_OfAll(c);
    return found;
}

///
unittest
{
    // A non-ASCII letter (U+011F) is accepted.
    assert(isAnyIdentifierCharacter('ğ'));
}
| |
/**
Convenience check for callers that do not care which identifier table is
in effect.

Params:
    c = the character to classify
Returns: `true` if `c` falls within the least restrictive Start ranges
    (`LeastRestrictive_Start`)
*/
bool isAnyStart(dchar c)
{
    import dmd.common.identifiertables : LeastRestrictive_Start;

    const bool found = isInRange!LeastRestrictive_Start(c);
    return found;
}

///
unittest
{
    // A non-ASCII letter (U+011F) is accepted as a start character.
    assert(isAnyStart('ğ'));
}
| |
/**
Convenience check for callers that do not care which identifier table is
in effect.

Params:
    c = the character to classify
Returns: `true` if `c` falls within the least restrictive Continue ranges
    (`LeastRestrictive_Continue`)
*/
bool isAnyContinue(dchar c)
{
    import dmd.common.identifiertables : LeastRestrictive_Continue;

    const bool found = isInRange!LeastRestrictive_Continue(c);
    return found;
}

///
unittest
{
    // A non-ASCII letter (U+011F) is accepted as a continue character.
    assert(isAnyContinue('ğ'));
}
| |
/// Unicode line separator (U+2028)
enum int LS = 0x2028;
/// Unicode paragraph separator (U+2029)
enum int PS = 0x2029;
| |
private
{
    // Per-character bit flags stored in `cmtable`; one bit per property.
    enum CMoctal = 1 << 0;       // '0' .. '7'
    enum CMhex = 1 << 1;         // characters accepted by c_isxdigit
    enum CMidchar = 1 << 2;      // characters accepted by c_isalnum, plus '_'
    enum CMzerosecond = 1 << 3;  // x X b B, plus everything flagged CMdigitsecond
    enum CMdigitsecond = 1 << 4; // 0-9 e E f F l L p P u U i . _
    enum CMsinglechar = 1 << 5;  // ASCII except \\ \n \r 0 0x1A and '
}
| |
/**
Params:
    c = the character to test
Returns: `true` if `cmtable` flags `c` with `CMoctal` (set for '0' .. '7')
*/
bool isoctal(const char c)
{
    const ubyte bits = cmtable[c];
    return (bits & CMoctal) != 0;
}
| |
/**
Params:
    c = the character to test
Returns: `true` if `cmtable` flags `c` with `CMhex` (set for the characters
    accepted by `c_isxdigit`)
*/
bool ishex(const char c)
{
    const ubyte bits = cmtable[c];
    return (bits & CMhex) != 0;
}
| |
/**
Params:
    c = the character to test
Returns: `true` if `cmtable` flags `c` with `CMidchar` (set for the
    characters accepted by `c_isalnum`, plus '_')
*/
bool isidchar(const char c)
{
    const ubyte bits = cmtable[c];
    return (bits & CMidchar) != 0;
}
| |
/**
Params:
    c = the character to test
Returns: `true` if `cmtable` flags `c` with `CMzerosecond` (set for
    x X b B, the digits 0-9, e E f F l L p P u U, 'i', '.' and '_')
*/
bool isZeroSecond(const char c)
{
    const ubyte bits = cmtable[c];
    return (bits & CMzerosecond) != 0;
}
| |
/**
Params:
    c = the character to test
Returns: `true` if `cmtable` flags `c` with `CMdigitsecond` (set for the
    digits 0-9, e E f F l L p P u U, 'i', '.' and '_')
*/
bool isDigitSecond(const char c)
{
    const ubyte bits = cmtable[c];
    return (bits & CMdigitsecond) != 0;
}
| |
/**
Params:
    c = the character to test
Returns: `true` if `cmtable` flags `c` with `CMsinglechar` (set for every
    character below 0x80 except '\\', '\n', '\r', 0, 0x1A and '\'')
*/
bool issinglechar(const char c)
{
    const ubyte bits = cmtable[c];
    return (bits & CMsinglechar) != 0;
}
| |
/**
ASCII-only hexadecimal digit test.

Params:
    c = the character code to test
Returns: `true` if `c` is one of '0'-'9', 'a'-'f' or 'A'-'F'
*/
bool c_isxdigit(const int c)
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;

        default:
            return false;
    }
}
| |
/**
ASCII-only alphanumeric test.

Params:
    c = the character code to test
Returns: `true` if `c` is one of '0'-'9', 'a'-'z' or 'A'-'Z'
*/
bool c_isalnum(const int c)
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'z':
        case 'A': .. case 'Z':
            return true;

        default:
            return false;
    }
}
| |
| extern(D) private: |
| |
// originally from dmd.root.utf
/*
Binary-searches a table of inclusive [first, last] ranges for `c`.

Params:
    Ranges = table to search; each entry is indexed as [0] (first) and
             [1] (last). The search only gives meaningful answers when the
             entries are in ascending order.
    c = the character to look up
Returns: `true` if some entry satisfies first <= c <= last
*/
bool isInRange(alias Ranges)(dchar c)
{
    const size_t last = Ranges.length - 1;

    // Reject anything outside the span of the whole table up front.
    // This also guarantees c >= Ranges[0][0] inside the loop, so the
    // `mid - 1` step below is never taken with mid == 0.
    if (c < Ranges[0][0] || Ranges[last][1] < c)
        return false;

    // Binary search over the closed interval [lo, hi]
    size_t lo = 0;
    size_t hi = last;
    while (lo <= hi)
    {
        const size_t mid = lo + ((hi - lo) >> 1);

        if (c < Ranges[mid][0])
        {
            hi = mid - 1;
            continue;
        }
        if (Ranges[mid][1] < c)
        {
            lo = mid + 1;
            continue;
        }

        // Neither below the first nor above the last: c is inside this entry.
        return true;
    }
    return false;
}
| |
/********************************************
 * Do our own char maps
 *
 * One byte of CM* flags for each of the 256 possible `char` values,
 * computed once by the initializer below.
 */
// originally from dmd.lexer (was private)
static immutable cmtable = ()
{
    ubyte[256] flags;

    // NOTE(review): the `const` loop variable over a constant-length range
    // appears to be what lets `c` be passed to the `int` parameters of
    // c_isxdigit/c_isalnum without a cast - keep the loop header as is.
    foreach (const c; 0 .. flags.length)
    {
        if (c >= '0' && c <= '7')
            flags[c] |= CMoctal;
        if (c_isxdigit(c))
            flags[c] |= CMhex;
        if (c == '_' || c_isalnum(c))
            flags[c] |= CMidchar;

        switch (c)
        {
            case '0': .. case '9':
            case 'e': case 'E':
            case 'f': case 'F':
            case 'l': case 'L':
            case 'p': case 'P':
            case 'u': case 'U':
            case 'i':
            case '.':
            case '_':
                flags[c] |= CMzerosecond | CMdigitsecond;
                break;

            case 'x': case 'X':
            case 'b': case 'B':
                flags[c] |= CMzerosecond;
                break;

            default:
                break;
        }

        // These characters never get CMsinglechar, and neither does
        // anything with the high bit set.
        const bool excluded =
            c == '\\' || c == '\n' || c == '\r' ||
            c == 0 || c == 0x1A || c == '\'';
        if (!excluded && !(c & 0x80))
            flags[c] |= CMsinglechar;
    }
    return flags;
}();