| // Copyright (C) 2020-2025 Free Software Foundation, Inc. |
| |
| // This file is part of GCC. |
| |
| // GCC is free software; you can redistribute it and/or modify it under |
| // the terms of the GNU General Public License as published by the Free |
| // Software Foundation; either version 3, or (at your option) any later |
| // version. |
| |
| // GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| // WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| // for more details. |
| |
| // You should have received a copy of the GNU General Public License |
| // along with GCC; see the file COPYING3. If not see |
| // <http://www.gnu.org/licenses/>. |
| |
| #ifndef RUST_TOKEN_H |
| #define RUST_TOKEN_H |
| |
| #include "rust-system.h" |
| #include "rust-linemap.h" |
| #include "rust-unicode.h" |
| |
| namespace Rust { |
// The primitive ("core") types of Rust: the named primitives, the
// pointer-sized integers, and the fixed-width float and integer types, plus
// two special markers (unknown and pure decimal).
enum PrimitiveCoreType
{
  // No (known) type.
  CORETYPE_UNKNOWN = 0,

  // Named primitives.
  CORETYPE_BOOL = 1,
  CORETYPE_CHAR = 2,
  CORETYPE_STR = 3,

  // Strictly speaking int and uint are arch-dependent (pointer size).
  CORETYPE_INT = 4,
  CORETYPE_UINT = 5,

  // Floating-point primitives with an explicit width.
  CORETYPE_F32 = 6,
  CORETYPE_F64 = 7,

  // Signed integer primitives with an explicit width.
  CORETYPE_I8 = 8,
  CORETYPE_I16 = 9,
  CORETYPE_I32 = 10,
  CORETYPE_I64 = 11,
  CORETYPE_I128 = 12,

  // Unsigned integer primitives with an explicit width.
  CORETYPE_U8 = 13,
  CORETYPE_U16 = 14,
  CORETYPE_U32 = 15,
  CORETYPE_U64 = 16,
  CORETYPE_U128 = 17,

  // Pure decimals are used for tuple indices; this value also means that
  // there is no type hint.
  CORETYPE_PURE_DECIMAL = 18,

  // Arch-dependent pointer sizes: aliases of the int/uint entries above.
  CORETYPE_ISIZE = CORETYPE_INT,
  CORETYPE_USIZE = CORETYPE_UINT
};
| |
// X-macro table listing every token kind.
//
// Before expanding RS_TOKEN_LIST, define the three entry macros:
//   RS_TOKEN (name, description)              - a non-keyword token
//   RS_TOKEN_KEYWORD_2015 (name, identifier)  - a keyword
//   RS_TOKEN_KEYWORD_2018 (name, identifier)  - a keyword
// (the 2015/2018 suffix presumably names the Rust edition that reserved the
// keyword - confirm against the users of this list).
//
// Entries expand in the order written here, so do not reorder them casually.
// Keep the RS_TOKEN_KEYWORD entries sorted.
//
// Keyword entries tagged "unused" below are listed but not otherwise used.
#define RS_TOKEN_LIST \
  /* Marker for the start of the list, then end of input. */ \
  RS_TOKEN (FIRST_TOKEN, "<first-token-marker>") \
  RS_TOKEN (END_OF_FILE, "end of file") \
  /* Operators and punctuation. */ \
  RS_TOKEN (EXCLAM, "!") \
  RS_TOKEN (NOT_EQUAL, "!=") \
  RS_TOKEN (PERCENT, "%") \
  RS_TOKEN (PERCENT_EQ, "%=") \
  RS_TOKEN (AMP, "&") \
  RS_TOKEN (AMP_EQ, "&=") \
  RS_TOKEN (LOGICAL_AND, "&&") \
  RS_TOKEN (ASTERISK, "*") \
  RS_TOKEN (ASTERISK_EQ, "*=") \
  RS_TOKEN (PLUS, "+") \
  RS_TOKEN (PLUS_EQ, "+=") \
  RS_TOKEN (COMMA, ",") \
  RS_TOKEN (MINUS, "-") \
  RS_TOKEN (MINUS_EQ, "-=") \
  RS_TOKEN (RETURN_TYPE, "->") \
  RS_TOKEN (DOT, ".") \
  RS_TOKEN (DOT_DOT, "..") \
  RS_TOKEN (DOT_DOT_EQ, "..=") \
  RS_TOKEN (ELLIPSIS, "...") \
  RS_TOKEN (DIV, "/") \
  RS_TOKEN (DIV_EQ, "/=") \
  RS_TOKEN (COLON, ":") \
  RS_TOKEN (SEMICOLON, ";") \
  RS_TOKEN (LEFT_SHIFT, "<<") \
  RS_TOKEN (LEFT_SHIFT_EQ, "<<=") \
  RS_TOKEN (LEFT_ANGLE, "<") \
  RS_TOKEN (LESS_OR_EQUAL, "<=") \
  RS_TOKEN (EQUAL, "=") \
  RS_TOKEN (EQUAL_EQUAL, "==") \
  RS_TOKEN (MATCH_ARROW, "=>") \
  RS_TOKEN (RIGHT_ANGLE, ">") \
  RS_TOKEN (GREATER_OR_EQUAL, ">=") \
  RS_TOKEN (RIGHT_SHIFT, ">>") \
  RS_TOKEN (RIGHT_SHIFT_EQ, ">>=") \
  RS_TOKEN (PATTERN_BIND, "@") \
  RS_TOKEN (TILDE, "~") \
  RS_TOKEN (BACKSLASH, "\\") \
  RS_TOKEN (BACKTICK, "`") \
  RS_TOKEN (CARET, "^") \
  RS_TOKEN (CARET_EQ, "^=") \
  RS_TOKEN (PIPE, "|") \
  RS_TOKEN (PIPE_EQ, "|=") \
  RS_TOKEN (OR, "||") \
  RS_TOKEN (QUESTION_MARK, "?") \
  RS_TOKEN (HASH, "#") \
  /* The entries from here on are less certain and may not be correct: */ \
  /* they are not operators and may be better described as symbols. */ \
  /* Candidate SPACE and NEWLINE tokens are left out (SPACE was judged */ \
  /* probably too dodgy). */ \
  /* SCOPE_RESOLUTION: dodgy. */ \
  RS_TOKEN (SCOPE_RESOLUTION, "::") \
  /* SINGLE_QUOTE: should this be differentiated from a lifetime? */ \
  RS_TOKEN (SINGLE_QUOTE, "'") \
  RS_TOKEN (DOUBLE_QUOTE, "\"") \
  RS_TOKEN (IDENTIFIER, "identifier") \
  /* Literals. Open question: do the different int and float types need */ \
  /* different literal token types? */ \
  RS_TOKEN (INT_LITERAL, "integer literal") \
  RS_TOKEN (FLOAT_LITERAL, "float literal") \
  RS_TOKEN (STRING_LITERAL, "string literal") \
  RS_TOKEN (CHAR_LITERAL, "character literal") \
  RS_TOKEN (BYTE_STRING_LITERAL, "byte string literal") \
  RS_TOKEN (RAW_STRING_LITERAL, "raw string literal") \
  RS_TOKEN (BYTE_CHAR_LITERAL, "byte character literal") \
  /* TODO: improve the token type used for lifetimes. */ \
  RS_TOKEN (LIFETIME, "lifetime") \
  /* Open question: have "interpolated" tokens (identifier, path, type, */ \
  /* pattern, expression, statement, block, meta, item)? mrustc has them, */ \
  /* but not directly in its lexer. */ \
  /* Delimiters. */ \
  RS_TOKEN (LEFT_PAREN, "(") \
  RS_TOKEN (RIGHT_PAREN, ")") \
  RS_TOKEN (LEFT_CURLY, "{") \
  RS_TOKEN (RIGHT_CURLY, "}") \
  RS_TOKEN (LEFT_SQUARE, "[") \
  RS_TOKEN (RIGHT_SQUARE, "]") \
  /* Macros. */ \
  RS_TOKEN (DOLLAR_SIGN, "$") \
  /* Doc comments. */ \
  RS_TOKEN (INNER_DOC_COMMENT, "#![doc]") \
  RS_TOKEN (OUTER_DOC_COMMENT, "#[doc]") \
  /* Keywords - keep sorted. */ \
  RS_TOKEN_KEYWORD_2015 (ABSTRACT, "abstract") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (AS, "as") \
  RS_TOKEN_KEYWORD_2018 (ASYNC, "async") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (AUTO, "auto") \
  RS_TOKEN_KEYWORD_2018 (AWAIT, "await") \
  RS_TOKEN_KEYWORD_2015 (BECOME, "become") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (BOX, "box") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (BREAK, "break") \
  RS_TOKEN_KEYWORD_2015 (CONST, "const") \
  RS_TOKEN_KEYWORD_2015 (CONTINUE, "continue") \
  RS_TOKEN_KEYWORD_2015 (CRATE, "crate") \
  RS_TOKEN_KEYWORD_2015 (DO, "do") /* unused */ \
  RS_TOKEN_KEYWORD_2018 (DYN, "dyn") \
  RS_TOKEN_KEYWORD_2015 (ELSE, "else") \
  RS_TOKEN_KEYWORD_2015 (ENUM_KW, "enum") \
  RS_TOKEN_KEYWORD_2015 (EXTERN_KW, "extern") \
  RS_TOKEN_KEYWORD_2015 (FALSE_LITERAL, "false") \
  RS_TOKEN_KEYWORD_2015 (FINAL_KW, "final") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (FN_KW, "fn") \
  RS_TOKEN_KEYWORD_2015 (FOR, "for") \
  RS_TOKEN_KEYWORD_2015 (IF, "if") \
  RS_TOKEN_KEYWORD_2015 (IMPL, "impl") \
  RS_TOKEN_KEYWORD_2015 (IN, "in") \
  RS_TOKEN_KEYWORD_2015 (LET, "let") \
  RS_TOKEN_KEYWORD_2015 (LOOP, "loop") \
  RS_TOKEN_KEYWORD_2015 (MACRO, "macro") \
  RS_TOKEN_KEYWORD_2015 (MATCH_KW, "match") \
  RS_TOKEN_KEYWORD_2015 (MOD, "mod") \
  RS_TOKEN_KEYWORD_2015 (MOVE, "move") \
  RS_TOKEN_KEYWORD_2015 (MUT, "mut") \
  RS_TOKEN_KEYWORD_2015 (OVERRIDE_KW, "override") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (PRIV, "priv") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (PUB, "pub") \
  RS_TOKEN_KEYWORD_2015 (REF, "ref") \
  RS_TOKEN_KEYWORD_2015 (RETURN_KW, "return") \
  /* Note: mrustc does not treat "Self" as a reserved word. */ \
  RS_TOKEN_KEYWORD_2015 (SELF_ALIAS, "Self") \
  RS_TOKEN_KEYWORD_2015 (SELF, "self") \
  RS_TOKEN_KEYWORD_2015 (STATIC_KW, "static") \
  RS_TOKEN_KEYWORD_2015 (STRUCT_KW, "struct") \
  RS_TOKEN_KEYWORD_2015 (SUPER, "super") \
  RS_TOKEN_KEYWORD_2015 (TRAIT, "trait") \
  RS_TOKEN_KEYWORD_2015 (TRUE_LITERAL, "true") \
  RS_TOKEN_KEYWORD_2015 (TRY, "try") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (TYPE, "type") \
  RS_TOKEN_KEYWORD_2015 (TYPEOF, "typeof") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (UNDERSCORE, "_") \
  RS_TOKEN_KEYWORD_2015 (UNSAFE, "unsafe") \
  RS_TOKEN_KEYWORD_2015 (UNSIZED, "unsized") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (USE, "use") \
  RS_TOKEN_KEYWORD_2015 (VIRTUAL, "virtual") /* unused */ \
  RS_TOKEN_KEYWORD_2015 (WHERE, "where") \
  RS_TOKEN_KEYWORD_2015 (WHILE, "while") \
  RS_TOKEN_KEYWORD_2015 (YIELD, "yield") /* unused */ \
  /* Marker for the end of the list. */ \
  RS_TOKEN (LAST_TOKEN, "<last-token-marker>")
| |
// Identifiers for all token kinds, generated from RS_TOKEN_LIST via x-macros:
// every entry of the list, keyword or not, contributes one enumerator named
// after its first argument, in list order.
enum TokenId
{
#define RS_TOKEN(name, descr) name,
#define RS_TOKEN_KEYWORD_2015(name, keyword) RS_TOKEN (name, keyword)
#define RS_TOKEN_KEYWORD_2018(name, keyword) RS_TOKEN (name, keyword)
  RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD_2018
#undef RS_TOKEN_KEYWORD_2015
#undef RS_TOKEN
};
| |
// Token is only forward-declared here so that the pointer aliases below can
// be named before the class itself is defined.
class Token;
// Shared-ownership smart pointer (std::shared_ptr) to a mutable Token.
using TokenPtr = std::shared_ptr<Token>;
// Shared-ownership smart pointer (std::shared_ptr) to a constant Token.
using const_TokenPtr = std::shared_ptr<const Token>;
| |
| // Hackily defined way to get token description for enum value using x-macros |
| const char * |
| get_token_description (TokenId id); |
| /* Hackily defined way to get token description as a string for enum value using |
| * x-macros */ |
| const char * |
| token_id_to_str (TokenId id); |
| /* checks if a token is a keyword */ |
| bool |
| token_id_is_keyword (TokenId id); |
| /* gets the string associated with a keyword */ |
| const std::string & |
| token_id_keyword_string (TokenId id); |
| // Get type hint description as a string. |
| const char * |
| get_type_hint_string (PrimitiveCoreType type); |
| |
| /* Normalize string if a token is a identifier */ |
| std::string |
| nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str); |
| |
| // Represents a single token. Create using factory static methods. |
| class Token |
| { |
| private: |
| // Token kind. |
| TokenId token_id; |
| // Token location. |
| location_t locus; |
| // Associated text (if any) of token. |
| std::unique_ptr<std::string> str; |
| // TODO: maybe remove issues and just store std::string as value? |
| /* Type hint for token based on lexer data (e.g. type suffix). Does not exist |
| * for most tokens. */ |
| PrimitiveCoreType type_hint; |
| |
| // Token constructor from token id and location. Has a null string. |
| Token (TokenId token_id, location_t location) |
| : token_id (token_id), locus (location), str (nullptr), |
| type_hint (CORETYPE_UNKNOWN) |
| {} |
| |
| // Token constructor from token id, location, and a string. |
| Token (TokenId token_id, location_t location, std::string &¶mStr) |
| : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) |
| { |
| // Normalize identifier tokens |
| str = std::make_unique<std::string> ( |
| nfc_normalize_token_string (location, token_id, paramStr)); |
| } |
| |
| // Token constructor from token id, location, and a char. |
| Token (TokenId token_id, location_t location, char paramChar) |
| : token_id (token_id), locus (location), |
| str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN) |
| { |
| // Do not need to normalize 1byte char |
| } |
| |
| // Token constructor from token id, location, and a "codepoint". |
| Token (TokenId token_id, location_t location, Codepoint paramCodepoint) |
| : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) |
| { |
| // Normalize identifier tokens |
| str = std::make_unique<std::string> ( |
| nfc_normalize_token_string (location, token_id, |
| paramCodepoint.as_string ())); |
| } |
| |
| // Token constructor from token id, location, a string, and type hint. |
| Token (TokenId token_id, location_t location, std::string &¶mStr, |
| PrimitiveCoreType parType) |
| : token_id (token_id), locus (location), type_hint (parType) |
| { |
| // Normalize identifier tokens |
| str = std::make_unique<std::string> ( |
| nfc_normalize_token_string (location, token_id, paramStr)); |
| } |
| |
| public: |
| // No default constructor. |
| Token () = delete; |
| // Do not copy/assign tokens. |
| Token (const Token &) = delete; |
| Token &operator= (const Token &) = delete; |
| |
| // Allow moving tokens. |
| Token (Token &&other) = default; |
| Token &operator= (Token &&other) = default; |
| |
| ~Token () = default; |
| |
| /* TODO: make_shared (which saves a heap allocation) does not work with the |
| * private constructor */ |
| |
| // Makes and returns a new TokenPtr (with null string). |
| static TokenPtr make (TokenId token_id, location_t locus) |
| { |
| // return std::make_shared<Token> (token_id, locus); |
| return TokenPtr (new Token (token_id, locus)); |
| } |
| |
| // Makes and returns a new TokenPtr of type IDENTIFIER. |
| static TokenPtr make_identifier (location_t locus, std::string &&str) |
| { |
| // return std::make_shared<Token> (IDENTIFIER, locus, str); |
| return TokenPtr (new Token (IDENTIFIER, locus, std::move (str))); |
| } |
| |
| // Makes and returns a new TokenPtr of type INT_LITERAL. |
| static TokenPtr make_int (location_t locus, std::string &&str, |
| PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) |
| { |
| // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint); |
| return TokenPtr ( |
| new Token (INT_LITERAL, locus, std::move (str), type_hint)); |
| } |
| |
| // Makes and returns a new TokenPtr of type FLOAT_LITERAL. |
| static TokenPtr make_float (location_t locus, std::string &&str, |
| PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) |
| { |
| // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint); |
| return TokenPtr ( |
| new Token (FLOAT_LITERAL, locus, std::move (str), type_hint)); |
| } |
| |
| // Makes and returns a new TokenPtr of type STRING_LITERAL. |
| static TokenPtr make_string (location_t locus, std::string &&str) |
| { |
| // return std::make_shared<Token> (STRING_LITERAL, locus, str, |
| // CORETYPE_STR); |
| return TokenPtr ( |
| new Token (STRING_LITERAL, locus, std::move (str), CORETYPE_STR)); |
| } |
| |
| // Makes and returns a new TokenPtr of type CHAR_LITERAL. |
| static TokenPtr make_char (location_t locus, Codepoint char_lit) |
| { |
| // return std::make_shared<Token> (CHAR_LITERAL, locus, char_lit); |
| return TokenPtr (new Token (CHAR_LITERAL, locus, char_lit)); |
| } |
| |
| // Makes and returns a new TokenPtr of type BYTE_CHAR_LITERAL. |
| static TokenPtr make_byte_char (location_t locus, char byte_char) |
| { |
| // return std::make_shared<Token> (BYTE_CHAR_LITERAL, locus, byte_char); |
| return TokenPtr (new Token (BYTE_CHAR_LITERAL, locus, byte_char)); |
| } |
| |
| // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix). |
| static TokenPtr make_byte_string (location_t locus, std::string &&str) |
| { |
| // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str); |
| return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str))); |
| } |
| |
| // Makes and returns a new TokenPtr of type RAW_STRING_LITERAL. |
| static TokenPtr make_raw_string (location_t locus, std::string &&str) |
| { |
| return TokenPtr (new Token (RAW_STRING_LITERAL, locus, std::move (str))); |
| } |
| |
| // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT. |
| static TokenPtr make_inner_doc_comment (location_t locus, std::string &&str) |
| { |
| return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str))); |
| } |
| |
| // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT. |
| static TokenPtr make_outer_doc_comment (location_t locus, std::string &&str) |
| { |
| return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str))); |
| } |
| |
| // Makes and returns a new TokenPtr of type LIFETIME. |
| static TokenPtr make_lifetime (location_t locus, std::string &&str) |
| { |
| // return std::make_shared<Token> (LIFETIME, locus, str); |
| return TokenPtr (new Token (LIFETIME, locus, std::move (str))); |
| } |
| |
| // Gets id of the token. |
| TokenId get_id () const { return token_id; } |
| |
| // Gets location of the token. |
| location_t get_locus () const { return locus; } |
| |
| // Set location of the token. |
| void set_locus (location_t locus) { this->locus = locus; } |
| |
| // Gets string description of the token. |
| const std::string & |
| get_str () const; /*{ |
| // FIXME: put in header again when fix null problem |
| //gcc_assert(str != nullptr); |
| if (str == nullptr) { |
| error_at(get_locus(), "attempted to get string for '%s', which has no string. |
| returning empty string instead.", get_token_description()); return ""; |
| } |
| return *str; |
| }*/ |
| |
| // Gets token's type hint info. |
| PrimitiveCoreType get_type_hint () const |
| { |
| return type_hint == CORETYPE_PURE_DECIMAL ? CORETYPE_UNKNOWN : type_hint; |
| } |
| |
| // diagnostics (error reporting) |
| const char *get_token_description () const |
| { |
| return Rust::get_token_description (token_id); |
| } |
| |
| // debugging |
| const char *token_id_to_str () const |
| { |
| return Rust::token_id_to_str (token_id); |
| } |
| |
| // debugging |
| const char *get_type_hint_str () const; |
| |
| /* Returns whether the token is a literal of any type (int, float, char, |
| * string, byte char, byte string). */ |
| bool is_literal () const |
| { |
| switch (token_id) |
| { |
| case INT_LITERAL: |
| case FLOAT_LITERAL: |
| case CHAR_LITERAL: |
| case STRING_LITERAL: |
| case BYTE_CHAR_LITERAL: |
| case BYTE_STRING_LITERAL: |
| case RAW_STRING_LITERAL: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* Returns whether the token actually has a string (regardless of whether it |
| * should or not). */ |
| bool has_str () const { return str != nullptr; } |
| |
| // Returns whether the token should have a string. |
| bool should_have_str () const |
| { |
| return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME; |
| } |
| |
| // Returns whether the token is a pure decimal int literal |
| bool is_pure_decimal () const { return type_hint == CORETYPE_PURE_DECIMAL; } |
| |
| // Return the token representation as someone would find it in the original |
| // source code file. |
| std::string as_string () const; |
| }; |
| } // namespace Rust |
| |
| namespace std { |
| template <> struct hash<Rust::PrimitiveCoreType> |
| { |
| size_t operator() (const Rust::PrimitiveCoreType &coretype) const noexcept |
| { |
| return hash<std::underlying_type<Rust::PrimitiveCoreType>::type> () ( |
| static_cast<std::underlying_type<Rust::PrimitiveCoreType>::type> ( |
| coretype)); |
| } |
| }; |
| } // namespace std |
| |
| #endif |