// blob: 10293e0a81228b08e6e64ecc4e73dced47890493
// Copyright (C) 2020-2025 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#ifndef RUST_LEX_H
#define RUST_LEX_H
#include "rust-linemap.h"
#include "rust-buffered-queue.h"
#include "rust-token.h"
#include "optional.h"
#include "selftest.h"
#include "rust-input-source.h"
namespace Rust {
// Simple wrapper for FILE* that simplifies destruction.
// Simple wrapper for FILE* that simplifies destruction.
//
// The wrapped stream is passed to fclose when the wrapper is destroyed or
// move-assigned over, unless it is null or stdin.  The filename pointer is
// stored exactly as given; the string it points to is not copied, so it must
// stay valid for as long as get_filename () may be called.
struct RAIIFile
{
private:
  // Wrapped stream; nullptr means "no file" (see ok ()).
  FILE *file = nullptr;
  // Name the stream was opened from; may be nullptr.
  const char *filename;

  // Close the wrapped stream if there is one.  stdin is never closed.
  void close ()
  {
    if (file != nullptr && file != stdin)
      fclose (file);
  }

  static bool allowed_filetype (const struct stat &statbuf)
  {
    // The file could be either
    // - a regular file
    // - a char device (/dev/null...)
    return S_ISREG (statbuf.st_mode) || S_ISCHR (statbuf.st_mode);
  }

public:
  // Open FILENAME for reading.  The name "-" selects stdin instead of
  // opening a file.  If fopen fails, ok () is false and errno is whatever
  // fopen left behind.  If the opened file cannot be stat'ed or is neither a
  // regular file nor a character device, it is closed again, ok () is false
  // and errno is set to EISDIR.
  RAIIFile (const char *filename) : filename (filename)
  {
    if (strcmp (filename, "-") == 0)
      {
        file = stdin;
        return;
      }

    file = fopen (filename, "r");
    if (file == nullptr)
      return;

    struct stat statbuf;
    if (-1 == fstat (fileno (file), &statbuf) || !allowed_filetype (statbuf))
      {
        fclose (file);
        file = nullptr;
        // Set after fclose so that fclose cannot overwrite the value.
        errno = EISDIR;
      }
  }

  /**
   * Create a RAIIFile from an existing instance of FILE*
   */
  RAIIFile (FILE *raw, const char *filename = nullptr)
    : file (raw), filename (filename)
  {}

  RAIIFile (const RAIIFile &other) = delete;
  RAIIFile &operator= (const RAIIFile &other) = delete;

  // have to specify setting file to nullptr, otherwise unintended fclose occurs
  RAIIFile (RAIIFile &&other) noexcept
    : file (other.file), filename (other.filename)
  {
    other.file = nullptr;
  }

  // Take over OTHER's stream, closing the one currently held.  Assigning an
  // object to itself is a no-op: without the guard the stream would be closed
  // and then forgotten.
  RAIIFile &operator= (RAIIFile &&other) noexcept
  {
    if (this != &other)
      {
        close ();
        file = other.file;
        filename = other.filename;
        other.file = nullptr;
      }
    return *this;
  }

  // Build a wrapper that holds no stream and no filename; ok () is false.
  static RAIIFile create_error () { return RAIIFile (nullptr, nullptr); }

  ~RAIIFile () { close (); }

  // Wrapped stream, or nullptr if there is none.  Ownership is not released.
  FILE *get_raw () const { return file; }
  // Filename pointer given at construction; may be nullptr.
  const char *get_filename () const { return filename; }
  // True iff a stream is currently held.
  bool ok () const { return file != nullptr; }
};
// Lexer producing tokens from an input source, read either from a file or
// from an in-memory string.  Input code points and built tokens are each
// buffered in a queue so callers can peek ahead before consuming.
class Lexer
{
private:
  // Request new Location for current column in line_table
  location_t get_current_location ();

  // Skips the current input character.
  void skip_input ();
  // Advances current input character to n + 1 characters ahead of current
  // position.
  void skip_input (int n);

  // Peeks the current character.
  Codepoint peek_input ();
  // Returns character n characters ahead of current position.
  Codepoint peek_input (int n);

  // Classifies keyword (i.e. gets id for keyword).
  TokenId classify_keyword (const std::string &str);

  // Helpers for the pieces of numeric, escape and string literals.  Their
  // definitions are not in this header.
  std::tuple<std::string, int, bool> parse_in_decimal ();
  std::pair<std::string, int> parse_in_exponent_part ();
  std::pair<PrimitiveCoreType, int> parse_in_type_suffix ();
  std::tuple<char, int, bool> parse_escape (char opening_char);
  std::tuple<Codepoint, int, bool> parse_utf8_escape ();
  int parse_partial_string_continue ();
  std::pair<long, int> parse_partial_hex_escape ();
  std::pair<Codepoint, int> parse_partial_unicode_escape ();

  void skip_broken_string_input (Codepoint current_char);

  // Per-token-kind parsers; each receives the location the token starts at.
  TokenPtr parse_byte_char (location_t loc);
  TokenPtr parse_byte_string (location_t loc);
  TokenPtr parse_raw_byte_string (location_t loc);
  TokenPtr parse_raw_identifier (location_t loc);
  TokenPtr parse_string (location_t loc);
  TokenPtr maybe_parse_raw_string (location_t loc);
  TokenPtr parse_raw_string (location_t loc, int initial_hash_count);
  TokenPtr parse_non_decimal_int_literals (location_t loc);
  TokenPtr parse_decimal_int_or_float (location_t loc);
  TokenPtr parse_char_or_lifetime (location_t loc);
  TokenPtr parse_identifier_or_keyword (location_t loc);

  template <typename IsDigitFunc>
  TokenPtr parse_non_decimal_int_literal (location_t loc,
                                          IsDigitFunc is_digit_func,
                                          std::string existent_str, int base);

public:
  // Construct lexer with input file and filename provided
  Lexer (const char *filename, RAIIFile input, Linemap *linemap,
         tl::optional<std::ofstream &> dump_lex_opt = tl::nullopt);

  // Lex the contents of a string instead of a file
  Lexer (const std::string &input, Linemap *linemap);

  // dtor
  ~Lexer ();

  // don't allow copy semantics (for now, at least)
  Lexer (const Lexer &other) = delete;
  Lexer &operator= (const Lexer &other) = delete;

  // enable move semantics
  // NOTE(review): TokenSource stores a raw Lexer*.  If token_queue keeps its
  // TokenSource by value, these defaulted operations leave that pointer
  // referring to the moved-from Lexer - confirm against buffered_queue.
  Lexer (Lexer &&other) = default;
  Lexer &operator= (Lexer &&other) = default;

  bool input_source_is_valid_utf8 ();

  // Returns token n tokens ahead of current position.
  const_TokenPtr peek_token (int n) { return token_queue.peek (n); }
  // Peeks the current token.
  const_TokenPtr peek_token () { return peek_token (0); }

  // Builds a token from the input queue.
  TokenPtr build_token ();

  // Advances current token to n + 1 tokens ahead of current position.
  void skip_token (int n);
  // Skips the current token.
  void skip_token () { skip_token (0); }

  // Dumps and advances by n + 1 tokens.
  void dump_and_skip (int n);

  // Replaces the current token with a specified token.
  void replace_current_token (TokenPtr replacement);
  // FIXME: don't use anymore

  /* Splits the current token into two. Intended for use with nested generics
   * closes (i.e. T<U<X>> where >> is wrongly lexed as one token). Note that
   * this will only work with "simple" tokens like punctuation. */
  void split_current_token (TokenId new_left, TokenId new_right);

  void split_current_token (std::vector<TokenPtr> new_tokens);

  Linemap *get_line_map () { return line_map; }

  // Name of the input file, or an empty string when the input has no
  // filename.  RAIIFile may carry a null filename (e.g. one made by
  // RAIIFile::create_error ()), and building a std::string from a null
  // pointer is undefined behaviour, so that case is handled explicitly.
  std::string get_filename ()
  {
    const char *name = input.get_filename ();
    return name != nullptr ? std::string (name) : std::string ();
  }

private:
  void start_line (int current_line, int current_column);

  // File for use as input.
  RAIIFile input;
  // TODO is this actually required? could just have file storage in InputSource

  // Current line number.
  int current_line;
  // Current column number.
  int current_column;
  // Current character.
  Codepoint current_char;
  // Line map.
  Linemap *line_map;

  /* Max column number that can be quickly allocated - higher may require
   * allocating new linemap */
  static const int max_column_hint = 80;

  tl::optional<std::ofstream &> dump_lex_out;

  // The input source for the lexer.
  // InputSource input_source;
  // Input file queue.
  std::unique_ptr<InputSource> raw_input_source;
  buffered_queue<Codepoint, std::reference_wrapper<InputSource>> input_queue;

  // Token source wrapper thing.
  struct TokenSource
  {
    // The lexer object that will use this TokenSource.
    Lexer *lexer;

    // Create a new TokenSource with given lexer.
    TokenSource (Lexer *parLexer) : lexer (parLexer) {}

    // Used to mimic std::reference_wrapper that is used for InputSource.
    TokenSource &get () { return *this; }

    // Build the next token by delegating to the owning lexer.
    TokenPtr next () { return lexer->build_token (); }
  };

  // The token source for the lexer.
  // TokenSource token_source;
  // Token stream queue.
  buffered_queue<std::shared_ptr<Token>, TokenSource> token_queue;
};
} // namespace Rust
#if CHECKING_P
namespace selftest {
// Self-test entry point, declared only when CHECKING_P is non-zero.  The
// definition is not part of this header.
void
rust_input_source_test ();
} // namespace selftest
#endif // CHECKING_P
#endif