| // Copyright (C) 2020-2023 Free Software Foundation, Inc. |
| |
| // This file is part of GCC. |
| |
| // GCC is free software; you can redistribute it and/or modify it under |
| // the terms of the GNU General Public License as published by the Free |
| // Software Foundation; either version 3, or (at your option) any later |
| // version. |
| |
| // GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| // WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| // for more details. |
| |
| // You should have received a copy of the GNU General Public License |
| // along with GCC; see the file COPYING3. If not see |
| // <http://www.gnu.org/licenses/>. |
| |
| #include "rust-macro-expand.h" |
| #include "rust-macro-substitute-ctx.h" |
| #include "rust-ast-full.h" |
| #include "rust-ast-visitor.h" |
| #include "rust-diagnostics.h" |
| #include "rust-parse.h" |
| #include "rust-attribute-visitor.h" |
| #include "rust-early-name-resolver.h" |
| |
| namespace Rust { |
| AST::Fragment |
| MacroExpander::expand_decl_macro (Location invoc_locus, |
| AST::MacroInvocData &invoc, |
| AST::MacroRulesDefinition &rules_def, |
| bool semicolon) |
| { |
| // ensure that both invocation and rules are in a valid state |
| rust_assert (!invoc.is_marked_for_strip ()); |
| rust_assert (!rules_def.is_marked_for_strip ()); |
| rust_assert (rules_def.get_macro_rules ().size () > 0); |
| |
| /* Probably something here about parsing the invocation and rules definition |
| * token trees into a token stream. If not, how would the parser handle the |
| * captures of expressions and the like? On the other hand, token trees may be |
| * quite useful in the rules definition, as they create a point where recursion |
| * can occur (like having "compare_macro_match" and then having it call itself |
| * when it finds delimiters). */ |
| |
| /* Find the rule matching the invocation token tree, based on each macro |
| * rule's matcher. If none matches, report an error. |
| * - specifically, check each matcher in order; if one fails to match, move |
| * on to the next. */ |
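| /* For instance (illustrative example, not taken from a test case): given |
| *   macro_rules! add { |
| *       () => { 0 }; |
| *       ($e:expr) => { $e }; |
| *       ($e:expr, $($rest:expr),+) => { $e + add!($($rest),+) }; |
| *   } |
| * the invocation add!(1, 2, 3) is checked against each matcher in turn and |
| * only the third one matches, so that rule's transcriber is used. */ |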
| /* TODO: does doing this require parsing expressions and the like in the |
| * invocation? If so, we might as well save the results if they are referenced |
| * with $name. If not, do another pass to save them. Except this is probably |
| * useless, as different rules could have different starting points for |
| * expressions. Decision trees could avoid this, but they have their own |
| * issues. */ |
| /* TODO: we will need to modify the parser so that it can essentially "catch" |
| * errors - maybe "try_parse_expr" or similar methods. */ |
| // This technically creates a back-tracking parser - that will be the |
| // implementation style. |
| |
| /* Then, after the results are saved, generate the macro output from the |
| * transcriber token tree. If I understand this correctly, the macro |
| * invocation gets replaced by the transcriber tokens, with substitutions made |
| * (e.g. for $i variables). */ |
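| /* E.g. (illustrative): with the rule ($i:ident, $e:expr) => { let $i = $e; }, |
| * the invocation m!(x, 1 + 2) is replaced by the transcriber tokens with $i |
| * and $e substituted, i.e. by `let x = 1 + 2;`. */ |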
| |
| /* TODO: it is probably better to modify AST::Token to store a pointer to a |
| * Lexer::Token (rather than being converted) - i.e. to treat AST::Token not |
| * so much as a Token but rather as a TokenContainer (as it is another type of |
| * TokenTree). This would prevent the constant re-conversion of Tokens between |
| * the two types, while still allowing the heterogeneous storage of token |
| * trees. */ |
| |
| AST::DelimTokenTree &invoc_token_tree = invoc.get_delim_tok_tree (); |
| |
| // find matching arm |
| AST::MacroRule *matched_rule = nullptr; |
| std::map<std::string, MatchedFragmentContainer> matched_fragments; |
| for (auto &rule : rules_def.get_rules ()) |
| { |
| sub_stack.push (); |
| bool did_match_rule = try_match_rule (rule, invoc_token_tree); |
| matched_fragments = sub_stack.pop (); |
| |
| if (did_match_rule) |
| { |
| // // Debugging |
| // for (auto &kv : matched_fragments) |
| // rust_debug ("[fragment]: %s (%ld - %s)", kv.first.c_str (), |
| // kv.second.get_fragments ().size (), |
| // kv.second.get_kind () |
| // == MatchedFragmentContainer::Kind::Repetition |
| // ? "repetition" |
| // : "metavar"); |
| |
| matched_rule = &rule; |
| break; |
| } |
| } |
| |
| if (matched_rule == nullptr) |
| { |
| RichLocation r (invoc_locus); |
| r.add_range (rules_def.get_locus ()); |
| rust_error_at (r, "Failed to match any rule within macro"); |
| return AST::Fragment::create_error (); |
| } |
| |
| return transcribe_rule (*matched_rule, invoc_token_tree, matched_fragments, |
| semicolon, peek_context ()); |
| } |
| |
| void |
| MacroExpander::expand_eager_invocations (AST::MacroInvocation &invoc) |
| { |
| if (invoc.get_pending_eager_invocations ().empty ()) |
| return; |
| |
| // We have to basically create a new delimited token tree which contains the |
| // result of one step of expansion. In the case of builtin macros called with |
| // other macro invocations, such as `concat!("h", 'a', a!())`, we need to |
| // expand `a!()` before expanding the concat macro. |
| // This will, ideally, give us a new token tree containing the various |
| // existing tokens + the result of the expansion of a!(). |
| // To do this, we "parse" the given token tree to find anything that "looks |
| // like a macro invocation". Then, we get the corresponding macro invocation |
| // from the `pending_eager_invocations` vector and expand it. |
| // Because the `pending_eager_invocations` vector is created in the same order |
| // that the DelimTokenTree is parsed, we know that the first macro invocation |
| // within the DelimTokenTree corresponds to the first element in |
| // `pending_eager_invocations`. The idea is thus to: |
| // 1. Find a macro invocation in the token tree, noting the index of the start |
| // token and of the end token |
| // 2. Get its associated invocation in `pending_eager_invocations` |
| // 3. Expand that element |
| // 4. Get the token tree associated with that AST fragment |
| // 5. Replace the original tokens corresponding to the invocation with the new |
| // tokens from the fragment |
| // pseudo-code: |
| // |
| // i = 0; |
| // for tok in dtt: |
| // if tok is identifier && tok->next() is !: |
| // start = index(tok); |
| // l_delim = tok->next()->next(); |
| // tok = skip_until_r_delim(); |
| // end = index(tok); |
| // |
| // new_tt = expand_eager_invoc(eagers[i++]); |
| // old_tt[start..end] = new_tt; |
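| // |
| // As an illustration (assuming a macro `a!()` that expands to the single |
| // token `"bc"`), the delimited token tree of `concat!("h", 'a', a!())` would |
| // be rewritten from ("h", 'a', a, !, (, )) to ("h", 'a', "bc") before the |
| // builtin transcriber for `concat!` ever sees it. |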
| |
| auto dtt = invoc.get_invoc_data ().get_delim_tok_tree (); |
| auto stream = dtt.to_token_stream (); |
| std::vector<std::unique_ptr<AST::TokenTree>> new_stream; |
| size_t current_pending = 0; |
| |
| // we need to create a clone of the delimited token tree as the lexer |
| // expects ownership of the tokens |
| std::vector<std::unique_ptr<Rust::AST::Token>> dtt_clone; |
| for (auto &tok : stream) |
| dtt_clone.emplace_back (tok->clone_token ()); |
| |
| MacroInvocLexer lex (std::move (dtt_clone)); |
| Parser<MacroInvocLexer> parser (lex); |
| |
| // we want to build a substitution map - basically, associating a `start` and |
| // `end` index for each of the pending macro invocations |
| std::map<std::pair<size_t, size_t>, std::unique_ptr<AST::MacroInvocation> &> |
| substitution_map; |
| |
| for (size_t i = 0; i < stream.size (); i++) |
| { |
| // FIXME: Can't these offsets be figured out when we actually parse the |
| // pending_eager_invocation in the first place? |
| auto invocation = parser.parse_macro_invocation ({}); |
| |
| // If we've managed to parse a macro invocation, we look at the start and |
| // current offsets and store them in the substitution map. Otherwise, we |
| // skip one token and try parsing again. |
| if (invocation) |
| substitution_map.insert ( |
| {{i, parser.get_token_source ().get_offs ()}, |
| invoc.get_pending_eager_invocations ()[current_pending++]}); |
| else |
| parser.skip_token (stream[i]->get_id ()); |
| } |
| |
| size_t current_idx = 0; |
| for (auto kv : substitution_map) |
| { |
| auto &to_expand = kv.second; |
| expand_invoc (*to_expand, false); |
| |
| auto fragment = take_expanded_fragment (); |
| auto &new_tokens = fragment.get_tokens (); |
| |
| auto start = kv.first.first; |
| auto end = kv.first.second; |
| |
| // We're now going to re-add the tokens to the invocation's token tree. |
| // 1. Basically, what we want to do is insert all tokens up until the |
| // beginning of the macro invocation (start). |
| // 2. Then, we'll insert all of the tokens resulting from the macro |
| // expansion: These are in `new_tokens`. |
| // 3. Finally, we'll do that again starting from the end of the macro |
| // invocation, and go back to 1. |
| |
| for (size_t i = current_idx; i < start; i++) |
| new_stream.emplace_back (stream[i]->clone_token ()); |
| |
| for (auto &tok : new_tokens) |
| new_stream.emplace_back (tok->clone_token ()); |
| |
| current_idx = end; |
| } |
| |
| // Once all of that is done, we copy the last remaining tokens from the |
| // original stream |
| for (size_t i = current_idx; i < stream.size (); i++) |
| new_stream.emplace_back (stream[i]->clone_token ()); |
| |
| auto new_dtt |
| = AST::DelimTokenTree (dtt.get_delim_type (), std::move (new_stream)); |
| |
| invoc.get_pending_eager_invocations ().clear (); |
| invoc.get_invoc_data ().set_delim_tok_tree (new_dtt); |
| } |
| |
| void |
| MacroExpander::expand_invoc (AST::MacroInvocation &invoc, bool has_semicolon) |
| { |
| if (depth_exceeds_recursion_limit ()) |
| { |
| rust_error_at (invoc.get_locus (), "reached recursion limit"); |
| return; |
| } |
| |
| if (invoc.get_kind () == AST::MacroInvocation::InvocKind::Builtin) |
| expand_eager_invocations (invoc); |
| |
| AST::MacroInvocData &invoc_data = invoc.get_invoc_data (); |
| |
| // ?? |
| // switch on type of macro: |
| // - '!' syntax macro (inner switch) |
| // - procedural macro - "A token-based function-like macro" |
| // - 'macro_rules' (by example/pattern-match) macro? or not? "an |
| // AST-based function-like macro" |
| // - else is unreachable |
| // - attribute syntax macro (inner switch) |
| // - procedural macro attribute syntax - "A token-based attribute |
| // macro" |
| // - legacy macro attribute syntax? - "an AST-based attribute macro" |
| // - non-macro attribute: mark known |
| // - else is unreachable |
| // - derive macro (inner switch) |
| // - derive or legacy derive - "token-based" vs "AST-based" |
| // - else is unreachable |
| // - derive container macro - unreachable |
| |
| auto fragment = AST::Fragment::create_error (); |
| invoc_data.set_expander (this); |
| |
| // lookup the rules |
| AST::MacroRulesDefinition *rules_def = nullptr; |
| bool ok = mappings->lookup_macro_invocation (invoc, &rules_def); |
| |
| // If there's no rule associated with the invocation, we can simply return |
| // early. The early name resolver will have already emitted an error. |
| if (!ok) |
| return; |
| |
| // We store the last expanded invocation and macro definition for error |
| // reporting in case the recursion limit is reached |
| last_invoc = &invoc; |
| last_def = rules_def; |
| |
| if (rules_def->is_builtin ()) |
| fragment |
| = rules_def->get_builtin_transcriber () (invoc.get_locus (), invoc_data); |
| else |
| fragment = expand_decl_macro (invoc.get_locus (), invoc_data, *rules_def, |
| has_semicolon); |
| |
| set_expanded_fragment (std::move (fragment)); |
| } |
| |
| /* Determines whether any cfg predicate is false and hence whether the item |
| * carrying these attributes should be stripped. Note that attributes must be |
| * expanded before calling. */ |
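| /* E.g. (illustrative): an item annotated with #[cfg(feature = "foo")] is |
| * stripped whenever the "foo" feature predicate evaluates to false. */ |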
| bool |
| MacroExpander::fails_cfg (const AST::AttrVec &attrs) const |
| { |
| for (const auto &attr : attrs) |
| { |
| if (attr.get_path () == "cfg" && !attr.check_cfg_predicate (session)) |
| return true; |
| } |
| return false; |
| } |
| |
| /* Determines whether any cfg predicate is false and hence whether the item |
| * carrying these attributes should be stripped. Will expand attributes as |
| * well. */ |
| bool |
| MacroExpander::fails_cfg_with_expand (AST::AttrVec &attrs) const |
| { |
| // TODO: maybe have something that strips cfg attributes that evaluate true? |
| for (auto &attr : attrs) |
| { |
| if (attr.get_path () == "cfg") |
| { |
| if (!attr.is_parsed_to_meta_item ()) |
| attr.parse_attr_to_meta_item (); |
| |
| // DEBUG |
| if (!attr.is_parsed_to_meta_item ()) |
| rust_debug ("failed to parse attr to meta item, right before " |
| "cfg predicate check"); |
| else |
| rust_debug ("attr has been successfully parsed to meta item, " |
| "right before cfg predicate check"); |
| |
| if (!attr.check_cfg_predicate (session)) |
| { |
| // DEBUG |
| rust_debug ( |
| "cfg predicate failed for attribute: \033[0;31m'%s'\033[0m", |
| attr.as_string ().c_str ()); |
| |
| return true; |
| } |
| else |
| { |
| // DEBUG |
| rust_debug ("cfg predicate succeeded for attribute: " |
| "\033[0;31m'%s'\033[0m", |
| attr.as_string ().c_str ()); |
| } |
| } |
| } |
| return false; |
| } |
| |
| // Expands cfg_attr attributes. |
| void |
| MacroExpander::expand_cfg_attrs (AST::AttrVec &attrs) |
| { |
| for (std::size_t i = 0; i < attrs.size ();) |
| { |
| auto &attr = attrs[i]; |
| if (attr.get_path () == "cfg_attr") |
| { |
| if (!attr.is_parsed_to_meta_item ()) |
| attr.parse_attr_to_meta_item (); |
| |
| if (attr.check_cfg_predicate (session)) |
| { |
| // split off cfg_attr |
| AST::AttrVec new_attrs = attr.separate_cfg_attrs (); |
| |
| // remove attr from vector |
| attrs.erase (attrs.begin () + i); |
| |
| // add new attrs to vector |
| attrs.insert (attrs.begin () + i, |
| std::make_move_iterator (new_attrs.begin ()), |
| std::make_move_iterator (new_attrs.end ())); |
| } |
| else |
| { |
| // The predicate is false: the whole cfg_attr attribute is configured |
| // out and expands to nothing, so remove it. Otherwise we would loop on |
| // it forever, as i is only incremented for non-cfg_attr attributes. |
| attrs.erase (attrs.begin () + i); |
| } |
| |
| /* do something - if feature (first token in tree) is in fact enabled, |
| * make tokens listed afterwards into attributes. i.e.: for |
| * [cfg_attr(feature = "wow", wow1, wow2)], if "wow" is true, then add |
| * attributes [wow1] and [wow2] to attribute list. This can also be |
| * recursive, so check for expanded attributes being recursive and |
| * possibly recursively call the expand_attrs? */ |
| } |
| else |
| { |
| i++; |
| } |
| } |
| attrs.shrink_to_fit (); |
| } |
| |
| void |
| MacroExpander::expand_crate () |
| { |
| NodeId scope_node_id = crate.get_node_id (); |
| resolver->get_macro_scope ().push (scope_node_id); |
| |
| /* fill macro/decorator map from init list? not sure where init list comes |
| * from? */ |
| |
| // TODO: does cfg apply for inner attributes? research. |
| // the apparent answer (from playground test) is yes |
| |
| // expand crate cfg_attr attributes |
| expand_cfg_attrs (crate.inner_attrs); |
| |
| if (fails_cfg_with_expand (crate.inner_attrs)) |
| { |
| // basically, delete whole crate |
| crate.strip_crate (); |
| // TODO: maybe create warning here? probably not desired behaviour |
| } |
| // expand module attributes? |
| |
| push_context (ITEM); |
| |
| // expand attributes recursively and strip items if required |
| AttrVisitor attr_visitor (*this); |
| auto &items = crate.items; |
| for (auto it = items.begin (); it != items.end ();) |
| { |
| auto &item = *it; |
| |
| // mark for stripping if required |
| item->accept_vis (attr_visitor); |
| |
| auto fragment = take_expanded_fragment (); |
| if (fragment.should_expand ()) |
| { |
| // Remove the current expanded invocation |
| it = items.erase (it); |
| for (auto &node : fragment.get_nodes ()) |
| { |
| it = items.insert (it, node.take_item ()); |
| it++; |
| } |
| } |
| else if (item->is_marked_for_strip ()) |
| it = items.erase (it); |
| else |
| it++; |
| } |
| |
| pop_context (); |
| |
| // TODO: should recursive attribute and macro expansion be done in the same |
| // traversal? Or in separate ones, as is currently done? |
| |
| // expand module tree recursively |
| |
| // post-process |
| |
| // extract exported macros? |
| } |
| |
| bool |
| MacroExpander::depth_exceeds_recursion_limit () const |
| { |
| return expansion_depth >= cfg.recursion_limit; |
| } |
| |
| bool |
| MacroExpander::try_match_rule (AST::MacroRule &match_rule, |
| AST::DelimTokenTree &invoc_token_tree) |
| { |
| MacroInvocLexer lex (invoc_token_tree.to_token_stream ()); |
| Parser<MacroInvocLexer> parser (lex); |
| |
| AST::MacroMatcher &matcher = match_rule.get_matcher (); |
| |
| expansion_depth++; |
| if (!match_matcher (parser, matcher)) |
| { |
| expansion_depth--; |
| return false; |
| } |
| expansion_depth--; |
| |
| bool used_all_input_tokens = parser.skip_token (END_OF_FILE); |
| return used_all_input_tokens; |
| } |
| |
| bool |
| MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser, |
| AST::MacroMatchFragment &fragment) |
| { |
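| // For example (illustrative): a matcher fragment such as $e:expr makes us |
| // attempt parser.parse_expr () on the invocation's tokens below; if that |
| // parse produces errors, the fragment - and hence the whole rule - does not |
| // match. |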
| switch (fragment.get_frag_spec ().get_kind ()) |
| { |
| case AST::MacroFragSpec::EXPR: |
| parser.parse_expr (); |
| break; |
| |
| case AST::MacroFragSpec::BLOCK: |
| parser.parse_block_expr (); |
| break; |
| |
| case AST::MacroFragSpec::IDENT: |
| parser.parse_identifier_pattern (); |
| break; |
| |
| case AST::MacroFragSpec::LITERAL: |
| parser.parse_literal_expr (); |
| break; |
| |
| case AST::MacroFragSpec::ITEM: |
| parser.parse_item (false); |
| break; |
| |
| case AST::MacroFragSpec::TY: |
| parser.parse_type (); |
| break; |
| |
| case AST::MacroFragSpec::PAT: |
| parser.parse_pattern (); |
| break; |
| |
| case AST::MacroFragSpec::PATH: |
| parser.parse_path_in_expression (); |
| break; |
| |
| case AST::MacroFragSpec::VIS: |
| parser.parse_visibility (); |
| break; |
| |
| case AST::MacroFragSpec::STMT: { |
| auto restrictions = ParseRestrictions (); |
| restrictions.consume_semi = false; |
| parser.parse_stmt (restrictions); |
| break; |
| } |
| |
| case AST::MacroFragSpec::LIFETIME: |
| parser.parse_lifetime_params (); |
| break; |
| |
| // the meta fragment specifier matches the contents of an attribute |
| case AST::MacroFragSpec::META: |
| parser.parse_attribute_body (); |
| break; |
| |
| case AST::MacroFragSpec::TT: |
| parser.parse_token_tree (); |
| break; |
| |
| // we just error out on an invalid fragment specifier |
| case AST::MacroFragSpec::INVALID: |
| return false; |
| } |
| |
| // it matches if the parser did not produce errors trying to parse that type |
| // of item |
| return !parser.has_errors (); |
| } |
| |
| bool |
| MacroExpander::match_matcher (Parser<MacroInvocLexer> &parser, |
| AST::MacroMatcher &matcher, bool in_repetition) |
| { |
| if (depth_exceeds_recursion_limit ()) |
| { |
| rust_error_at (matcher.get_match_locus (), "reached recursion limit"); |
| return false; |
| } |
| |
| auto delimiter = parser.peek_current_token (); |
| |
| // this is used so we can check that we delimit the stream correctly. |
| switch (delimiter->get_id ()) |
| { |
| case LEFT_PAREN: { |
| if (!parser.skip_token (LEFT_PAREN)) |
| return false; |
| } |
| break; |
| |
| case LEFT_SQUARE: { |
| if (!parser.skip_token (LEFT_SQUARE)) |
| return false; |
| } |
| break; |
| |
| case LEFT_CURLY: { |
| if (!parser.skip_token (LEFT_CURLY)) |
| return false; |
| } |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| const MacroInvocLexer &source = parser.get_token_source (); |
| |
| for (auto &match : matcher.get_matches ()) |
| { |
| size_t offs_begin = source.get_offs (); |
| |
| switch (match->get_macro_match_type ()) |
| { |
| case AST::MacroMatch::MacroMatchType::Fragment: { |
| AST::MacroMatchFragment *fragment |
| = static_cast<AST::MacroMatchFragment *> (match.get ()); |
| if (!match_fragment (parser, *fragment)) |
| return false; |
| |
| // the matched fragment gets its offsets into the token stream |
| size_t offs_end = source.get_offs (); |
| if (in_repetition) |
| sub_stack.append_fragment ( |
| MatchedFragment (fragment->get_ident (), offs_begin, offs_end)); |
| else |
| sub_stack.insert_metavar ( |
| MatchedFragment (fragment->get_ident (), offs_begin, offs_end)); |
| } |
| break; |
| |
| case AST::MacroMatch::MacroMatchType::Tok: { |
| AST::Token *tok = static_cast<AST::Token *> (match.get ()); |
| if (!match_token (parser, *tok)) |
| return false; |
| } |
| break; |
| |
| case AST::MacroMatch::MacroMatchType::Repetition: { |
| AST::MacroMatchRepetition *rep |
| = static_cast<AST::MacroMatchRepetition *> (match.get ()); |
| if (!match_repetition (parser, *rep)) |
| return false; |
| } |
| break; |
| |
| case AST::MacroMatch::MacroMatchType::Matcher: { |
| AST::MacroMatcher *m |
| = static_cast<AST::MacroMatcher *> (match.get ()); |
| expansion_depth++; |
| if (!match_matcher (parser, *m, in_repetition)) |
| { |
| expansion_depth--; |
| return false; |
| } |
| expansion_depth--; |
| } |
| break; |
| } |
| } |
| |
| switch (delimiter->get_id ()) |
| { |
| case LEFT_PAREN: { |
| if (!parser.skip_token (RIGHT_PAREN)) |
| return false; |
| } |
| break; |
| |
| case LEFT_SQUARE: { |
| if (!parser.skip_token (RIGHT_SQUARE)) |
| return false; |
| } |
| break; |
| |
| case LEFT_CURLY: { |
| if (!parser.skip_token (RIGHT_CURLY)) |
| return false; |
| } |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| return true; |
| } |
| |
| bool |
| MacroExpander::match_token (Parser<MacroInvocLexer> &parser, AST::Token &token) |
| { |
| // FIXME this needs to actually match the content and the type |
| return parser.skip_token (token.get_id ()); |
| } |
| |
| bool |
| MacroExpander::match_n_matches (Parser<MacroInvocLexer> &parser, |
| AST::MacroMatchRepetition &rep, |
| size_t &match_amount, size_t lo_bound, |
| size_t hi_bound) |
| { |
| match_amount = 0; |
| auto &matches = rep.get_matches (); |
| |
| const MacroInvocLexer &source = parser.get_token_source (); |
| while (true) |
| { |
| // If the current token is a closing macro delimiter, break away. |
| // TODO: Is this correct? |
| auto t_id = parser.peek_current_token ()->get_id (); |
| if (t_id == RIGHT_PAREN || t_id == RIGHT_SQUARE || t_id == RIGHT_CURLY) |
| break; |
| |
| // Skip parsing a separator before the first match; otherwise consume it. |
| // If it isn't present, stop matching further repetitions. |
| if (rep.has_sep () && match_amount > 0) |
| if (!match_token (parser, *rep.get_sep ())) |
| break; |
| |
| bool valid_current_match = false; |
| for (auto &match : matches) |
| { |
| size_t offs_begin = source.get_offs (); |
| switch (match->get_macro_match_type ()) |
| { |
| case AST::MacroMatch::MacroMatchType::Fragment: { |
| AST::MacroMatchFragment *fragment |
| = static_cast<AST::MacroMatchFragment *> (match.get ()); |
| valid_current_match = match_fragment (parser, *fragment); |
| |
| // the matched fragment gets its offsets into the token stream |
| size_t offs_end = source.get_offs (); |
| |
| // The main difference with match_matcher happens here: Instead |
| // of inserting a new fragment, we append to one. If that |
| // fragment does not exist, then the operation is similar to |
| // `insert_fragment` with the difference that we are not |
| // creating a metavariable, but a repetition of one, which is |
| // really different. |
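| // For instance (illustrative): matching m!(a, b, c) against $($i:ident),* |
| // appends three fragments to the same container for "i", whereas a plain |
| // $i:ident outside a repetition maps "i" to exactly one metavariable |
| // fragment. |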
| sub_stack.append_fragment ( |
| MatchedFragment (fragment->get_ident (), offs_begin, |
| offs_end)); |
| } |
| break; |
| |
| case AST::MacroMatch::MacroMatchType::Tok: { |
| AST::Token *tok = static_cast<AST::Token *> (match.get ()); |
| valid_current_match = match_token (parser, *tok); |
| } |
| break; |
| |
| case AST::MacroMatch::MacroMatchType::Repetition: { |
| AST::MacroMatchRepetition *rep |
| = static_cast<AST::MacroMatchRepetition *> (match.get ()); |
| valid_current_match = match_repetition (parser, *rep); |
| } |
| break; |
| |
| case AST::MacroMatch::MacroMatchType::Matcher: { |
| AST::MacroMatcher *m |
| = static_cast<AST::MacroMatcher *> (match.get ()); |
| valid_current_match = match_matcher (parser, *m, true); |
| } |
| break; |
| } |
| } |
| // If we've encountered an error once, stop trying to match more |
| // repetitions |
| if (!valid_current_match) |
| break; |
| |
| match_amount++; |
| |
| // Break early if we notice there are already too many matches |
| if (hi_bound && match_amount > hi_bound) |
| break; |
| } |
| |
| // Check if the number of matches we got is valid: Is it at least the lower |
| // bound and at most the upper bound? |
| bool did_meet_lo_bound = match_amount >= lo_bound; |
| bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true; |
| |
| // If the end-result is valid, then we can clear the parse errors: Since |
| // repetitions are parsed eagerly, it is okay to fail in some cases |
| auto res = did_meet_lo_bound && did_meet_hi_bound; |
| if (res) |
| parser.clear_errors (); |
| |
| return res; |
| } |
| |
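| /* Matches a macro match repetition, e.g. $($e:expr),* in a matcher. The |
| * repetition operator determines the accepted bounds: `*` accepts zero or |
| * more matches, `+` one or more, and `?` zero or one. For instance |
| * (illustrative), $($e:expr),+ matches `1, 2, 3` three times but fails to |
| * match an empty invocation. */ |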
| bool |
| MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser, |
| AST::MacroMatchRepetition &rep) |
| { |
| size_t match_amount = 0; |
| bool res = false; |
| |
| std::string lo_str; |
| std::string hi_str; |
| switch (rep.get_op ()) |
| { |
| case AST::MacroMatchRepetition::MacroRepOp::ANY: |
| lo_str = "0"; |
| hi_str = "+inf"; |
| res = match_n_matches (parser, rep, match_amount); |
| break; |
| case AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE: |
| lo_str = "1"; |
| hi_str = "+inf"; |
| res = match_n_matches (parser, rep, match_amount, 1); |
| break; |
| case AST::MacroMatchRepetition::MacroRepOp::ZERO_OR_ONE: |
| lo_str = "0"; |
| hi_str = "1"; |
| res = match_n_matches (parser, rep, match_amount, 0, 1); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (!res) |
| rust_error_at (rep.get_match_locus (), |
| "invalid amount of matches for macro invocation. Expected " |
| "between %s and %s, got %lu", |
| lo_str.c_str (), hi_str.c_str (), |
| (unsigned long) match_amount); |
| |
| rust_debug_loc (rep.get_match_locus (), "%s matched %lu times", |
| res ? "successfully" : "unsuccessfully", |
| (unsigned long) match_amount); |
| |
| // We have to handle zero fragments differently: They will not have been |
| // "matched" but they are still valid and should be inserted as a special |
| // case. So we go through the stack map, and for every fragment which doesn't |
| // exist, insert a zero-matched fragment. |
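| // For instance (illustrative): expanding m!() against the rule |
| // ($($e:expr),*) => { ... } matches $e zero times, yet the transcriber may |
| // still refer to $e inside a repetition, so "e" must be present in the map |
| // as an empty repetition. |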
| auto &stack_map = sub_stack.peek (); |
| for (auto &match : rep.get_matches ()) |
| { |
| if (match->get_macro_match_type () |
| == AST::MacroMatch::MacroMatchType::Fragment) |
| { |
| auto fragment = static_cast<AST::MacroMatchFragment *> (match.get ()); |
| auto it = stack_map.find (fragment->get_ident ()); |
| |
| if (it == stack_map.end ()) |
| sub_stack.insert_matches (fragment->get_ident (), |
| MatchedFragmentContainer::zero ()); |
| } |
| } |
| |
| return res; |
| } |
| |
| /** |
| * Helper function to call a parsing function zero or more times, until the |
| * delimiter is reached |
| */ |
| static AST::Fragment |
| parse_many (Parser<MacroInvocLexer> &parser, TokenId &delimiter, |
| std::function<AST::SingleASTNode ()> parse_fn) |
| { |
| auto &lexer = parser.get_token_source (); |
| auto start = lexer.get_offs (); |
| |
| std::vector<AST::SingleASTNode> nodes; |
| while (true) |
| { |
| if (parser.peek_current_token ()->get_id () == delimiter) |
| break; |
| |
| auto node = parse_fn (); |
| if (node.is_error ()) |
| { |
| for (auto err : parser.get_errors ()) |
| err.emit (); |
| |
| return AST::Fragment::create_error (); |
| } |
| |
| nodes.emplace_back (std::move (node)); |
| } |
| auto end = lexer.get_offs (); |
| |
| return AST::Fragment (std::move (nodes), lexer.get_token_slice (start, end)); |
| } |
| |
| /** |
| * Transcribe 0 or more items from a macro invocation |
| * |
| * @param parser Parser to extract items from |
| * @param delimiter Id of the token on which parsing should stop |
| */ |
| static AST::Fragment |
| transcribe_many_items (Parser<MacroInvocLexer> &parser, TokenId &delimiter) |
| { |
| return parse_many (parser, delimiter, [&parser] () { |
| auto item = parser.parse_item (true); |
| return AST::SingleASTNode (std::move (item)); |
| }); |
| } |
| |
| /** |
| * Transcribe 0 or more external items from a macro invocation |
| * |
| * @param parser Parser to extract items from |
| * @param delimiter Id of the token on which parsing should stop |
| */ |
| static AST::Fragment |
| transcribe_many_ext (Parser<MacroInvocLexer> &parser, TokenId &delimiter) |
| { |
| return parse_many (parser, delimiter, [&parser] () { |
| auto item = parser.parse_external_item (); |
| return AST::SingleASTNode (std::move (item)); |
| }); |
| } |
| |
| /** |
| * Transcribe 0 or more trait items from a macro invocation |
| * |
| * @param parser Parser to extract items from |
| * @param delimiter Id of the token on which parsing should stop |
| */ |
| static AST::Fragment |
| transcribe_many_trait_items (Parser<MacroInvocLexer> &parser, |
| TokenId &delimiter) |
| { |
| return parse_many (parser, delimiter, [&parser] () { |
| auto item = parser.parse_trait_item (); |
| return AST::SingleASTNode (std::move (item)); |
| }); |
| } |
| |
| /** |
| * Transcribe 0 or more impl items from a macro invocation |
| * |
| * @param parser Parser to extract items from |
| * @param delimiter Id of the token on which parsing should stop |
| */ |
| static AST::Fragment |
| transcribe_many_impl_items (Parser<MacroInvocLexer> &parser, TokenId &delimiter) |
| { |
| return parse_many (parser, delimiter, [&parser] () { |
| auto item = parser.parse_inherent_impl_item (); |
| return AST::SingleASTNode (std::move (item)); |
| }); |
| } |
| |
| /** |
| * Transcribe 0 or more trait impl items from a macro invocation |
| * |
| * @param parser Parser to extract items from |
| * @param delimiter Id of the token on which parsing should stop |
| */ |
| static AST::Fragment |
| transcribe_many_trait_impl_items (Parser<MacroInvocLexer> &parser, |
| TokenId &delimiter) |
| { |
| return parse_many (parser, delimiter, [&parser] () { |
| auto item = parser.parse_trait_impl_item (); |
| return AST::SingleASTNode (std::move (item)); |
| }); |
| } |
| |
| /** |
| * Transcribe 0 or more statements from a macro invocation |
| * |
| * @param parser Parser to extract statements from |
| * @param delimiter Id of the token on which parsing should stop |
| */ |
| static AST::Fragment |
| transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter) |
| { |
| auto restrictions = ParseRestrictions (); |
| restrictions.consume_semi = false; |
| |
| // FIXME: This is invalid! It needs to also handle cases where the macro |
| // transcriber is an expression, but since the macro call is followed by |
| // a semicolon, it's a valid ExprStmt |
| return parse_many (parser, delimiter, [&parser, restrictions] () { |
| auto stmt = parser.parse_stmt (restrictions); |
| return AST::SingleASTNode (std::move (stmt)); |
| }); |
| } |
| |
| /** |
| * Transcribe one expression from a macro invocation |
| * |
| * @param parser Parser to extract the expression from |
| */ |
| static AST::Fragment |
| transcribe_expression (Parser<MacroInvocLexer> &parser) |
| { |
| auto &lexer = parser.get_token_source (); |
| auto start = lexer.get_offs (); |
| |
| auto expr = parser.parse_expr (); |
| if (expr == nullptr) |
| return AST::Fragment::create_error (); |
| |
| auto end = lexer.get_offs (); |
| |
| return AST::Fragment ({std::move (expr)}, lexer.get_token_slice (start, end)); |
| } |
| |
| /** |
| * Transcribe one type from a macro invocation |
| * |
| * @param parser Parser to extract the type from |
| */ |
| static AST::Fragment |
| transcribe_type (Parser<MacroInvocLexer> &parser) |
| { |
| auto &lexer = parser.get_token_source (); |
| auto start = lexer.get_offs (); |
| |
| auto type = parser.parse_type (true); |
| for (auto err : parser.get_errors ()) |
| err.emit (); |
| |
| auto end = lexer.get_offs (); |
| |
| return AST::Fragment ({std::move (type)}, lexer.get_token_slice (start, end)); |
| } |
| |
| static AST::Fragment |
| transcribe_on_delimiter (Parser<MacroInvocLexer> &parser, bool semicolon, |
| AST::DelimType delimiter, TokenId last_token_id) |
| { |
| if (semicolon || delimiter == AST::DelimType::CURLY) |
| return transcribe_many_stmts (parser, last_token_id); |
| else |
| return transcribe_expression (parser); |
| } |
| |
| static AST::Fragment |
| transcribe_context (MacroExpander::ContextType ctx, |
| Parser<MacroInvocLexer> &parser, bool semicolon, |
| AST::DelimType delimiter, TokenId last_token_id) |
| { |
| // The flow-chart in order to choose a parsing function is as follows: |
| // |
| // [switch special context] |
| // -- Item --> parser.parse_item(); |
| // -- Trait --> parser.parse_trait_item(); |
| // -- Impl --> parser.parse_inherent_impl_item(); |
| // -- Extern --> parser.parse_external_item(); |
| // -- None --> [has semicolon?] |
| // -- Yes --> parser.parse_stmt(); |
| // -- No --> [switch invocation.delimiter()] |
| // -- { } --> parser.parse_stmt(); |
| // -- _ --> parser.parse_expr(); // once! |
| |
| // If there is a semicolon OR we are expanding a MacroInvocationSemi, then |
| // we can parse multiple items. Otherwise, parse *one* expression |
| |
| switch (ctx) |
| { |
| case MacroExpander::ContextType::ITEM: |
| return transcribe_many_items (parser, last_token_id); |
| break; |
| case MacroExpander::ContextType::TRAIT: |
| return transcribe_many_trait_items (parser, last_token_id); |
| break; |
| case MacroExpander::ContextType::IMPL: |
| return transcribe_many_impl_items (parser, last_token_id); |
| break; |
| case MacroExpander::ContextType::TRAIT_IMPL: |
| return transcribe_many_trait_impl_items (parser, last_token_id); |
| break; |
| case MacroExpander::ContextType::EXTERN: |
| return transcribe_many_ext (parser, last_token_id); |
| break; |
| case MacroExpander::ContextType::TYPE: |
| return transcribe_type (parser); |
| break; |
| default: |
| return transcribe_on_delimiter (parser, semicolon, delimiter, |
| last_token_id); |
| } |
| } |
| |
| static std::string |
| tokens_to_str (std::vector<std::unique_ptr<AST::Token>> &tokens) |
| { |
| std::string str; |
| if (!tokens.empty ()) |
| { |
| str += tokens[0]->as_string (); |
| for (size_t i = 1; i < tokens.size (); i++) |
| str += " " + tokens[i]->as_string (); |
| } |
| |
| return str; |
| } |
| |
| AST::Fragment |
| MacroExpander::transcribe_rule ( |
| AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree, |
| std::map<std::string, MatchedFragmentContainer> &matched_fragments, |
| bool semicolon, ContextType ctx) |
| { |
| // We can manipulate the token tree to substitute the dollar identifiers so |
| // that, when we call parse, they have already been substituted for us. |
| AST::MacroTranscriber &transcriber = match_rule.get_transcriber (); |
| AST::DelimTokenTree &transcribe_tree = transcriber.get_token_tree (); |
| |
| auto invoc_stream = invoc_token_tree.to_token_stream (); |
| auto macro_rule_tokens = transcribe_tree.to_token_stream (); |
| |
| auto substitute_context |
| = SubstituteCtx (invoc_stream, macro_rule_tokens, matched_fragments); |
| std::vector<std::unique_ptr<AST::Token>> substituted_tokens |
| = substitute_context.substitute_tokens (); |
| |
| rust_debug ("substituted tokens: %s", |
| tokens_to_str (substituted_tokens).c_str ()); |
| |
| // parse it to a Fragment |
| MacroInvocLexer lex (std::move (substituted_tokens)); |
| Parser<MacroInvocLexer> parser (lex); |
| |
| auto last_token_id = TokenId::RIGHT_CURLY; |
| |
| // this is used so we can check that we delimit the stream correctly. |
| switch (transcribe_tree.get_delim_type ()) |
| { |
| case AST::DelimType::PARENS: |
| last_token_id = TokenId::RIGHT_PAREN; |
| rust_assert (parser.skip_token (LEFT_PAREN)); |
| break; |
| |
| case AST::DelimType::CURLY: |
| rust_assert (parser.skip_token (LEFT_CURLY)); |
| break; |
| |
| case AST::DelimType::SQUARE: |
| last_token_id = TokenId::RIGHT_SQUARE; |
| rust_assert (parser.skip_token (LEFT_SQUARE)); |
| break; |
| } |
| |
| // see https://github.com/Rust-GCC/gccrs/issues/22 |
| // TL;DR: |
| // - Treat all macro invocations with parentheses, (), or square brackets, |
| // [], as expressions. |
| // - If the macro invocation has curly brackets, {}, it may be parsed as a |
| // statement depending on the context. |
| // - If the macro invocation has a semicolon at the end, it must be parsed |
| // as a statement (either via ExpressionStatement or |
| // MacroInvocationWithSemi) |
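| // |
| // E.g. (illustrative): `m!()` and `m![]` used in expression position are |
| // parsed as a single expression, `m! { ... }` inside a block may be parsed |
| // as a statement depending on context, and `m!();` is always parsed as a |
| // statement. |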
| |
| auto fragment |
| = transcribe_context (ctx, parser, semicolon, |
| invoc_token_tree.get_delim_type (), last_token_id); |
| |
| // emit any errors |
| if (parser.has_errors ()) |
| { |
| for (auto &err : parser.get_errors ()) |
| rust_error_at (err.locus, "%s", err.message.c_str ()); |
| return AST::Fragment::create_error (); |
| } |
| |
| // are all the tokens used? |
| bool did_delimit = parser.skip_token (last_token_id); |
| |
| bool reached_end_of_stream = did_delimit && parser.skip_token (END_OF_FILE); |
| if (!reached_end_of_stream) |
| { |
| const_TokenPtr current_token = parser.peek_current_token (); |
| rust_error_at (current_token->get_locus (), |
| "tokens here and after are unparsed"); |
| } |
| |
| return fragment; |
| } |
| } // namespace Rust |