// class template regex -*- C++ -*-

// Copyright (C) 2013-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/**
 *  @file bits/regex_scanner.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */
| |
| namespace std _GLIBCXX_VISIBILITY(default) |
| { |
| _GLIBCXX_BEGIN_NAMESPACE_VERSION |
| |
| namespace __detail |
| { |
| /** |
| * @addtogroup regex-detail |
| * @{ |
| */ |
| |
/**
 * @brief Character-type-independent part of the regex scanner.
 *
 * Holds the token enumeration, the scanner state, the grammar flags and
 * the lookup tables (character -> token, escape letter -> character,
 * per-grammar special-character sets).  _Scanner<_CharT> derives from
 * this struct.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT : unsigned
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_bracket_dash,
    _S_token_unknown = -1u
  };

protected:
  /// Grammar / option flags as passed to basic_regex.
  typedef std::regex_constants::syntax_option_type _FlagT;

  /// Which part of a pattern the scanner is currently inside.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Set up the scanner state for the grammar named by @p __flags.
   *
   * - The escape table is the ECMAScript one when the ECMAScript flag is
   *   set and the awk one otherwise.
   * - The special-character set is chosen by testing the grammar flags in
   *   the order ECMAScript, basic, extended, grep, egrep, awk; grep and
   *   egrep use the basic / extended sets with '\n' appended.
   *
   * If none of those flags is set no special-character set is found and
   * the __glibcxx_assert in the body fires (when assertions are enabled).
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    // Give the current-token slot a defined value instead of leaving it
    // indeterminate until something assigns it.
    _M_token(_S_token_unknown),
    _M_escape_tbl(_M_is_ecma()
                  ? _M_ecma_escape_tbl
                  : _M_awk_escape_tbl),
    _M_spec_char(_M_is_ecma()
                 ? _M_ecma_spec_char
                 : _M_flags & std::regex_constants::basic
                 ? _M_basic_spec_char
                 : _M_flags & std::regex_constants::extended
                 ? _M_extended_spec_char
                 : _M_flags & std::regex_constants::grep
                 ? ".[\\*^$\n"
                 : _M_flags & std::regex_constants::egrep
                 ? ".[\\()*+?{|^$\n"
                 : _M_flags & std::regex_constants::awk
                 ? _M_extended_spec_char
                 : nullptr),
    _M_at_bracket_start(false)
  { __glibcxx_assert(_M_spec_char); }

protected:
  /**
   * @brief Look up an escape letter in the active escape table.
   * @param __c  The character following the backslash.
   * @return Pointer to the character the escape stands for, or nullptr
   *         if @p __c is not a key of the table.
   *
   * The table ends at the first entry whose key is '\0', so '\0' itself
   * is never found.
   */
  const char*
  _M_find_escape(char __c) const
  {
    auto __it = _M_escape_tbl;
    for (; __it->first != '\0'; ++__it)
      if (__it->first == __c)
        return &__it->second;
    return nullptr;
  }

  /// True if the ECMAScript flag is set.
  bool
  _M_is_ecma() const
  { return _M_flags & std::regex_constants::ECMAScript; }

  /// True if the basic or grep flag is set.
  bool
  _M_is_basic() const
  {
    return _M_flags & (std::regex_constants::basic
                       | std::regex_constants::grep);
  }

  /// True if the extended, egrep or awk flag is set.
  bool
  _M_is_extended() const
  {
    return _M_flags & (std::regex_constants::extended
                       | std::regex_constants::egrep
                       | std::regex_constants::awk);
  }

  /// True if the grep or egrep flag is set.
  bool
  _M_is_grep() const
  {
    return _M_flags & (std::regex_constants::grep
                       | std::regex_constants::egrep);
  }

  /// True if the awk flag is set.
  bool
  _M_is_awk() const
  { return _M_flags & std::regex_constants::awk; }

protected:
  // TODO: Make them static in the next abi change.

  /// Single pattern character -> token.  The last entry has key '\0'
  /// (presumably the end marker, as in the escape tables below).
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  /// Escape letter -> character, used when the ECMAScript flag is set.
  /// Terminated by a '\0' key.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Escape letter -> character, used when the ECMAScript flag is not
  /// set.  Terminated by a '\0' key.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Per-grammar special-character sets the constructor picks from.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  // The declaration order below is also the initialisation order the
  // constructor relies on: _M_flags must come before _M_escape_tbl and
  // _M_spec_char, whose initialisers read it.
  _StateT                       _M_state;
  _FlagT                        _M_flags;
  _TokenT                       _M_token;
  const std::pair<char, char>*  _M_escape_tbl;
  const char*                   _M_spec_char;
  bool                          _M_at_bracket_start;
};
| |
| /** |
| * @brief Scans an input range for regex tokens. |
| * |
| * The %_Scanner class interprets the regular expression pattern in |
| * the input range passed to its constructor as a sequence of parse |
| * tokens passed to the regular expression compiler. The sequence |
| * of tokens provided depends on the flag settings passed to the |
| * constructor: different regular expression grammars will interpret |
| * the same input pattern in syntactically different ways. |
| */ |
| template<typename _CharT> |
| class _Scanner |
| : public _ScannerBase |
| { |
| public: |
| typedef std::basic_string<_CharT> _StringT; |
| typedef regex_constants::syntax_option_type _FlagT; |
| typedef const std::ctype<_CharT> _CtypeT; |
| |
| _Scanner(const _CharT* __begin, const _CharT* __end, |
| _FlagT __flags, std::locale __loc); |
| |
| void |
| _M_advance(); |
| |
| _TokenT |
| _M_get_token() const noexcept |
| { return _M_token; } |
| |
| const _StringT& |
| _M_get_value() const noexcept |
| { return _M_value; } |
| |
| #ifdef _GLIBCXX_DEBUG |
| std::ostream& |
| _M_print(std::ostream&); |
| #endif |
| |
| private: |
| void |
| _M_scan_normal(); |
| |
| void |
| _M_scan_in_bracket(); |
| |
| void |
| _M_scan_in_brace(); |
| |
| void |
| _M_eat_escape_ecma(); |
| |
| void |
| _M_eat_escape_posix(); |
| |
| void |
| _M_eat_escape_awk(); |
| |
| void |
| _M_eat_class(char); |
| |
| const _CharT* _M_current; |
| const _CharT* _M_end; |
| _CtypeT& _M_ctype; |
| _StringT _M_value; |
| void (_Scanner::* _M_eat_escape)(); |
| }; |
| |
| ///@} regex-detail |
| } // namespace __detail |
| _GLIBCXX_END_NAMESPACE_VERSION |
| } // namespace std |
| |
| #include <bits/regex_scanner.tcc> |