| // class template regex -*- C++ -*- |
| |
| // Copyright (C) 2013-2021 Free Software Foundation, Inc. |
| // |
| // This file is part of the GNU ISO C++ Library. This library is free |
| // software; you can redistribute it and/or modify it under the |
| // terms of the GNU General Public License as published by the |
| // Free Software Foundation; either version 3, or (at your option) |
| // any later version. |
| |
| // This library is distributed in the hope that it will be useful, |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| // GNU General Public License for more details. |
| |
| // Under Section 7 of GPL version 3, you are granted additional |
| // permissions described in the GCC Runtime Library Exception, version |
| // 3.1, as published by the Free Software Foundation. |
| |
| // You should have received a copy of the GNU General Public License and |
| // a copy of the GCC Runtime Library Exception along with this program; |
| // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| // <http://www.gnu.org/licenses/>. |
| |
| /** |
| * @file bits/regex_executor.h |
| * This is an internal header file, included by other library headers. |
| * Do not attempt to use it directly. @headername{regex} |
| */ |
| |
| // FIXME convert comments to doxygen format. |
| |
| namespace std _GLIBCXX_VISIBILITY(default) |
| { |
| _GLIBCXX_BEGIN_NAMESPACE_VERSION |
| |
| namespace __detail |
| { |
| /** |
| * @addtogroup regex-detail |
| * @{ |
| */ |
| |
| /** |
| * @brief Takes a regex and an input string and does the matching. |
| * |
| * The %_Executor class has two modes: DFS mode and BFS mode, controlled |
| * by the template parameter %__dfs_mode. |
| */ |
| template<typename _BiIter, typename _Alloc, typename _TraitsT, |
| bool __dfs_mode> |
| class _Executor |
| { |
| using __search_mode = integral_constant<bool, __dfs_mode>; |
| using __dfs = true_type; |
| using __bfs = false_type; |
| |
| enum class _Match_mode : unsigned char { _Exact, _Prefix }; |
| |
| public: |
| typedef typename iterator_traits<_BiIter>::value_type _CharT; |
| typedef basic_regex<_CharT, _TraitsT> _RegexT; |
| typedef _GLIBCXX_STD_C::vector<sub_match<_BiIter>, _Alloc> _ResultsVec; |
| typedef regex_constants::match_flag_type _FlagT; |
| typedef typename _TraitsT::char_class_type _ClassT; |
| typedef _NFA<_TraitsT> _NFAT; |
| |
| public: |
| _Executor(_BiIter __begin, |
| _BiIter __end, |
| _ResultsVec& __results, |
| const _RegexT& __re, |
| _FlagT __flags) |
| : _M_begin(__begin), |
| _M_end(__end), |
| _M_re(__re), |
| _M_nfa(*__re._M_automaton), |
| _M_results(__results), |
| _M_rep_count(_M_nfa.size()), |
| _M_states(_M_nfa._M_start(), _M_nfa.size()), |
| _M_flags(__flags) |
| { |
| using namespace regex_constants; |
| if (__flags & match_prev_avail) // ignore not_bol and not_bow |
| _M_flags &= ~(match_not_bol | match_not_bow); |
| } |
| |
| // Set matched when string exactly matches the pattern. |
| bool |
| _M_match() |
| { |
| _M_current = _M_begin; |
| return _M_main(_Match_mode::_Exact); |
| } |
| |
| // Set matched when some prefix of the string matches the pattern. |
| bool |
| _M_search_from_first() |
| { |
| _M_current = _M_begin; |
| return _M_main(_Match_mode::_Prefix); |
| } |
| |
| bool |
| _M_search(); |
| |
| private: |
| void |
| _M_rep_once_more(_Match_mode __match_mode, _StateIdT); |
| |
| void |
| _M_handle_repeat(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_subexpr_begin(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_subexpr_end(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_line_begin_assertion(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_line_end_assertion(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_word_boundary(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_subexpr_lookahead(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_match(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_backref(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_accept(_Match_mode, _StateIdT); |
| |
| void |
| _M_handle_alternative(_Match_mode, _StateIdT); |
| |
| void |
| _M_dfs(_Match_mode __match_mode, _StateIdT __start); |
| |
| bool |
| _M_main(_Match_mode __match_mode) |
| { return _M_main_dispatch(__match_mode, __search_mode{}); } |
| |
| bool |
| _M_main_dispatch(_Match_mode __match_mode, __dfs); |
| |
| bool |
| _M_main_dispatch(_Match_mode __match_mode, __bfs); |
| |
| bool |
| _M_is_word(_CharT __ch) const |
| { |
| static const _CharT __s[2] = { 'w' }; |
| return _M_re._M_automaton->_M_traits.isctype |
| (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1)); |
| } |
| |
| bool |
| _M_at_begin() const |
| { |
| if (_M_current == _M_begin) |
| { |
| // match_not_bol means ^ does not match [_M_begin,_M_begin) |
| if (_M_flags & regex_constants::match_not_bol) |
| return false; |
| // match_prev_avail means _M_begin is not the start of the input. |
| if (_M_flags & regex_constants::match_prev_avail) |
| { |
| // For ECMAScript multiline matches, check if the previous |
| // character is a line terminator. |
| if (_M_match_multiline()) |
| return _M_is_line_terminator(*std::prev(_M_current)); |
| else |
| return false; |
| } |
| else // ^ matches at _M_begin |
| return true; |
| } |
| else if (_M_match_multiline()) |
| return _M_is_line_terminator(*std::prev(_M_current)); |
| else |
| return false; |
| } |
| |
| bool |
| _M_at_end() const |
| { |
| if (_M_current == _M_end) |
| return !(_M_flags & regex_constants::match_not_eol); |
| else if (_M_match_multiline()) |
| return _M_is_line_terminator(*_M_current); |
| else |
| return false; |
| } |
| |
| bool |
| _M_word_boundary() const; |
| |
| bool |
| _M_lookahead(_StateIdT __next); |
| |
| bool |
| _M_is_line_terminator(_CharT __c) const |
| { |
| const auto& __traits = _M_re._M_automaton->_M_traits; |
| const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc()); |
| const char __n{ __ct.narrow(__c, ' ') }; |
| if (__n == '\n') |
| return true; |
| if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript) |
| { |
| if (__n == '\r') |
| return true; |
| // FIXME: U+2028 (line separator) and U+2029 (paragraph separator) |
| } |
| return false; |
| } |
| |
| bool |
| _M_match_multiline() const noexcept |
| { |
| constexpr auto __m |
| = regex_constants::ECMAScript | regex_constants::__multiline; |
| return (_M_re._M_automaton->_M_options() & __m) == __m; |
| } |
| |
| // Holds additional information used in BFS-mode. |
| template<typename _SearchMode, typename _ResultsVec> |
| struct _State_info; |
| |
| template<typename _ResultsVec> |
| struct _State_info<__bfs, _ResultsVec> |
| { |
| explicit |
| _State_info(_StateIdT __start, size_t __n) |
| : _M_visited_states(new bool[__n]()), _M_start(__start) |
| { } |
| |
| ~_State_info() { delete[] _M_visited_states; } |
| |
| _State_info(const _State_info&) = delete; |
| _State_info& operator=(const _State_info&) = delete; |
| |
| bool _M_visited(_StateIdT __i) |
| { |
| if (_M_visited_states[__i]) |
| return true; |
| _M_visited_states[__i] = true; |
| return false; |
| } |
| |
| void _M_queue(_StateIdT __i, const _ResultsVec& __res) |
| { _M_match_queue.emplace_back(__i, __res); } |
| |
| // Dummy implementations for BFS mode. |
| _BiIter* _M_get_sol_pos() { return nullptr; } |
| |
| // Saves states that need to be considered for the next character. |
| _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue; |
| // Indicates which states are already visited. |
| bool* _M_visited_states; |
| // To record current solution. |
| _StateIdT _M_start; |
| }; |
| |
| template<typename _ResultsVec> |
| struct _State_info<__dfs, _ResultsVec> |
| { |
| explicit |
| _State_info(_StateIdT __start, size_t) : _M_start(__start) |
| { } |
| |
| // Dummy implementations for DFS mode. |
| bool _M_visited(_StateIdT) const { return false; } |
| void _M_queue(_StateIdT, const _ResultsVec&) { } |
| |
| _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } |
| |
| // To record current solution. |
| _StateIdT _M_start; |
| _BiIter _M_sol_pos; |
| }; |
| |
| public: |
| _ResultsVec _M_cur_results; |
| _BiIter _M_current; |
| _BiIter _M_begin; |
| const _BiIter _M_end; |
| const _RegexT& _M_re; |
| const _NFAT& _M_nfa; |
| _ResultsVec& _M_results; |
| _GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count; |
| _State_info<__search_mode, _ResultsVec> _M_states; |
| _FlagT _M_flags; |
| // Do we have a solution so far? |
| bool _M_has_sol; |
| }; |
| |
| ///@} regex-detail |
| } // namespace __detail |
| _GLIBCXX_END_NAMESPACE_VERSION |
| } // namespace std |
| |
| #include <bits/regex_executor.tcc> |