// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

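  // The default completion function for std::barrier: invoking it at the
  // end of a phase does nothing.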
  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };

/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from the pseudocode in the literature for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoiding cache line thrashing
    by flattening the tree structure into cache-line-sized arrays that are
    indexed efficiently.

*/
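
/*

Phase values advance in steps of two. To arrive at a node in a given round,
a thread CASes the node's ticket from the old phase value to old+1 (a "half
step": it is the first of a pair and its arrival work is done) or, if the
ticket already holds old+1 or the node has only a single contender this
round, to old+2 (a "full step": the node is complete and the thread moves on
to the next round). When a whole phase completes, _M_phase itself advances
by two. Each node keeps one ticket per round in a 64-entry array, and every
round halves the number of contending threads, so 64 rounds are enough for
any expected count representable as ptrdiff_t.

*/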

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
        __atomic_phase_ref_t::required_alignment;

      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
        alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;                           // threads expected per phase
      unique_ptr<__state_t[]> _M_state;                // one tree node per pair of threads
      __atomic_base<ptrdiff_t> _M_expected_adjustment; // pending arrive_and_drop decrements
      _CompletionF _M_completion;                      // run by the thread completing a phase

      alignas(__phase_alignment) __barrier_phase_t _M_phase; // current phase value

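      // Called once per arrival. Climbs the tree one round at a time:
      // returns false if this thread handed the phase off to a partner and
      // has finished arriving, true if it was the final arrival and must
      // now complete the phase.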
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
        const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
        const auto __half_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 1);
        const auto __full_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 2);

        size_t __current_expected = _M_expected;
        __current %= ((_M_expected + 1) >> 1);

        for (int __round = 0; ; ++__round)
          {
            if (__current_expected <= 1)
              return true;
            size_t const __end_node = ((__current_expected + 1) >> 1),
                         __last_node = __end_node - 1;
            for ( ; ; ++__current)
              {
                if (__current == __end_node)
                  __current = 0;
                auto __expect = __old_phase;
                __atomic_phase_ref_t __phase(_M_state[__current]
                                               .__tickets[__round]);
                if (__current == __last_node && (__current_expected & 1))
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break; // I'm 1 in 1, go to next __round
                  }
                else if (__phase.compare_exchange_strong(__expect, __half_step,
                                                         memory_order_acq_rel))
                  {
                    return false; // I'm 1 in 2, done with arrival
                  }
                else if (__expect == __half_step)
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break; // I'm 2 in 2, go to next __round
                  }
              }
            __current_expected = __last_node + 1;
            __current >>= 1;
          }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

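      // The tree needs one __state_t node per pair of expected threads,
      // i.e. (expected + 1) / 2 nodes.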
      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
      : _M_expected(__expected), _M_expected_adjustment(0),
        _M_completion(move(__completion)),
        _M_phase(static_cast<__barrier_phase_t>(0))
      {
        size_t const __count = (_M_expected + 1) >> 1;

        _M_state = std::make_unique<__state_t[]>(__count);
      }

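      // Record __update arrivals by the calling thread, whose id is hashed
      // to choose a starting node in the tree. The thread that completes a
      // phase runs the completion function, folds any pending
      // arrive_and_drop adjustments into _M_expected, advances _M_phase by
      // two, and notifies all waiters. Returns the phase that was current
      // when arrival began, for use as an arrival token by wait().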
      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
        std::hash<std::thread::id> __hasher;
        size_t __current = __hasher(std::this_thread::get_id());
        __atomic_phase_ref_t __phase(_M_phase);
        const auto __old_phase = __phase.load(memory_order_relaxed);
        const auto __cur = static_cast<unsigned char>(__old_phase);
        for (; __update; --__update)
          {
            if (_M_arrive(__old_phase, __current))
              {
                _M_completion();
                _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
                _M_expected_adjustment.store(0, memory_order_relaxed);
                auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
                __phase.store(__new_phase, memory_order_release);
                __phase.notify_all();
              }
          }
        return __old_phase;
      }

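      // Block until _M_phase has moved past the phase captured in the
      // arrival token, using the atomic wait/notify mechanism on _M_phase.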
      void
      wait(arrival_token&& __old_phase) const
      {
        __atomic_phase_const_ref_t __phase(_M_phase);
        auto const __test_fn = [=]
          {
            return __phase.load(memory_order_acquire) != __old_phase;
          };
        std::__atomic_wait_address(&_M_phase, __test_fn);
      }

      void
      arrive_and_drop()
      {
        _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
        (void)arrive(1);
      }
    };

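  /** @brief A C++20 thread coordination mechanism.
   *
   *  A barrier is constructed with an expected count of participating
   *  threads. Each participant arrives once per phase; when the expected
   *  number of arrivals has been reached, the completion function is run
   *  and the barrier automatically resets for the next phase.
   *
   *  A minimal usage sketch (prepare_chunk and combine_results are
   *  hypothetical functions, shown only for illustration):
   *
   *  @code
   *  std::barrier<> sync_point(3);   // three participating threads
   *
   *  void worker()
   *  {
   *    prepare_chunk();              // hypothetical per-thread work
   *    sync_point.arrive_and_wait(); // block until all three have arrived
   *    combine_results();            // every thread's chunk is now ready
   *  }
   *  @endcode
   */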
  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note: we may introduce a "central" barrier algorithm at some point
      // for more space-constrained targets.
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
      // Move-only token type returned by arrive() and consumed by wait().
      class arrival_token final
      {
      public:
        arrival_token(arrival_token&&) = default;
        arrival_token& operator=(arrival_token&&) = default;
        ~arrival_token() = default;

      private:
        friend class barrier;
        using __token = typename __algorithm_t::arrival_token;
        explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
        __token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER