// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

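  // The default completion function for std::barrier: invoking it at the
  // end of a phase does nothing.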
  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };

/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from the pseudocode in the literature for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoiding cache line thrashing
    by flattening the tree structure into cache-line-sized arrays that are
    indexed efficiently.

*/
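
/*

Phase values advance in steps of two. To arrive at a node in a given round,
a thread CASes the node's ticket from the old phase value to old+1 (a "half
step": it is the first of a pair and its arrival work is done) or, if the
ticket already holds old+1 or the node has only a single contender this
round, to old+2 (a "full step": the node is complete and the thread moves on
to the next round). When a whole phase completes, _M_phase itself advances
by two. Each node keeps one ticket per round in a 64-entry array, and every
round halves the number of contending threads, so 64 rounds are enough for
any expected count representable as ptrdiff_t.

*/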

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
        __atomic_phase_ref_t::required_alignment;

      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
        alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;                           // threads expected per phase
      unique_ptr<__state_t[]> _M_state;                // one tree node per pair of threads
      __atomic_base<ptrdiff_t> _M_expected_adjustment; // pending arrive_and_drop decrements
      _CompletionF _M_completion;                      // run by the thread completing a phase

      alignas(__phase_alignment) __barrier_phase_t _M_phase; // current phase value

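      // Called once per arrival. Climbs the tree one round at a time:
      // returns false if this thread handed the phase off to a partner and
      // has finished arriving, true if it was the final arrival and must
      // now complete the phase.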
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
        const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
        const auto __half_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 1);
        const auto __full_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 2);

        size_t __current_expected = _M_expected;
        __current %= ((_M_expected + 1) >> 1);

        for (int __round = 0; ; ++__round)
          {
            if (__current_expected <= 1)
              return true;
            size_t const __end_node = ((__current_expected + 1) >> 1),
                         __last_node = __end_node - 1;
            for ( ; ; ++__current)
              {
                if (__current == __end_node)
                  __current = 0;
                auto __expect = __old_phase;
                __atomic_phase_ref_t __phase(_M_state[__current]
                                               .__tickets[__round]);
                if (__current == __last_node && (__current_expected & 1))
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break; // I'm 1 in 1, go to next __round
                  }
                else if (__phase.compare_exchange_strong(__expect, __half_step,
                                                         memory_order_acq_rel))
                  {
                    return false; // I'm 1 in 2, done with arrival
                  }
                else if (__expect == __half_step)
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break; // I'm 2 in 2, go to next __round
                  }
              }
            __current_expected = __last_node + 1;
            __current >>= 1;
          }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

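      // The tree needs one __state_t node per pair of expected threads,
      // i.e. (expected + 1) / 2 nodes.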
      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
      : _M_expected(__expected), _M_expected_adjustment(0),
        _M_completion(move(__completion)),
        _M_phase(static_cast<__barrier_phase_t>(0))
      {
        size_t const __count = (_M_expected + 1) >> 1;

        _M_state = std::make_unique<__state_t[]>(__count);
      }

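      // Record __update arrivals by the calling thread, whose id is hashed
      // to choose a starting node in the tree. The thread that completes a
      // phase runs the completion function, folds any pending
      // arrive_and_drop adjustments into _M_expected, advances _M_phase by
      // two, and notifies all waiters. Returns the phase that was current
      // when arrival began, for use as an arrival token by wait().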
      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
        std::hash<std::thread::id> __hasher;
        size_t __current = __hasher(std::this_thread::get_id());
        __atomic_phase_ref_t __phase(_M_phase);
        const auto __old_phase = __phase.load(memory_order_relaxed);
        const auto __cur = static_cast<unsigned char>(__old_phase);
        for (; __update; --__update)
          {
            if (_M_arrive(__old_phase, __current))
              {
                _M_completion();
                _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
                _M_expected_adjustment.store(0, memory_order_relaxed);
                auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
                __phase.store(__new_phase, memory_order_release);
                __phase.notify_all();
              }
          }
        return __old_phase;
      }

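      // Block until _M_phase has moved past the phase captured in the
      // arrival token, using the atomic wait/notify mechanism on _M_phase.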
      void
      wait(arrival_token&& __old_phase) const
      {
        __atomic_phase_const_ref_t __phase(_M_phase);
        auto const __test_fn = [=]
          {
            return __phase.load(memory_order_acquire) != __old_phase;
          };
        std::__atomic_wait_address(&_M_phase, __test_fn);
      }

      void
      arrive_and_drop()
      {
        _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
        (void)arrive(1);
      }
    };

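  /** @brief A C++20 thread coordination mechanism.
   *
   *  A barrier is constructed with an expected count of participating
   *  threads. Each participant arrives once per phase; when the expected
   *  number of arrivals has been reached, the completion function is run
   *  and the barrier automatically resets for the next phase.
   *
   *  A minimal usage sketch (prepare_chunk and combine_results are
   *  hypothetical functions, shown only for illustration):
   *
   *  @code
   *  std::barrier<> sync_point(3);   // three participating threads
   *
   *  void worker()
   *  {
   *    prepare_chunk();              // hypothetical per-thread work
   *    sync_point.arrive_and_wait(); // block until all three have arrived
   *    combine_results();            // every thread's chunk is now ready
   *  }
   *  @endcode
   */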
  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note: we may introduce a "central" barrier algorithm at some point
      // for more space-constrained targets.
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
      // Move-only token type returned by arrive() and consumed by wait().
      class arrival_token final
      {
      public:
        arrival_token(arrival_token&&) = default;
        arrival_token& operator=(arrival_token&&) = default;
        ~arrival_token() = default;

      private:
        friend class barrier;
        using __token = typename __algorithm_t::arrival_token;
        explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
        __token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER