// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.

// Override the -std flag in the check_performance script: STD=gnu++17
// Run the test as both single- and multi-threaded: TEST_B
#include <memory_resource>
#include <list>
#include <string>
#include <testsuite_performance.h>

const int iterations = 100;

// Insert and remove elements of various sizes in std::list containers.
// If timers != nullptr, the function pauses the timers while the lists are
// cleared and deallocated, so that only the insertions and removals are
// timed. Otherwise, the time taken to deallocate the lists is also counted.
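// For example, with `res` being any std::pmr::memory_resource*:
//   __gnu_test::time_counter t;
//   populate_lists(res, &t);      // time insertions and removals only
//   populate_lists(res, nullptr); // also count clearing the lists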
void
populate_lists(std::pmr::memory_resource* r, __gnu_test::time_counter* timers,
               int kmax = iterations)
{
  struct size16 { char c[16]; };
  struct size32 { char c[32]; };
  struct size64 { char c[64]; };
  struct size128 { char c[128]; };

  std::pmr::list<int> l4(r);
  std::pmr::list<size16> l16(r);
  std::pmr::list<size32> l32(r);
  std::pmr::list<size64> l64(r);
  std::pmr::list<size128> l128(r);

  const int imax = 1000;
  const int jmax = 100;
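
  // Each iteration of the middle loop appends jmax elements to every list
  // and then removes one element, so by the end of an outer iteration each
  // list has grown to imax * (jmax - 1) elements.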
  for (int k = 0; k < kmax; ++k)
    {
      for (int i = 0; i < imax; ++i)
        {
          for (int j = 0; j < jmax; ++j)
            {
              l4.emplace_back();
              l16.emplace_back();
              l32.emplace_back();
              l64.emplace_back();
              l128.emplace_back();
            }
          l4.pop_front();
          l16.pop_front();
          l32.pop_front();
          l64.pop_front();
          l128.pop_front();
        }

      if (timers)
        timers->stop();

      // Deallocate everything:
      l4.clear();
      l16.clear();
      l32.clear();
      l64.clear();
      l128.clear();

      if (timers)
        timers->restart();
    }
}

// Test allocations and deallocations of node-based containers (std::list).
// In this test pmr::unsynchronized_pool_resource should be faster than
// pmr::new_delete_resource().
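// (std::list makes one fixed-size node allocation per element, which a pool
// resource can serve from its preallocated chunks instead of calling
// operator new and operator delete for every node.)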
void test_lists_single_thread()
{
  std::pmr::memory_resource* newdel = std::pmr::new_delete_resource();
  std::pmr::unsynchronized_pool_resource pool;
#ifndef NOTHREAD
  std::pmr::synchronized_pool_resource syncpool;
#endif

  auto run_test = [](auto* memres, std::string name, bool time_dtors) {
    name += " std::list push/pop";
    if (time_dtors)
      name += "/destroy";
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    start_counters(time, resource);
    populate_lists(memres, time_dtors ? nullptr : &time);
    stop_counters(time, resource);
    report_performance(__FILE__, name, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
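      // The three new-delete runs are identical; they establish a baseline
      // and give an idea of the run-to-run variance.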
run_test(newdel, "new-delete-1 ", time_dtors);
run_test(newdel, "new-delete-2 ", time_dtors);
run_test(newdel, "new-delete-3 ", time_dtors);
// Start with an empty set of pools:
pool.release();
run_test(&pool, "unsync-pool-1", time_dtors);
// Destroy pools and start fresh:
pool.release();
run_test(&pool, "unsync-pool-2", time_dtors);
// Do not destroy pools, reuse allocated memory:
run_test(&pool, "unsync-pool-3", time_dtors);
#ifndef NOTHREAD
syncpool.release();
run_test(&syncpool, "sync-pool-1 ", time_dtors);
// Destroy pools and start fresh:
syncpool.release();
run_test(&syncpool, "sync-pool-2 ", time_dtors);
// Do not destroy pools, reuse allocated memory:
run_test(&syncpool, "sync-pool-3 ", time_dtors);
#endif
}
}
// TODO test non-pooled large allocations from (un)synchronized_pool_resource
#ifndef NOTHREAD
# include <thread>
# include <mutex>
# include <vector>
# include <cassert>

// Multithreaded std::list test with each thread having its own resource.
// (pmr::new_delete vs pmr::unsynchronized_pool vs pmr::synchronized_pool)
//
// In this test both pmr::unsynchronized_pool_resource and
// pmr::synchronized_pool_resource should be faster than
// pmr::new_delete_resource().
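// (Each thread has its own pool resources, so there is no contention on
// them and even the unsynchronized pool is safe to use here.)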
void test_lists_resource_per_thread()
{
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);
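  // The gate is locked before the worker threads are created and unlocked
  // once the timers are running, so thread start-up is not timed and the
  // threads begin work at (roughly) the same time.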

  struct state
  {
    std::thread thread;
    // Per-thread pool resources:
    std::pmr::unsynchronized_pool_resource unsync;
    std::pmr::synchronized_pool_resource sync;
    std::pmr::memory_resource* memres[3] = {
      std::pmr::new_delete_resource(), &unsync, &sync
    };
  };
  state states[4];
  const std::string resnames[] = {"new-delete ", "unsync-pool", "sync-pool "};

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
                         __gnu_test::time_counter* timers)
  {
    // Block until the gate mutex can be locked. The temporary lock_guard
    // is destroyed again immediately: we only need to wait for the gate to
    // open, not to hold it.
    std::lock_guard<std::mutex>{mx};
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& s : states)
      s.thread = std::thread{ run_test, s.memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& s : states)
      s.thread.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
      std::string testname = " resource-per-thread std::list push/pop";
      if (time_dtors)
        testname += "/destroy";
      for (int which : {0, 1, 2})
        time_threads(testname, time_dtors, which);
    }
}

// A naive memory_resource that adds a mutex to unsynchronized_pool_resource
struct locking_pool_resource : std::pmr::unsynchronized_pool_resource
{
  void* do_allocate(std::size_t b, std::size_t a) override
  {
    std::lock_guard<std::mutex> l(m);
    return unsynchronized_pool_resource::do_allocate(b, a);
  }

  void do_deallocate(void* p, std::size_t b, std::size_t a) override
  {
    std::lock_guard<std::mutex> l(m);
    return unsynchronized_pool_resource::do_deallocate(p, b, a);
  }

  std::mutex m;
};
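
// Unlike std::pmr::synchronized_pool_resource, which keeps per-thread pools
// so that most allocations do not contend with other threads, this type
// serializes every allocation and deallocation through a single mutex.
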
// Multithreaded std::list test with all threads sharing the same resource.
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
//
// pmr::synchronized_pool_resource is not expected to be anywhere near
// as fast as pmr::new_delete_resource() here, but should perform much
// better than the naive locking_pool_resource type.
void test_lists_shared_resource()
{
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);

  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;
  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };
  std::thread threads[4];
  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
                         __gnu_test::time_counter* timers)
  {
    std::lock_guard<std::mutex>{mx}; // block until the mutex can be locked
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& t : threads)
      t = std::thread{ run_test, memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
      std::string testname = " shared-resource std::list push/pop";
      if (time_dtors)
        testname += "/destroy";
      for (int which : {0, 1, 2})
        time_threads(testname, time_dtors, which);
    }
}

// TODO threaded test just doing loads of allocations, no deallocs
// both with per-thread resource (unsync vs sync vs newdel)
// and shared resource (locked vs sync vs newdel)
// TODO threaded test just doing loads of deallocations, no allocs
// both with per-thread resource (unsync vs sync vs newdel)
// and shared resource (locked vs sync vs newdel)

// Multithreaded test where deallocations happen on different threads.
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
//
// This hits the slow path for pmr::synchronized_pool_resource, where
// an exclusive lock must be taken to access other threads' pools.
// pmr::synchronized_pool_resource is not expected to be anywhere near
// as fast as pmr::new_delete_resource() here, but should perform much
// better than the naive locking_pool_resource type.
void test_cross_thread_dealloc()
{
  const int num_threads = 4;

  struct X {
    void* ptr;
    unsigned size;
  };

  // A buffer for each thread, and extra buffers for half of the threads:
  std::vector<X> allocs[num_threads * 3 / 2];
  for (auto& v : allocs)
    v.resize(1000 * iterations);

  // Use a few different allocation sizes, so that several pools are used,
  // biased towards the smaller sizes:
  const std::size_t sizes[] = { 8, 16, 8, 16, 32, 64, 8, 16, 32, 64 };

  std::mutex mx;

  auto run_test =
    [&, num_threads] (std::pmr::memory_resource* memres, int i, bool with_exit)
    {
      std::size_t counter = 0;
      std::lock_guard<std::mutex>{mx}; // block until the mutex can be locked

      // Fill this thread's buffer with allocations:
      for (X& x : allocs[i])
        {
          x.size = sizes[counter++ % 10];
          x.ptr = memres->allocate(x.size, 1);
        }

      if (with_exit && i == 0)
        {
          // One of the threads exits, so that its pools transfer to the
          // non-thread-specific list of pools.
          return;
        }
      else if (i < num_threads / 2)
        {
          // The other threads in the first half continue allocating, into
          // the extra buffers:
          for (X& x : allocs[num_threads + i])
            {
              x.size = sizes[counter++ % 10];
              x.ptr = memres->allocate(x.size, 1);
            }
        }
      else
        {
          // The second half of the threads deallocate their own memory and
          // the memory allocated by one of the other threads:
          const int other = i - num_threads / 2;
          for (unsigned n = 0; n < allocs[i].size(); ++n)
            {
              // Deallocate memory allocated in this thread:
              X& x1 = allocs[i][n];
              memres->deallocate(x1.ptr, x1.size, 1);
              x1 = {};
              // Deallocate memory allocated in another thread:
              X& x2 = allocs[other][n];
              memres->deallocate(x2.ptr, x2.size, 1);
              x2 = {};
            }
        }
    };

  std::thread threads[num_threads];
  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;
  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };
  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto time_threads = [&] (std::string name, int which, bool with_exit)
  {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    std::unique_lock<std::mutex> gate(mx);
    for (auto& t : threads)
      // (&t - threads) is this thread's index into the allocs buffers:
      t = std::thread{ run_test, memres[which], &t - threads, with_exit };
    start_counters(time, resource);
    gate.unlock();
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + name, time, resource);

    // Clean up:
    for (auto& a : allocs)
      {
        const int i = (&a - allocs);
        if (i < num_threads) // These allocations were freed by the threads
          for (auto& x : a)
            {
              assert(x.ptr == nullptr);
            }
        else if (with_exit && i == num_threads)
          ; // Thread 0 exited early without filling this extra buffer
        else
          for (auto& x : a)
            {
              memres[which]->deallocate(x.ptr, x.size, 1);
              x = {};
            }
      }
  };

  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc", which, false);
  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc w/exit", which, true);
}
#endif

int main()
{
  test_lists_single_thread();
#ifndef NOTHREAD
  test_lists_resource_per_thread();
  test_lists_shared_resource();
  test_cross_thread_dealloc();
#endif
}