// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.

// Override the -std flag in the check_performance script: STD=gnu++17
// Run the test as both single- and multi-threaded: TEST_B
#include <memory_resource>
#include <list>
#include <string>
#include <testsuite_performance.h>

const int iterations = 100;

// Insert and remove elements of various sizes in std::list containers.
// If timers != nullptr, the function pauses the timers while the lists are
// cleared and deallocated, so that only the insertions and removals are
// timed. Otherwise, the time taken to deallocate the lists is also counted.
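// For example, with `res` being any std::pmr::memory_resource*:
//   __gnu_test::time_counter t;
//   populate_lists(res, &t);      // time insertions and removals only
//   populate_lists(res, nullptr); // also count clearing the lists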
void
populate_lists(std::pmr::memory_resource* r, __gnu_test::time_counter* timers,
               int kmax = iterations)
{
  struct size16 { char c[16]; };
  struct size32 { char c[32]; };
  struct size64 { char c[64]; };
  struct size128 { char c[128]; };

  std::pmr::list<int> l4(r);
  std::pmr::list<size16> l16(r);
  std::pmr::list<size32> l32(r);
  std::pmr::list<size64> l64(r);
  std::pmr::list<size128> l128(r);

  const int imax = 1000;
  const int jmax = 100;
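
  // Each iteration of the middle loop appends jmax elements to every list
  // and then removes one element, so by the end of an outer iteration each
  // list has grown to imax * (jmax - 1) elements.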
  for (int k = 0; k < kmax; ++k)
    {
      for (int i = 0; i < imax; ++i)
        {
          for (int j = 0; j < jmax; ++j)
            {
              l4.emplace_back();
              l16.emplace_back();
              l32.emplace_back();
              l64.emplace_back();
              l128.emplace_back();
            }
          l4.pop_front();
          l16.pop_front();
          l32.pop_front();
          l64.pop_front();
          l128.pop_front();
        }

      if (timers)
        timers->stop();

      // Deallocate everything:
      l4.clear();
      l16.clear();
      l32.clear();
      l64.clear();
      l128.clear();

      if (timers)
        timers->restart();
    }
}

// Test allocations and deallocations of node-based containers (std::list).
// In this test pmr::unsynchronized_pool_resource should be faster than
// pmr::new_delete_resource().
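// (std::list makes one fixed-size node allocation per element, which a pool
// resource can serve from its preallocated chunks instead of calling
// operator new and operator delete for every node.)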
void test_lists_single_thread()
{
  std::pmr::memory_resource* newdel = std::pmr::new_delete_resource();
  std::pmr::unsynchronized_pool_resource pool;
#ifndef NOTHREAD
  std::pmr::synchronized_pool_resource syncpool;
#endif

  auto run_test = [](auto* memres, std::string name, bool time_dtors) {
    name += " std::list push/pop";
    if (time_dtors)
      name += "/destroy";
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    start_counters(time, resource);
    populate_lists(memres, time_dtors ? nullptr : &time);
    stop_counters(time, resource);
    report_performance(__FILE__, name, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
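      // The three new-delete runs are identical; they establish a baseline
      // and give an idea of the run-to-run variance.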
run_test(newdel, "new-delete-1 ", time_dtors);
run_test(newdel, "new-delete-2 ", time_dtors);
run_test(newdel, "new-delete-3 ", time_dtors);
// Start with an empty set of pools:
pool.release();
run_test(&pool, "unsync-pool-1", time_dtors);
// Destroy pools and start fresh:
pool.release();
run_test(&pool, "unsync-pool-2", time_dtors);
// Do not destroy pools, reuse allocated memory:
run_test(&pool, "unsync-pool-3", time_dtors);
#ifndef NOTHREAD
syncpool.release();
run_test(&syncpool, "sync-pool-1 ", time_dtors);
// Destroy pools and start fresh:
syncpool.release();
run_test(&syncpool, "sync-pool-2 ", time_dtors);
// Do not destroy pools, reuse allocated memory:
run_test(&syncpool, "sync-pool-3 ", time_dtors);
#endif
}
}
// TODO test non-pooled large allocations from (un)synchronized_pool_resource
#ifndef NOTHREAD
# include <thread>
# include <mutex>
# include <vector>
# include <cassert>

// Multithreaded std::list test with each thread having its own resource.
// (pmr::new_delete vs pmr::unsynchronized_pool vs pmr::synchronized_pool)
//
// In this test both pmr::unsynchronized_pool_resource and
// pmr::synchronized_pool_resource should be faster than
// pmr::new_delete_resource().
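// (Each thread has its own pool resources, so there is no contention on
// them and even the unsynchronized pool is safe to use here.)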
void test_lists_resource_per_thread()
{
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);
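  // The gate is locked before the worker threads are created and unlocked
  // once the timers are running, so thread start-up is not timed and the
  // threads begin work at (roughly) the same time.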

  struct state
  {
    std::thread thread;
    // Per-thread pool resources:
    std::pmr::unsynchronized_pool_resource unsync;
    std::pmr::synchronized_pool_resource sync;
    std::pmr::memory_resource* memres[3] = {
      std::pmr::new_delete_resource(), &unsync, &sync
    };
  };
  state states[4];
  const std::string resnames[] = {"new-delete ", "unsync-pool", "sync-pool "};

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
                         __gnu_test::time_counter* timers)
  {
    // Block until the gate mutex can be locked. The temporary lock_guard
    // is destroyed again immediately: we only need to wait for the gate to
    // open, not to hold it.
    std::lock_guard<std::mutex>{mx};
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& s : states)
      s.thread = std::thread{ run_test, s.memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& s : states)
      s.thread.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
      std::string testname = " resource-per-thread std::list push/pop";
      if (time_dtors)
        testname += "/destroy";
      for (int which : {0, 1, 2})
        time_threads(testname, time_dtors, which);
    }
}

// A naive memory_resource that adds a mutex to unsynchronized_pool_resource
struct locking_pool_resource : std::pmr::unsynchronized_pool_resource
{
  void* do_allocate(std::size_t b, std::size_t a) override
  {
    std::lock_guard<std::mutex> l(m);
    return unsynchronized_pool_resource::do_allocate(b, a);
  }

  void do_deallocate(void* p, std::size_t b, std::size_t a) override
  {
    std::lock_guard<std::mutex> l(m);
    return unsynchronized_pool_resource::do_deallocate(p, b, a);
  }

  std::mutex m;
};
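
// Unlike std::pmr::synchronized_pool_resource, which keeps per-thread pools
// so that most allocations do not contend with other threads, this type
// serializes every allocation and deallocation through a single mutex.
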
// Multithreaded std::list test with all threads sharing the same resource.
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
//
// pmr::synchronized_pool_resource is not expected to be anywhere near
// as fast as pmr::new_delete_resource() here, but should perform much
// better than the naive locking_pool_resource type.
void test_lists_shared_resource()
{
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);

  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;
  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };
  std::thread threads[4];
  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
                         __gnu_test::time_counter* timers)
  {
    std::lock_guard<std::mutex>{mx}; // block until the mutex can be locked
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& t : threads)
      t = std::thread{ run_test, memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
      std::string testname = " shared-resource std::list push/pop";
      if (time_dtors)
        testname += "/destroy";
      for (int which : {0, 1, 2})
        time_threads(testname, time_dtors, which);
    }
}

// TODO threaded test just doing loads of allocations, no deallocs
// both with per-thread resource (unsync vs sync vs newdel)
// and shared resource (locked vs sync vs newdel)
// TODO threaded test just doing loads of deallocations, no allocs
// both with per-thread resource (unsync vs sync vs newdel)
// and shared resource (locked vs sync vs newdel)

// Multithreaded test where deallocations happen on different threads.
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
//
// This hits the slow path for pmr::synchronized_pool_resource, where
// an exclusive lock must be taken to access other threads' pools.
// pmr::synchronized_pool_resource is not expected to be anywhere near
// as fast as pmr::new_delete_resource() here, but should perform much
// better than the naive locking_pool_resource type.
void test_cross_thread_dealloc()
{
  const int num_threads = 4;

  struct X {
    void* ptr;
    unsigned size;
  };

  // A buffer for each thread, and extra buffers for half of the threads:
  std::vector<X> allocs[num_threads * 3 / 2];
  for (auto& v : allocs)
    v.resize(1000 * iterations);

  // Use a few different allocation sizes, so that several pools are used,
  // biased towards the smaller sizes:
  const std::size_t sizes[] = { 8, 16, 8, 16, 32, 64, 8, 16, 32, 64 };

  std::mutex mx;

  auto run_test =
    [&, num_threads] (std::pmr::memory_resource* memres, int i, bool with_exit)
    {
      std::size_t counter = 0;
      std::lock_guard<std::mutex>{mx}; // block until the mutex can be locked

      // Fill this thread's buffer with allocations:
      for (X& x : allocs[i])
        {
          x.size = sizes[counter++ % 10];
          x.ptr = memres->allocate(x.size, 1);
        }

      if (with_exit && i == 0)
        {
          // One of the threads exits, so that its pools transfer to the
          // non-thread-specific list of pools.
          return;
        }
      else if (i < num_threads / 2)
        {
          // The other threads in the first half continue allocating, into
          // the extra buffers:
          for (X& x : allocs[num_threads + i])
            {
              x.size = sizes[counter++ % 10];
              x.ptr = memres->allocate(x.size, 1);
            }
        }
      else
        {
          // The second half of the threads deallocate their own memory and
          // the memory allocated by one of the other threads:
          const int other = i - num_threads / 2;
          for (unsigned n = 0; n < allocs[i].size(); ++n)
            {
              // Deallocate memory allocated in this thread:
              X& x1 = allocs[i][n];
              memres->deallocate(x1.ptr, x1.size, 1);
              x1 = {};
              // Deallocate memory allocated in another thread:
              X& x2 = allocs[other][n];
              memres->deallocate(x2.ptr, x2.size, 1);
              x2 = {};
            }
        }
    };

  std::thread threads[num_threads];
  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;
  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };
  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto time_threads = [&] (std::string name, int which, bool with_exit)
  {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    std::unique_lock<std::mutex> gate(mx);
    for (auto& t : threads)
      // (&t - threads) is this thread's index into the allocs buffers:
      t = std::thread{ run_test, memres[which], &t - threads, with_exit };
    start_counters(time, resource);
    gate.unlock();
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + name, time, resource);

    // Clean up:
    for (auto& a : allocs)
      {
        const int i = (&a - allocs);
        if (i < num_threads) // These allocations were freed by the threads
          for (auto& x : a)
            {
              assert(x.ptr == nullptr);
            }
        else if (with_exit && i == num_threads)
          ; // Thread 0 exited early without filling this extra buffer
        else
          for (auto& x : a)
            {
              memres[which]->deallocate(x.ptr, x.size, 1);
              x = {};
            }
      }
  };

  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc", which, false);
  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc w/exit", which, true);
}
#endif

int main()
{
  test_lists_single_thread();
#ifndef NOTHREAD
  test_lists_resource_per_thread();
  test_lists_shared_resource();
  test_cross_thread_dealloc();
#endif
}