Files
rocm-systems/examples/parallel-overhead/parallel-overhead.cpp
T
Jonathan R. Madsen a9ff15ff8c CMake updates/fixes + parallel-overhead updates (#16)
- OMNITRACE_INSTALL_EXAMPLES option
- Fix lulesh standalone HIP compilation
- Fix transpose standalone HIP compilation
- Tweaks to parallel-overhead
2022-05-31 14:55:31 -05:00

116 righe
2.5 KiB
C++

#include <algorithm>
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <pthread.h>
#include <random>
#include <string>
#include <thread>
#include <vector>
// Compile-time switch selecting how the global accumulator `total` is shared
// between threads. Defaults to 0 when the build does not define USE_LOCKS.
#if !defined(USE_LOCKS)
# define USE_LOCKS 0
#endif
#if USE_LOCKS > 0
// Mutex variant: `total` is a plain long and every update must hold `mtx`.
# include <mutex>
using auto_lock_t = std::unique_lock<std::mutex>;
long total = 0;
std::mutex mtx{};
#else
// Atomic variant: `total` is a std::atomic<long>, so no mutex is declared.
std::atomic<long> total{ 0 };
#endif
// Forward declarations carrying the noinline attribute for fib() and run().
// NOTE(review): presumably noinline keeps both functions as distinct call
// frames for profiling/instrumentation -- confirm against the example's docs.
long
fib(long n) __attribute__((noinline));
void
run(size_t nitr, long) __attribute__((noinline));
// Naive doubly-recursive Fibonacci.
// Any argument below 2 (including negative values) is returned unchanged;
// otherwise the result is fib(n - 1) + fib(n - 2).
long
fib(long n)
{
    if(n >= 2)
    {
        const long _prev1 = fib(n - 1);
        const long _prev2 = fib(n - 2);
        return _prev1 + _prev2;
    }
    return n;
}
// Worker routine: evaluates fib() `nitr` times and adds the results to the
// global `total`.
//   nitr  number of fib() evaluations to perform
//   n     centre of the inclusive range [n - 2, n + 2] from which each fib()
//         argument is drawn uniformly at random
void
run(size_t nitr, long n)
{
    // shared counter: each call to run() receives the next id, starting at 1
    static std::atomic<int> _counter{ 0 };
    const int               _id = ++_counter;

    // per-call engine seeded from a random_device sample scaled by (100 + id)
    std::default_random_engine          _rng(std::random_device{}() * (100 + _id));
    std::uniform_int_distribution<long> _dist{ n - 2, n + 2 };

    printf("[%i] number of iterations: %zu\n", _id, nitr);

#if USE_LOCKS > 0
    // mutex variant: take the lock once per iteration, after fib() has been
    // computed, and add that single result to the shared total
    for(size_t _left = nitr; _left > 0; --_left)
    {
        const long  _value = fib(_dist(_rng));
        auto_lock_t _guard{ mtx };
        total += _value;
    }
#else
    // atomic variant: accumulate privately and publish with a single update
    long _partial = 0;
    for(size_t _left = nitr; _left > 0; --_left)
        _partial += fib(_dist(_rng));
    total += _partial;
#endif
}
// Entry point.
//
// Usage: <exe> [nfib] [nthread] [nitr]
//   nfib     centre of the fibonacci argument range handed to run() (default 10)
//   nthread  number of worker threads to spawn (default: min(16, hardware
//            concurrency), or 1 when the hardware concurrency is unknown)
//   nitr     number of fib() evaluations per worker (default 50000)
//
// All workers rendezvous on a pthread barrier before calling run(). When
// USE_LOCKS == 0 the main thread also participates as an additional worker.
// Returns 0 on success and EXIT_FAILURE on invalid arguments or when the
// barrier cannot be initialized.
int
main(int argc, char** argv)
{
    // basename of the executable, used as the prefix of every log line;
    // argv[0] is not guaranteed to be present, so fall back to a fixed name
    std::string _name = (argc > 0 && argv[0] != nullptr) ? argv[0] : "parallel-overhead";
    auto        _pos  = _name.find_last_of('/');
    if(_pos != std::string::npos) _name = _name.substr(_pos + 1);

    // hardware_concurrency() may return 0 when the value is not computable;
    // treat that as a single thread instead of defaulting to zero workers
    const size_t _hw     = std::thread::hardware_concurrency();
    size_t       nthread = (_hw == 0) ? 1 : std::min<size_t>(16, _hw);
    size_t       nitr    = 50000;
    long         nfib    = 10;

    // atol() yields a signed value; a negative count would silently wrap to
    // an enormous size_t, so reject it explicitly
    auto _parse_count = [&_name](const char* _arg, const char* _label, size_t& _out) {
        const long _val = atol(_arg);
        if(_val < 0)
        {
            fprintf(stderr, "[%s] Error! %s must be >= 0 (got '%s')\n", _name.c_str(),
                    _label, _arg);
            return false;
        }
        _out = static_cast<size_t>(_val);
        return true;
    };

    if(argc > 1) nfib = atol(argv[1]);
    if(argc > 2 && !_parse_count(argv[2], "number of threads", nthread))
        return EXIT_FAILURE;
    if(argc > 3 && !_parse_count(argv[3], "number of iterations", nitr))
        return EXIT_FAILURE;

    printf("\n[%s] Threads: %zu\n[%s] Iterations: %zu\n[%s] fibonacci(%li)...\n",
           _name.c_str(), nthread, _name.c_str(), nitr, _name.c_str(), nfib);

    // the main thread only joins the workload in the non-locking build
    const bool   run_on_main_thread = (USE_LOCKS == 0);
    const size_t nwait              = nthread + ((run_on_main_thread) ? 1 : 0);

    // pthread_barrier_init fails (e.g. for a count of zero); using or
    // destroying a barrier that was never initialized is undefined, so bail out
    pthread_barrier_t _barrier;
    const int _err = pthread_barrier_init(&_barrier, nullptr, static_cast<unsigned>(nwait));
    if(_err != 0)
    {
        fprintf(stderr,
                "[%s] Error! pthread_barrier_init failed for %zu participants: %s\n",
                _name.c_str(), nwait, std::strerror(_err));
        return EXIT_FAILURE;
    }

    // every participant blocks on the barrier so that all run() calls start
    // only once all participants have arrived
    auto _run = [&_barrier](size_t nitr, long n) {
        pthread_barrier_wait(&_barrier);
        run(nitr, n);
    };

    std::vector<std::thread> threads{};
    threads.reserve(nthread);
    for(size_t i = 0; i < nthread; ++i)
    {
        threads.emplace_back(_run, nitr, nfib);
    }

    if(run_on_main_thread)
    {
        _run(nitr, nfib);
    }

    for(auto& itr : threads)
        itr.join();

    pthread_barrier_destroy(&_barrier);

    // nthread is a size_t, hence %zu
    // NOTE(review): the reported multiplier is nthread even when the main
    // thread also contributed to total -- confirm whether nwait is intended
    printf("[%s] fibonacci(%li) x %zu = %li\n", _name.c_str(), nfib, nthread,
           static_cast<long>(total));
    return 0;
}