Files
rocm-systems/projects/rocprofiler-systems/examples/parallel-overhead/parallel-overhead.cpp
T
Jonathan R. Madsen 0d5f0fb9cf Support for tracing mutex locking (#52)
* Parallel overhead example with locks

* Support tracing mutex locking + more

- support wrapping pthread_mutex_lock
- support wrapping pthread_mutex_unlock
- support wrapping pthread_mutex_trylock
- get_perfetto_combined_traces setting
- OMNITRACE_TRACE_THREAD_LOCKS option
- ThreadState
- critical trace includes queue id
- enabled/disabled settings in timemory
- fix OMNITRACE_TIMEMORY_COMPONENTS
- fix reading config
- fix setting categories
- applied ThreadState::Internal in various places
- utility::get_filled_array
- utility::get_reserved_vector
- utility::get_thread_index
- fork_gotcha messages about forks
- split out some pthread_gotcha functionality into pthread_create_gotcha
- handle queue id in roctracer callbacks

* Update timemory and PTL submodules

* Misc CMake updates

- Includes fix to omnitrace-static-lib{gcc,stdcxx}

* Misc cleanup to pthread_mutex_gotcha and backtrace

* Fix to duplicate field in module_function json

* Improvement to debug messages

* omnitrace-dl and common improvements

- tweak to delimit
- common::ignore message
- common::join quoting of strings
- omnitrace_set_env ignores if inited and active
- omnitrace_set_mpi ignores if inited and active

* nsync for transpose example

* Fix to thread_deleter<void> functor invoke

* Fix thread state and HIP stream enums

[ROCm/rocprofiler-systems commit: b208047741]
2022-05-08 04:40:10 -05:00

87 lines
1.9 KiB
C++

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <thread>
#include <vector>
// Global accumulator shared by every caller of run(). Its type depends on the build:
// with USE_LOCKS defined it is a plain long paired with a std::mutex, otherwise it
// is a std::atomic<long>.
#if defined(USE_LOCKS)
# include <mutex>
// lock type run() constructs on `mtx` before adding to `total`
using auto_lock_t = std::unique_lock<std::mutex>;
// running sum of fib() results; run() modifies it only while holding `mtx`
long total = 0;
// mutex that run() locks around each update of `total`
std::mutex mtx{};
#else
# include <atomic>
// running sum of fib() results, updated with atomic read-modify-write operations
std::atomic<long> total{ 0 };
#endif
// Forward declarations of the two workload functions. The noinline attribute asks
// the compiler not to inline calls to them.
// NOTE(review): presumably this keeps fib() and run() as distinct functions for the
// profiler/tracer to instrument -- confirm against the example's documentation.
long
fib(long n) __attribute__((noinline));
void
run(size_t nitr, long) __attribute__((noinline));
/// Naive doubly-recursive Fibonacci.
///
/// @param n  index into the sequence
/// @return   n itself when n < 2 (this includes negative values), otherwise
///           fib(n - 1) + fib(n - 2)
long
fib(long n)
{
    // recursive case first; everything below 2 falls through to the base case
    if(n >= 2)
    {
        const long _prev  = fib(n - 1);
        const long _prev2 = fib(n - 2);
        return _prev + _prev2;
    }
    return n;
}
/// Evaluates fib(n) `nitr` times and adds the results to the global `total`.
///
/// With USE_LOCKS defined, every individual result is added to `total` while
/// holding `mtx`, so the lock is taken once per iteration. Otherwise the results
/// are summed in a local variable and added to the atomic `total` once at the end.
///
/// @param nitr  number of fib(n) evaluations to perform
/// @param n     argument forwarded to fib()
void
run(size_t nitr, long n)
{
#if defined(USE_LOCKS)
    for(size_t _remaining = nitr; _remaining > 0; --_remaining)
    {
        // compute before taking the lock so only the addition is serialized
        const long _value = fib(n);
        auto_lock_t _guard{ mtx };
        total += _value;
    }
#else
    long _sum = 0;
    for(size_t _remaining = nitr; _remaining > 0; --_remaining)
    {
        _sum += fib(n);
    }
    // single atomic update for the whole batch
    total.fetch_add(_sum);
#endif
}
/// Driver for the parallel-overhead example.
///
/// Usage: <exe> [nfib] [nthread] [nitr]
///   nfib    - argument passed to fib()                (default: 10)
///   nthread - number of worker threads to launch      (default: hardware
///             concurrency, capped at 16 and never less than 1)
///   nitr    - base iteration count for each worker    (default: 50000)
///
/// Odd-indexed workers run 10% fewer iterations than `nitr` and even-indexed
/// workers run 10% more, each with at least one iteration. When USE_LOCKS is not
/// defined, the main thread additionally runs 75% of `nitr` iterations with the
/// fib() argument reduced by 10%. All workers are joined before the accumulated
/// total is printed.
///
/// @return 0 always
int
main(int argc, char** argv)
{
    // argv[0] is not guaranteed to be present (argc may be 0), so fall back to a
    // fixed name instead of constructing a std::string from a null pointer
    std::string _name =
        (argc > 0 && argv[0] != nullptr) ? argv[0] : "parallel-overhead";
    auto _pos = _name.find_last_of('/');
    if(_pos != std::string::npos) _name = _name.substr(_pos + 1);

    // negative counts would otherwise wrap around to enormous size_t values
    auto _to_count = [](const char* _arg) {
        return static_cast<size_t>(std::max<long>(atol(_arg), 0));
    };

    // hardware_concurrency() may return 0 when the value is not computable;
    // make sure the default still launches at least one worker
    size_t nthread = std::min<size_t>(
        16, std::max<size_t>(std::thread::hardware_concurrency(), 1));
    size_t nitr = 50000;
    long   nfib = 10;
    if(argc > 1) nfib = atol(argv[1]);
    if(argc > 2) nthread = _to_count(argv[2]);
    if(argc > 3) nitr = _to_count(argv[3]);

    printf("\n[%s] Threads: %zu\n[%s] Iterations: %zu\n[%s] fibonacci(%li)...\n",
           _name.c_str(), nthread, _name.c_str(), nitr, _name.c_str(), nfib);

    std::vector<std::thread> threads{};
    for(size_t i = 0; i < nthread; ++i)
    {
        // unbalance the work: odd workers get 10% less, even workers 10% more
        const double _scaled =
            ((i % 2) == 1) ? (nitr - (0.1 * nitr)) : (nitr + (0.1 * nitr));
        size_t _nitr = std::max<size_t>(static_cast<size_t>(_scaled), 1);
        threads.emplace_back(&run, _nitr, nfib);
    }

#if !defined(USE_LOCKS)
    // the main thread contributes a smaller share of work alongside the workers
    auto _nitr = std::max<size_t>(static_cast<size_t>(nitr - 0.25 * nitr), 1);
    run(_nitr, static_cast<long>(nfib - 0.1 * nfib));
#endif

    for(auto& itr : threads)
        itr.join();

    // nthread is a size_t, so it must be printed with %zu rather than %lu
    printf("[%s] fibonacci(%li) x %zu = %li\n", _name.c_str(), nfib, nthread,
           static_cast<long>(total));
    return 0;
}