0d5f0fb9cf
* Parallel overhead example with locks
* Support tracing mutex locking + more
- support wrapping pthread_mutex_lock
- support wrapping pthread_mutex_unlock
- support wrapping pthread_mutex_trylock
- get_perfetto_combined_traces setting
- OMNITRACE_TRACE_THREAD_LOCKS option
- ThreadState
- critical trace includes queue id
- enabled/disabled settings in timemory
- fix OMNITRACE_TIMEMORY_COMPONENTS
- fix reading config
- fix setting categories
- applied ThreadState::Internal in various places
- utility::get_filled_array
- utility::get_reserved_vector
- utility::get_thread_index
- fork_gotcha messages about forks
- split out some pthread_gotcha functionality into pthread_create_gotcha
- handle queue id in roctracer callbacks
* Update timemory and PTL submodules
* Misc CMake updates
- Includes fix to omnitrace-static-lib{gcc,stdcxx}
* Misc cleanup to pthread_mutex_gotcha and backtrace
* Fix to duplicate field in module_function json
* Improvement to debug messages
* omnitrace-dl and common improvements
- tweak to delimit
- common::ignore message
- common::join quoting of strings
- omnitrace_set_env ignores if inited and active
- omnitrace_set_mpi ignores if inited and active
* nsync for transpose example
* Fix to thread_deleter<void> functor invoke
* Fix thread state and HIP stream enums
[ROCm/rocprofiler-systems commit: b208047741]
87 строки
1.9 KiB
C++
87 строки
1.9 KiB
C++
|
|
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <thread>
#include <vector>
|
|
|
|
// Global accumulator that run() adds its fib() results to.
//  - USE_LOCKS defined:   a plain long, accompanied by a std::mutex and a
//                         std::unique_lock alias for taking that mutex.
//  - USE_LOCKS undefined: a std::atomic<long>, no mutex is declared.
#ifdef USE_LOCKS
#    include <mutex>
using auto_lock_t = std::unique_lock<std::mutex>;
long       total{ 0 };
std::mutex mtx;
#else
#    include <atomic>
std::atomic<long> total{ 0 };
#endif
|
|
|
|
// Forward declarations of the two work functions. Both carry the GCC/Clang
// noinline attribute, so the compiler is asked not to inline them into their
// callers (presumably so they stay visible as separate functions when the
// example is profiled -- confirm against the example's documentation).
long fib(long n) __attribute__((noinline));
void run(size_t nitr, long n) __attribute__((noinline));
|
|
|
|
/// Doubly-recursive Fibonacci.
///
/// @param n  index of the requested term
/// @return   n itself when n < 2 (this includes negative values), otherwise
///           fib(n - 1) + fib(n - 2)
///
/// Each call with n >= 2 makes two further recursive calls, so the amount of
/// work grows rapidly with n.
long
fib(long n)
{
    if(n < 2) return n;
    return fib(n - 1) + fib(n - 2);
}
|
|
|
|
void
|
|
run(size_t nitr, long n)
|
|
{
|
|
#if defined(USE_LOCKS)
|
|
for(size_t i = 0; i < nitr; ++i)
|
|
{
|
|
auto _v = fib(n);
|
|
auto_lock_t _lk{ mtx };
|
|
total += _v;
|
|
}
|
|
#else
|
|
long local = 0;
|
|
for(size_t i = 0; i < nitr; ++i)
|
|
local += fib(n);
|
|
total += local;
|
|
#endif
|
|
}
|
|
|
|
int
|
|
main(int argc, char** argv)
|
|
{
|
|
std::string _name = argv[0];
|
|
auto _pos = _name.find_last_of('/');
|
|
if(_pos != std::string::npos) _name = _name.substr(_pos + 1);
|
|
|
|
size_t nthread = std::min<size_t>(16, std::thread::hardware_concurrency());
|
|
size_t nitr = 50000;
|
|
long nfib = 10;
|
|
|
|
if(argc > 1) nfib = atol(argv[1]);
|
|
if(argc > 2) nthread = atol(argv[2]);
|
|
if(argc > 3) nitr = atol(argv[3]);
|
|
|
|
printf("\n[%s] Threads: %zu\n[%s] Iterations: %zu\n[%s] fibonacci(%li)...\n",
|
|
_name.c_str(), nthread, _name.c_str(), nitr, _name.c_str(), nfib);
|
|
|
|
std::vector<std::thread> threads{};
|
|
for(size_t i = 0; i < nthread; ++i)
|
|
{
|
|
size_t _nitr = ((i % 2) == 1) ? (nitr - (0.1 * nitr)) : (nitr + (0.1 * nitr));
|
|
_nitr = std::max<size_t>(_nitr, 1);
|
|
threads.emplace_back(&run, _nitr, nfib);
|
|
}
|
|
|
|
#if !defined(USE_LOCKS)
|
|
auto _nitr = std::max<size_t>(nitr - 0.25 * nitr, 1);
|
|
run(_nitr, nfib - 0.1 * nfib);
|
|
#endif
|
|
|
|
for(auto& itr : threads)
|
|
itr.join();
|
|
|
|
printf("[%s] fibonacci(%li) x %lu = %li\n", _name.c_str(), nfib, nthread,
|
|
static_cast<long>(total));
|
|
|
|
return 0;
|
|
}
|