6825578603
* various tweaks
* build updates + cleanup + overlap guard + min addr range
* Library source reorg + miscellaneous tweaks
* Removed unnecessary fwd decls
* Print address range in --print-X pair mode
- hosttrace modifications
- disable instrumenting functions with overlapping sections or multiple entry points by default (control via --allow-overlapping option)
- disable instrumenting functions whose address range < 512 bytes unless a loop is present by default (control via --min-address-range option)
- disable instrumenting functions w/ loops whose address range < 64 bytes (control via --min-loop-address-range)
- Support for wrapping MPI function calls even in binary rewrite mode
- e.g. use gotcha to wrap MPI functions with hosttrace_push_trace and hosttrace_pop_trace
- New timemory only mode --> HOSTTRACE_USE_TIMEMORY=ON
- New timemory + perfetto mode --> HOSTTRACE_USE_PERFETTO=ON + HOSTTRACE_USE_TIMEMORY=ON
- Full support for all timemory components
- parallel-overhead example for measuring the overhead in an MT-parallelized application with very small instrumentation functions
- improvements to output directories for hosttrace exe
- improvements to output directories for hosttrace library
- new hosttrace options
- --print-instrumented <type> prints out the instrumented entities and exits
- --print-available <type> prints out the available instrumentation entities and exits
- --print-overlapping <type> prints out the overlapping entities and exits
- NOTE: <type> above refers to the information printed out, e.g. module name vs. function name vs. module and function name, etc.
[ROCm/rocprofiler-systems commit: 1f15b3070f]
53 строки
958 B
C++
53 строки
958 B
C++
|
|
#include <atomic>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
/// Process-wide accumulator; each call to run() adds its local sum here.
std::atomic<long> total{ 0 };

/// Recursive Fibonacci; marked noinline so calls to it are not inlined away.
__attribute__((noinline)) long
fib(long n);

/// Per-thread workload: invokes fib(n) `nitr` times; marked noinline as well.
__attribute__((noinline)) void
run(size_t nitr, long n);
|
|
|
|
/// Naive doubly-recursive Fibonacci.
///
/// @param n  index into the sequence
/// @return   n itself when n < 2 (this includes negative values), otherwise
///           fib(n - 1) + fib(n - 2)
long
fib(long n)
{
    if(n >= 2)
    {
        const long one_back = fib(n - 1);
        const long two_back = fib(n - 2);
        return one_back + two_back;
    }
    return n;
}
|
|
|
|
void
|
|
run(size_t nitr, long n)
|
|
{
|
|
long local = 0;
|
|
for(size_t i = 0; i < nitr; ++i)
|
|
local += fib(n);
|
|
total += local;
|
|
}
|
|
|
|
int
|
|
main(int argc, char** argv)
|
|
{
|
|
size_t nthread = 16;
|
|
size_t nitr = 50000;
|
|
long nfib = 10;
|
|
if(argc > 1)
|
|
nfib = atol(argv[1]);
|
|
if(argc > 2)
|
|
nthread = atol(argv[2]);
|
|
if(argc > 3)
|
|
nitr = atol(argv[3]);
|
|
|
|
std::vector<std::thread> threads{};
|
|
for(size_t i = 0; i < nthread; ++i)
|
|
threads.emplace_back(&run, nitr, nfib);
|
|
|
|
for(auto& itr : threads)
|
|
itr.join();
|
|
|
|
printf("fibonacci(%li) x %lu = %li\n", nfib, nthread, total.load());
|
|
|
|
return 0;
|
|
}
|