Fix the main library stop routine for timemory (#39)
* Fix the main library stop routine for timemory
- the main pop_timemory function was popping too many calls
- this primarily affected recursive calls
* Lengthen the timeout for the Configure CMake step
* Fix python tests
- new validate-timemory-json.py script
* Documentation update
- Call-counts in timemory output examples in documentation were affected by the changes
* Fix the per-thread metrics during finalization
- pthread_create_mutex starts/stops the per-thread data
- removed unintentional continue statement
* Docs tweaks
* Fix lap counter on per-thread metrics
[ROCm/rocprofiler-systems commit: a142b2029d]
This commit is contained in:
committato da
GitHub
parent
ad9fd4b7ec
commit
aeab36501a
@@ -66,7 +66,7 @@ jobs:
|
||||
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure CMake
|
||||
timeout-minutes: 5
|
||||
timeout-minutes: 10
|
||||
run:
|
||||
cmake --version &&
|
||||
if [ -z "${{ matrix.mpi }}" ]; then USE_MPI=OFF; else USE_MPI=ON; fi &&
|
||||
|
||||
+1
-1
@@ -48,7 +48,7 @@ jobs:
|
||||
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:/opt/dyninst/lib:/opt/elfutils/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure CMake
|
||||
timeout-minutes: 5
|
||||
timeout-minutes: 10
|
||||
run:
|
||||
git config --global --add safe.directory ${PWD} &&
|
||||
cmake --version &&
|
||||
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure CMake
|
||||
timeout-minutes: 5
|
||||
timeout-minutes: 10
|
||||
run:
|
||||
git config --global --add safe.directory ${PWD} &&
|
||||
cmake --version &&
|
||||
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure CMake
|
||||
timeout-minutes: 5
|
||||
timeout-minutes: 10
|
||||
run:
|
||||
cmake --version &&
|
||||
if [ -z "${{ matrix.mpi }}" ]; then USE_MPI=OFF; else USE_MPI=ON; fi &&
|
||||
|
||||
@@ -1 +1 @@
|
||||
1.1.0
|
||||
1.1.1
|
||||
|
||||
@@ -34,6 +34,7 @@ manage extensions, resources, data, etc.
|
||||
|
||||
- Built-in MPI support
|
||||
- Kokkos-Tools support
|
||||
- OpenMP-Tools (OMPT)
|
||||
|
||||
### GPU Metrics
|
||||
|
||||
@@ -66,7 +67,6 @@ manage extensions, resources, data, etc.
|
||||
|
||||
### Third-party API support
|
||||
|
||||
- OpenMP-Tools (OMPT)
|
||||
- TAU
|
||||
- LIKWID
|
||||
- Caliper
|
||||
|
||||
@@ -124,26 +124,26 @@ Using `omnitrace-python ./example.py` with `OMNITRACE_USE_TIMEMORY=ON` and `OMNI
|
||||
| LABEL | COUNT | DEPTH | METRIC | SUM |
|
||||
|---------------------------------------------------|--------|--------|------------|--------|
|
||||
| |0>>> run | 1 | 0 | trip_count | 1 |
|
||||
| |0>>> |_fib | 10946 | 1 | trip_count | 10946 |
|
||||
| |0>>> |_fib | 4181 | 2 | trip_count | 4181 |
|
||||
| |0>>> |_fib | 2584 | 3 | trip_count | 2584 |
|
||||
| |0>>> |_fib | 1597 | 4 | trip_count | 1597 |
|
||||
| |0>>> |_fib | 987 | 5 | trip_count | 987 |
|
||||
| |0>>> |_fib | 610 | 6 | trip_count | 610 |
|
||||
| |0>>> |_fib | 377 | 7 | trip_count | 377 |
|
||||
| |0>>> |_fib | 233 | 8 | trip_count | 233 |
|
||||
| |0>>> |_fib | 144 | 9 | trip_count | 144 |
|
||||
| |0>>> |_fib | 89 | 10 | trip_count | 89 |
|
||||
| |0>>> |_fib | 55 | 11 | trip_count | 55 |
|
||||
| |0>>> |_fib | 34 | 12 | trip_count | 34 |
|
||||
| |0>>> |_fib | 21 | 13 | trip_count | 21 |
|
||||
| |0>>> |_fib | 13 | 14 | trip_count | 13 |
|
||||
| |0>>> |_fib | 8 | 15 | trip_count | 8 |
|
||||
| |0>>> |_fib | 5 | 16 | trip_count | 5 |
|
||||
| |0>>> |_fib | 3 | 17 | trip_count | 3 |
|
||||
| |0>>> |_fib | 2 | 18 | trip_count | 2 |
|
||||
| |0>>> |_fib | 1 | 19 | trip_count | 1 |
|
||||
| |0>>> |_fib | 1 | 20 | trip_count | 1 |
|
||||
| |0>>> |_fib | 1 | 1 | trip_count | 1 |
|
||||
| |0>>> |_fib | 2 | 2 | trip_count | 2 |
|
||||
| |0>>> |_fib | 4 | 3 | trip_count | 4 |
|
||||
| |0>>> |_fib | 8 | 4 | trip_count | 8 |
|
||||
| |0>>> |_fib | 16 | 5 | trip_count | 16 |
|
||||
| |0>>> |_fib | 32 | 6 | trip_count | 32 |
|
||||
| |0>>> |_fib | 64 | 7 | trip_count | 64 |
|
||||
| |0>>> |_fib | 128 | 8 | trip_count | 128 |
|
||||
| |0>>> |_fib | 256 | 9 | trip_count | 256 |
|
||||
| |0>>> |_fib | 512 | 10 | trip_count | 512 |
|
||||
| |0>>> |_fib | 1024 | 11 | trip_count | 1024 |
|
||||
| |0>>> |_fib | 2026 | 12 | trip_count | 2026 |
|
||||
| |0>>> |_fib | 3632 | 13 | trip_count | 3632 |
|
||||
| |0>>> |_fib | 5020 | 14 | trip_count | 5020 |
|
||||
| |0>>> |_fib | 4760 | 15 | trip_count | 4760 |
|
||||
| |0>>> |_fib | 2942 | 16 | trip_count | 2942 |
|
||||
| |0>>> |_fib | 1152 | 17 | trip_count | 1152 |
|
||||
| |0>>> |_fib | 274 | 18 | trip_count | 274 |
|
||||
| |0>>> |_fib | 36 | 19 | trip_count | 36 |
|
||||
| |0>>> |_fib | 2 | 20 | trip_count | 2 |
|
||||
| |0>>> |_inefficient | 1 | 1 | trip_count | 1 |
|
||||
|-------------------------------------------------------------------------------------------|
|
||||
```
|
||||
@@ -205,26 +205,26 @@ The results for both of the source-level instrumentation modes are identical to
|
||||
| LABEL | COUNT | DEPTH | METRIC | SUM |
|
||||
|---------------------------------------------------|--------|--------|------------|--------|
|
||||
| |0>>> run | 1 | 0 | trip_count | 1 |
|
||||
| |0>>> |_fib | 10946 | 1 | trip_count | 10946 |
|
||||
| |0>>> |_fib | 4181 | 2 | trip_count | 4181 |
|
||||
| |0>>> |_fib | 2584 | 3 | trip_count | 2584 |
|
||||
| |0>>> |_fib | 1597 | 4 | trip_count | 1597 |
|
||||
| |0>>> |_fib | 987 | 5 | trip_count | 987 |
|
||||
| |0>>> |_fib | 610 | 6 | trip_count | 610 |
|
||||
| |0>>> |_fib | 377 | 7 | trip_count | 377 |
|
||||
| |0>>> |_fib | 233 | 8 | trip_count | 233 |
|
||||
| |0>>> |_fib | 144 | 9 | trip_count | 144 |
|
||||
| |0>>> |_fib | 89 | 10 | trip_count | 89 |
|
||||
| |0>>> |_fib | 55 | 11 | trip_count | 55 |
|
||||
| |0>>> |_fib | 34 | 12 | trip_count | 34 |
|
||||
| |0>>> |_fib | 21 | 13 | trip_count | 21 |
|
||||
| |0>>> |_fib | 13 | 14 | trip_count | 13 |
|
||||
| |0>>> |_fib | 8 | 15 | trip_count | 8 |
|
||||
| |0>>> |_fib | 5 | 16 | trip_count | 5 |
|
||||
| |0>>> |_fib | 3 | 17 | trip_count | 3 |
|
||||
| |0>>> |_fib | 2 | 18 | trip_count | 2 |
|
||||
| |0>>> |_fib | 1 | 19 | trip_count | 1 |
|
||||
| |0>>> |_fib | 1 | 20 | trip_count | 1 |
|
||||
| |0>>> |_fib | 1 | 1 | trip_count | 1 |
|
||||
| |0>>> |_fib | 2 | 2 | trip_count | 2 |
|
||||
| |0>>> |_fib | 4 | 3 | trip_count | 4 |
|
||||
| |0>>> |_fib | 8 | 4 | trip_count | 8 |
|
||||
| |0>>> |_fib | 16 | 5 | trip_count | 16 |
|
||||
| |0>>> |_fib | 32 | 6 | trip_count | 32 |
|
||||
| |0>>> |_fib | 64 | 7 | trip_count | 64 |
|
||||
| |0>>> |_fib | 128 | 8 | trip_count | 128 |
|
||||
| |0>>> |_fib | 256 | 9 | trip_count | 256 |
|
||||
| |0>>> |_fib | 512 | 10 | trip_count | 512 |
|
||||
| |0>>> |_fib | 1024 | 11 | trip_count | 1024 |
|
||||
| |0>>> |_fib | 2026 | 12 | trip_count | 2026 |
|
||||
| |0>>> |_fib | 3632 | 13 | trip_count | 3632 |
|
||||
| |0>>> |_fib | 5020 | 14 | trip_count | 5020 |
|
||||
| |0>>> |_fib | 4760 | 15 | trip_count | 4760 |
|
||||
| |0>>> |_fib | 2942 | 16 | trip_count | 2942 |
|
||||
| |0>>> |_fib | 1152 | 17 | trip_count | 1152 |
|
||||
| |0>>> |_fib | 274 | 18 | trip_count | 274 |
|
||||
| |0>>> |_fib | 36 | 19 | trip_count | 36 |
|
||||
| |0>>> |_fib | 2 | 20 | trip_count | 2 |
|
||||
| |0>>> |_inefficient | 1 | 1 | trip_count | 1 |
|
||||
|-------------------------------------------------------------------------------------------|
|
||||
```
|
||||
|
||||
@@ -125,7 +125,7 @@ custom_push_region(const char* name)
|
||||
### User API Output
|
||||
|
||||
```console
|
||||
$ omnitrace -l --min-address-range=0 --min-address-range-loop=0 --min-instructions=8 -E custom_push_region -o -- ./user-api
|
||||
$ omnitrace -l --min-instructions=8 -E custom_push_region -o -- ./user-api
|
||||
...
|
||||
$ export OMNITRACE_USE_TIMEMORY=ON
|
||||
$ export OMNITRACE_USE_PID=OFF
|
||||
@@ -150,63 +150,83 @@ Pushing custom region :: initialization
|
||||
[./user-api.inst] Iterations: 100
|
||||
[./user-api.inst] fibonacci(20)...
|
||||
Pushing custom region :: thread_creation
|
||||
Pushing custom region :: run(20) x 100
|
||||
Pushing custom region :: thread_wait
|
||||
Pushing custom region :: run(20) x 100
|
||||
Pushing custom region :: run(20) x 100
|
||||
Pushing custom region :: run(20) x 100
|
||||
Pushing custom region :: run(20) x 100
|
||||
Pushing custom region :: run(20) x 100
|
||||
[./user-api.inst] fibonacci(20) x 4 = 3382500
|
||||
[omnitrace][86267][0][omnitrace_finalize] finalizing...
|
||||
|
||||
|
||||
[omnitrace][2637959][0] omnitrace : 2.716905 sec wall_clock, 1.216 mb peak_rss, 3.680000 sec cpu_clock, 135.4 % cpu_util [laps: 1]
|
||||
[omnitrace][2637959][0] user-api.inst/thread-0 : 2.715708 sec wall_clock, 2.354223 sec thread_cpu_clock, 86.7 % thread_cpu_util, 1.216 mb peak_rss [laps: 1]
|
||||
[omnitrace][2637959][0] user-api.inst/thread-1 : 0.329802 sec wall_clock, 0.329739 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.000 mb peak_rss [laps: 1]
|
||||
[omnitrace][2637959][0] user-api.inst/thread-2 : 0.355981 sec wall_clock, 0.335795 sec thread_cpu_clock, 94.3 % thread_cpu_util, 0.528 mb peak_rss [laps: 1]
|
||||
[omnitrace][2637959][0] user-api.inst/thread-3 : 0.341329 sec wall_clock, 0.331214 sec thread_cpu_clock, 97.0 % thread_cpu_util, 0.456 mb peak_rss [laps: 1]
|
||||
[omnitrace][2637959][0] user-api.inst/thread-4 : 0.360631 sec wall_clock, 0.330374 sec thread_cpu_clock, 91.6 % thread_cpu_util, 0.600 mb peak_rss [laps: 1]
|
||||
[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.json'...
|
||||
[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.tree.json'...
|
||||
[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.txt'...
|
||||
[omnitrace][86267][0] omnitrace : 5.190895 sec wall_clock, 2.748 mb peak_rss, 6.330000 sec cpu_clock, 121.9 % cpu_util [laps: 1]
|
||||
[omnitrace][86267][0] user-api.inst/thread-0 : 5.078713 sec wall_clock, 4.722415 sec thread_cpu_clock, 93.0 % thread_cpu_util, 1.276 mb peak_rss [laps: 1]
|
||||
[omnitrace][86267][0] user-api.inst/thread-1 : 0.322248 sec wall_clock, 0.322191 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.000 mb peak_rss [laps: 1]
|
||||
[omnitrace][86267][0] user-api.inst/thread-2 : 0.323255 sec wall_clock, 0.323194 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.000 mb peak_rss [laps: 1]
|
||||
[omnitrace][86267][0] user-api.inst/thread-3 : 0.323569 sec wall_clock, 0.323484 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.092 mb peak_rss [laps: 1]
|
||||
[omnitrace][86267][0] user-api.inst/thread-4 : 0.324178 sec wall_clock, 0.324057 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.184 mb peak_rss [laps: 1]
|
||||
[omnitrace][86267][0] Post-processing 51 cpu frequency and memory usage entries...
|
||||
|
||||
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.json'...
|
||||
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.tree.json'...
|
||||
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.txt'...
|
||||
|
||||
[metadata::manager::finalize]> Outputting 'omnitrace-example-output/metadata.json' and 'omnitrace-example-output/functions.json'...
|
||||
[omnitrace][manager::finalize][metadata]> Outputting 'omnitrace-user-api.inst-output/metadata.json' and 'omnitrace-user-api.inst-output/functions.json'...
|
||||
[omnitrace][86267][0][omnitrace_finalize] Finalized
|
||||
$ cat omnitrace-example-output/wall_clock.txt
|
||||
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| REAL-CLOCK TIMER (I.E. WALL-CLOCK TIMER) |
|
||||
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| LABEL | COUNT | DEPTH | METRIC | UNITS | SUM | MEAN | MIN | MAX | VAR | STDDEV | % SELF |
|
||||
|-----------------------------------------------------------------------|---------|--------|------------|--------|----------|----------|----------|----------|----------|----------|--------|
|
||||
| |0>>> ./user-api.inst | 1 | 0 | wall_clock | sec | 2.715611 | 2.715611 | 2.715611 | 2.715611 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |0>>> |_initialization | 1 | 1 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_thread_creation | 1 | 1 | wall_clock | sec | 0.000170 | 0.000170 | 0.000170 | 0.000170 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_thread_wait | 1 | 1 | wall_clock | sec | 0.360751 | 0.360751 | 0.360751 | 0.360751 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |1>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.329472 | 0.329472 | 0.329472 | 0.329472 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |3>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.331028 | 0.331028 | 0.331028 | 0.331028 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |2>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.335554 | 0.335554 | 0.335554 | 0.335554 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |4>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.330220 | 0.330220 | 0.330220 | 0.330220 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_run | 1 | 1 | wall_clock | sec | 2.354618 | 2.354618 | 2.354618 | 2.354618 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |0>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 2.354600 | 2.354600 | 2.354600 | 2.354600 | 0.000000 | 0.000000 | 48.3 |
|
||||
| |0>>> |_fib | 1094600 | 3 | wall_clock | sec | 1.217671 | 0.000001 | 0.000000 | 0.000055 | 0.000000 | 0.000002 | 41.3 |
|
||||
| |0>>> |_fib | 418100 | 4 | wall_clock | sec | 0.714197 | 0.000002 | 0.000000 | 0.000050 | 0.000000 | 0.000002 | 38.1 |
|
||||
| |0>>> |_fib | 258400 | 5 | wall_clock | sec | 0.441874 | 0.000002 | 0.000000 | 0.000047 | 0.000000 | 0.000002 | 37.9 |
|
||||
| |0>>> |_fib | 159700 | 6 | wall_clock | sec | 0.274224 | 0.000002 | 0.000000 | 0.000044 | 0.000000 | 0.000002 | 37.9 |
|
||||
| |0>>> |_fib | 98700 | 7 | wall_clock | sec | 0.170399 | 0.000002 | 0.000000 | 0.000042 | 0.000000 | 0.000002 | 37.7 |
|
||||
| |0>>> |_fib | 61000 | 8 | wall_clock | sec | 0.106093 | 0.000002 | 0.000000 | 0.000039 | 0.000000 | 0.000002 | 37.5 |
|
||||
| |0>>> |_fib | 37700 | 9 | wall_clock | sec | 0.066316 | 0.000002 | 0.000000 | 0.000036 | 0.000000 | 0.000002 | 40.2 |
|
||||
| |0>>> |_fib | 23300 | 10 | wall_clock | sec | 0.039640 | 0.000002 | 0.000000 | 0.000033 | 0.000000 | 0.000002 | 38.2 |
|
||||
| |0>>> |_fib | 14400 | 11 | wall_clock | sec | 0.024504 | 0.000002 | 0.000000 | 0.000030 | 0.000000 | 0.000002 | 37.9 |
|
||||
| |0>>> |_fib | 8900 | 12 | wall_clock | sec | 0.015219 | 0.000002 | 0.000000 | 0.000027 | 0.000000 | 0.000002 | 38.1 |
|
||||
| |0>>> |_fib | 5500 | 13 | wall_clock | sec | 0.009417 | 0.000002 | 0.000000 | 0.000024 | 0.000000 | 0.000002 | 38.3 |
|
||||
| |0>>> |_fib | 3400 | 14 | wall_clock | sec | 0.005806 | 0.000002 | 0.000000 | 0.000021 | 0.000000 | 0.000002 | 38.4 |
|
||||
| |0>>> |_fib | 2100 | 15 | wall_clock | sec | 0.003576 | 0.000002 | 0.000000 | 0.000019 | 0.000000 | 0.000002 | 38.4 |
|
||||
| |0>>> |_fib | 1300 | 16 | wall_clock | sec | 0.002201 | 0.000002 | 0.000000 | 0.000016 | 0.000000 | 0.000002 | 40.3 |
|
||||
| |0>>> |_fib | 800 | 17 | wall_clock | sec | 0.001315 | 0.000002 | 0.000000 | 0.000014 | 0.000000 | 0.000002 | 42.1 |
|
||||
| |0>>> |_fib | 500 | 18 | wall_clock | sec | 0.000762 | 0.000002 | 0.000000 | 0.000010 | 0.000000 | 0.000001 | 42.1 |
|
||||
| |0>>> |_fib | 300 | 19 | wall_clock | sec | 0.000441 | 0.000001 | 0.000000 | 0.000008 | 0.000000 | 0.000001 | 47.8 |
|
||||
| |0>>> |_fib | 200 | 20 | wall_clock | sec | 0.000230 | 0.000001 | 0.000000 | 0.000006 | 0.000000 | 0.000001 | 49.0 |
|
||||
| |0>>> |_fib | 100 | 21 | wall_clock | sec | 0.000117 | 0.000001 | 0.000001 | 0.000003 | 0.000000 | 0.000000 | 84.5 |
|
||||
| |0>>> |_fib | 100 | 22 | wall_clock | sec | 0.000018 | 0.000000 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> std::vector<std::thread, std::allocator<std::thread> >::~vector | 1 | 0 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| REAL-CLOCK TIMER (I.E. WALL-CLOCK TIMER) |
|
||||
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| LABEL | COUNT | DEPTH | METRIC | UNITS | SUM | MEAN | MIN | MAX | VAR | STDDEV | % SELF |
|
||||
|---------------------------------------------------------------------------------|--------|--------|------------|--------|----------|----------|----------|----------|----------|----------|--------|
|
||||
| |0>>> ./user-api.inst | 1 | 0 | wall_clock | sec | 5.078521 | 5.078521 | 5.078521 | 5.078521 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |0>>> |_initialization | 1 | 1 | wall_clock | sec | 0.000004 | 0.000004 | 0.000004 | 0.000004 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_thread_creation | 1 | 1 | wall_clock | sec | 0.000159 | 0.000159 | 0.000159 | 0.000159 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_thread_wait | 1 | 1 | wall_clock | sec | 0.355307 | 0.355307 | 0.355307 | 0.355307 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |0>>> |_std::vector<std::thread, std::allocator<std::thread> >::begin | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::vector<std::thread, std::allocator<std::thread> >::end | 1 | 2 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_pthread_join | 4 | 2 | wall_clock | sec | 0.355257 | 0.088814 | 0.000001 | 0.333144 | 0.026559 | 0.162970 | 100.0 |
|
||||
| |2>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000032 | 0.000032 | 0.000032 | 0.000032 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |1>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000036 | 0.000036 | 0.000036 | 0.000036 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |3>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000034 | 0.000034 | 0.000034 | 0.000034 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |4>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000039 | 0.000039 | 0.000039 | 0.000039 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_run | 1 | 1 | wall_clock | sec | 4.722993 | 4.722993 | 4.722993 | 4.722993 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |0>>> |_std::char_traits<char>::length | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::distance<char const*> | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::operator+<char, std::char_traits<char>, std::allocator<char> > | 2 | 2 | wall_clock | sec | 0.000002 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 4.722951 | 4.722951 | 4.722951 | 4.722951 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |0>>> |_run [{94,25}-{96,25}] | 1 | 3 | wall_clock | sec | 4.722925 | 4.722925 | 4.722925 | 4.722925 | 0.000000 | 0.000000 | 0.0 |
|
||||
| |0>>> |_fib | 100 | 4 | wall_clock | sec | 4.722718 | 0.047227 | 0.046713 | 0.051987 | 0.000000 | 0.000625 | 0.0 |
|
||||
| |0>>> |_fib | 200 | 5 | wall_clock | sec | 4.722302 | 0.023612 | 0.017827 | 0.034091 | 0.000032 | 0.005627 | 0.0 |
|
||||
| |0>>> |_fib | 400 | 6 | wall_clock | sec | 4.721485 | 0.011804 | 0.006790 | 0.023003 | 0.000016 | 0.004024 | 0.0 |
|
||||
| |0>>> |_fib | 800 | 7 | wall_clock | sec | 4.719858 | 0.005900 | 0.002564 | 0.016078 | 0.000006 | 0.002498 | 0.1 |
|
||||
| |0>>> |_fib | 1600 | 8 | wall_clock | sec | 4.716572 | 0.002948 | 0.000977 | 0.011849 | 0.000002 | 0.001465 | 0.1 |
|
||||
| |0>>> |_fib | 3200 | 9 | wall_clock | sec | 4.709918 | 0.001472 | 0.000371 | 0.008246 | 0.000001 | 0.000831 | 0.3 |
|
||||
| |0>>> |_fib | 6400 | 10 | wall_clock | sec | 4.696775 | 0.000734 | 0.000140 | 0.005111 | 0.000000 | 0.000461 | 0.6 |
|
||||
| |0>>> |_fib | 12800 | 11 | wall_clock | sec | 4.670093 | 0.000365 | 0.000050 | 0.003166 | 0.000000 | 0.000253 | 1.1 |
|
||||
| |0>>> |_fib | 25600 | 12 | wall_clock | sec | 4.617496 | 0.000180 | 0.000017 | 0.001959 | 0.000000 | 0.000137 | 2.3 |
|
||||
| |0>>> |_fib | 51200 | 13 | wall_clock | sec | 4.512671 | 0.000088 | 0.000004 | 0.001212 | 0.000000 | 0.000074 | 4.6 |
|
||||
| |0>>> |_fib | 102400 | 14 | wall_clock | sec | 4.304142 | 0.000042 | 0.000000 | 0.000752 | 0.000000 | 0.000039 | 9.6 |
|
||||
| |0>>> |_fib | 202600 | 15 | wall_clock | sec | 3.892580 | 0.000019 | 0.000000 | 0.000469 | 0.000000 | 0.000021 | 19.0 |
|
||||
| |0>>> |_fib | 363200 | 16 | wall_clock | sec | 3.151143 | 0.000009 | 0.000000 | 0.000293 | 0.000000 | 0.000011 | 33.2 |
|
||||
| |0>>> |_fib | 502000 | 17 | wall_clock | sec | 2.105217 | 0.000004 | 0.000000 | 0.000183 | 0.000000 | 0.000006 | 49.1 |
|
||||
| |0>>> |_fib | 476000 | 18 | wall_clock | sec | 1.071652 | 0.000002 | 0.000000 | 0.000114 | 0.000000 | 0.000004 | 63.6 |
|
||||
| |0>>> |_fib | 294200 | 19 | wall_clock | sec | 0.390193 | 0.000001 | 0.000000 | 0.000071 | 0.000000 | 0.000003 | 75.3 |
|
||||
| |0>>> |_fib | 115200 | 20 | wall_clock | sec | 0.096190 | 0.000001 | 0.000000 | 0.000043 | 0.000000 | 0.000002 | 84.4 |
|
||||
| |0>>> |_fib | 27400 | 21 | wall_clock | sec | 0.015020 | 0.000001 | 0.000000 | 0.000025 | 0.000000 | 0.000001 | 91.1 |
|
||||
| |0>>> |_fib | 3600 | 22 | wall_clock | sec | 0.001336 | 0.000000 | 0.000000 | 0.000013 | 0.000000 | 0.000001 | 96.3 |
|
||||
| |0>>> |_fib | 200 | 23 | wall_clock | sec | 0.000050 | 0.000000 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::char_traits<char>::length | 1 | 3 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::distance<char const*> | 1 | 3 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::operator+<char, std::char_traits<char>, std::allocator<char> > | 2 | 3 | wall_clock | sec | 0.000001 | 0.000001 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::operator& | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> std::vector<std::thread, std::allocator<std::thread> >::~vector | 1 | 0 | wall_clock | sec | 0.000045 | 0.000045 | 0.000045 | 0.000045 | 0.000000 | 0.000000 | 32.7 |
|
||||
| |0>>> |_std::thread::~thread | 4 | 1 | wall_clock | sec | 0.000030 | 0.000007 | 0.000007 | 0.000009 | 0.000000 | 0.000001 | 31.2 |
|
||||
| |0>>> |_std::thread::joinable | 4 | 2 | wall_clock | sec | 0.000021 | 0.000005 | 0.000005 | 0.000006 | 0.000000 | 0.000001 | 89.4 |
|
||||
| |0>>> |_std::thread::id::id | 4 | 3 | wall_clock | sec | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::operator== | 4 | 3 | wall_clock | sec | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::allocator_traits<std::allocator<std::thread> >::deallocate | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
| |0>>> |_std::allocator<std::thread>::~allocator | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|
||||
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
```
|
||||
|
||||
@@ -696,8 +696,10 @@ omnitrace_init_tooling_hidden()
|
||||
if(get_state() != State::Finalized)
|
||||
{
|
||||
if(get_use_sampling()) sampling::shutdown();
|
||||
if(thread_data<omnitrace_thread_bundle_t>::instance())
|
||||
thread_data<omnitrace_thread_bundle_t>::instance()->stop();
|
||||
auto& _thr_bundle = thread_data<omnitrace_thread_bundle_t>::instance();
|
||||
if(_thr_bundle && _thr_bundle->get<comp::wall_clock>() &&
|
||||
_thr_bundle->get<comp::wall_clock>()->get_is_running())
|
||||
_thr_bundle->stop();
|
||||
}
|
||||
} };
|
||||
(void) _thread_setup;
|
||||
@@ -744,6 +746,7 @@ omnitrace_init_tooling_hidden()
|
||||
_data.allocator.destroy(_v);
|
||||
_data.allocator.deallocate(_v, 1);
|
||||
_data.bundles.erase(_data.bundles.begin() + (i - 1));
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -1122,7 +1125,6 @@ omnitrace_finalize_hidden(void)
|
||||
if(itr && itr->get<comp::wall_clock>() &&
|
||||
!itr->get<comp::wall_clock>()->get_is_running())
|
||||
{
|
||||
continue;
|
||||
std::string _msg = JOIN("", *itr);
|
||||
auto _pos = _msg.find(">>> ");
|
||||
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
|
||||
|
||||
+13
@@ -109,6 +109,8 @@ pthread_create_gotcha::wrapper::wrapper(routine_t _routine, void* _arg,
|
||||
void*
|
||||
pthread_create_gotcha::wrapper::operator()() const
|
||||
{
|
||||
using thread_bundle_data_t = thread_data<omnitrace_thread_bundle_t>;
|
||||
|
||||
if(is_shutdown && *is_shutdown)
|
||||
{
|
||||
if(m_promise) m_promise->set_value();
|
||||
@@ -134,6 +136,10 @@ pthread_create_gotcha::wrapper::operator()() const
|
||||
if(_tid >= 0)
|
||||
{
|
||||
get_execution_time(_tid)->second = comp::wall_clock::record();
|
||||
auto& _thr_bundle = thread_bundle_data_t::instance();
|
||||
if(_thr_bundle && _thr_bundle->get<comp::wall_clock>() &&
|
||||
_thr_bundle->get<comp::wall_clock>()->get_is_running())
|
||||
_thr_bundle->stop();
|
||||
pthread_create_gotcha::shutdown(_tid);
|
||||
}
|
||||
set_thread_state(ThreadState::Completed);
|
||||
@@ -146,6 +152,13 @@ pthread_create_gotcha::wrapper::operator()() const
|
||||
{
|
||||
_tid = threading::get_id();
|
||||
threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str());
|
||||
if(!thread_bundle_data_t::instances().at(_tid))
|
||||
{
|
||||
thread_data<omnitrace_thread_bundle_t>::construct(
|
||||
TIMEMORY_JOIN("", get_exe_name(), "/thread-", threading::get_id()),
|
||||
quirk::config<quirk::auto_start>{});
|
||||
thread_bundle_data_t::instances().at(_tid)->start();
|
||||
}
|
||||
if(bundles && bundles_mutex)
|
||||
{
|
||||
std::unique_lock<std::mutex> _lk{ *bundles_mutex };
|
||||
|
||||
@@ -886,12 +886,15 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
|
||||
|
||||
omnitrace_add_python_test(
|
||||
NAME python-source-check
|
||||
COMMAND ${OMNITRACE_CAT_COMMAND}
|
||||
COMMAND
|
||||
${_PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/validate-timemory-json.py
|
||||
-l run fib fib fib fib fib inefficient _sum -c 5 5 10 20 30 10 5 5 -d 0 1
|
||||
2 3 4 5 1 2 -m trip_count -i
|
||||
PYTHON_VERSION ${_VERSION}
|
||||
FILE omnitrace-tests-output/python-source/${_VERSION}/trip_count.txt
|
||||
PASS_REGEX
|
||||
"(\\\| \\\|0>>> run \\\| 5).*(\\\| \\\|0>>> \\\|_fib \\\| 40).*(\\\| \\\|0>>> \\\|_fib \\\| 5).*(\\\| \\\|0>>> \\\|_inefficient \\\| 5).*(\\\| \\\|0>>> \\\|__sum \\\| 5)"
|
||||
FILE omnitrace-tests-output/python-source/${_VERSION}/trip_count.json
|
||||
DEPENDS python-source-${_VERSION}
|
||||
PASS_REGEX
|
||||
"omnitrace-tests-output/python-source/${_VERSION}/trip_count.json validated"
|
||||
ENVIRONMENT "${_python_environment}")
|
||||
else()
|
||||
omnitrace_message(
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
|
||||
|
||||
def validate_json(data, labels, counts, depths):
|
||||
expected = []
|
||||
for litr, citr, ditr in zip(labels, counts, depths):
|
||||
entry = []
|
||||
_label = litr
|
||||
if ditr > 0:
|
||||
_label = "{}|_{}".format(" " * (ditr - 1), litr)
|
||||
entry = [_label, citr, ditr]
|
||||
expected.append(entry)
|
||||
|
||||
for ditr, eitr in zip(data, expected):
|
||||
_prefix = ditr["prefix"]
|
||||
_depth = ditr["depth"]
|
||||
_count = ditr["entry"]["laps"]
|
||||
_idx = _prefix.find(">>>")
|
||||
if _idx is not None:
|
||||
_prefix = _prefix[(_idx + 4) :]
|
||||
if _prefix != eitr[0]:
|
||||
raise RuntimeError(f"Mismatched prefix: {_prefix} vs. {eitr[0]}")
|
||||
if _count != eitr[1]:
|
||||
raise RuntimeError(f"Mismatched depth: {_depth} vs. {eitr[2]}")
|
||||
if _depth != eitr[2]:
|
||||
raise RuntimeError(f"Mismatched depth: {_depth} vs. {eitr[2]}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-m", "--metric", type=str, help="JSON metric", required=True)
|
||||
parser.add_argument(
|
||||
"-l", "--labels", nargs="+", type=str, help="Expected labels", default=[]
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c", "--counts", nargs="+", type=int, help="Expected counts", default=[]
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d", "--depths", nargs="+", type=int, help="Expected depths", default=[]
|
||||
)
|
||||
parser.add_argument("-i", "--input", type=str, help="Input file", required=True)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if len(args.labels) != len(args.counts) or len(args.labels) != len(args.depths):
|
||||
raise RuntimeError(
|
||||
"The same number of labels, counts, and depths must be specified"
|
||||
)
|
||||
|
||||
ret = 0
|
||||
with open(args.input) as f:
|
||||
data = json.load(f)
|
||||
try:
|
||||
validate_json(
|
||||
data["timemory"][args.metric]["ranks"][0]["graph"],
|
||||
args.labels,
|
||||
args.counts,
|
||||
args.depths,
|
||||
)
|
||||
except RuntimeError as e:
|
||||
print(f"{e}")
|
||||
ret = 1
|
||||
if ret == 0:
|
||||
print(f"{args.input} validated")
|
||||
sys.exit(ret)
|
||||
Fai riferimento in un nuovo problema
Block a user