Fix the main library stop routine for timemory (#39)

* Fix the main library stop routine for timemory

- the main pop_timemory function was popping too many calls
- this primarily affected recursive calls

* Lengthen the timeout for the Configure CMake step

* Fix python tests

- new validate-timemory-json.py script

* Documentation update

- Call-counts in timemory output examples in documentation were affected by the changes

* Fix the per-thread metrics during finalization

- pthread_create_mutex starts/stops the per-thread data
- removed unintentional continue statement

* Docs tweaks

* Fix lap counter on per-thread metrics

[ROCm/rocprofiler-systems commit: a142b2029d]
This commit is contained in:
Jonathan R. Madsen
2022-06-13 15:57:44 -05:00
committato da GitHub
parent ad9fd4b7ec
commit aeab36501a
12 ha cambiato i file con 209 aggiunte e 102 eliminazioni
@@ -66,7 +66,7 @@ jobs:
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
- name: Configure CMake
timeout-minutes: 5
timeout-minutes: 10
run:
cmake --version &&
if [ -z "${{ matrix.mpi }}" ]; then USE_MPI=OFF; else USE_MPI=ON; fi &&
@@ -48,7 +48,7 @@ jobs:
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:/opt/dyninst/lib:/opt/elfutils/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
- name: Configure CMake
timeout-minutes: 5
timeout-minutes: 10
run:
git config --global --add safe.directory ${PWD} &&
cmake --version &&
@@ -76,7 +76,7 @@ jobs:
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
- name: Configure CMake
timeout-minutes: 5
timeout-minutes: 10
run:
git config --global --add safe.directory ${PWD} &&
cmake --version &&
@@ -40,7 +40,7 @@ jobs:
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
- name: Configure CMake
timeout-minutes: 5
timeout-minutes: 10
run:
cmake --version &&
if [ -z "${{ matrix.mpi }}" ]; then USE_MPI=OFF; else USE_MPI=ON; fi &&
+1 -1
Vedi File
@@ -1 +1 @@
1.1.0
1.1.1
@@ -34,6 +34,7 @@ manage extensions, resources, data, etc.
- Built-in MPI support
- Kokkos-Tools support
- OpenMP-Tools (OMPT)
### GPU Metrics
@@ -66,7 +67,6 @@ manage extensions, resources, data, etc.
### Third-party API support
- OpenMP-Tools (OMPT)
- TAU
- LIKWID
- Caliper
@@ -124,26 +124,26 @@ Using `omnitrace-python ./example.py` with `OMNITRACE_USE_TIMEMORY=ON` and `OMNI
| LABEL | COUNT | DEPTH | METRIC | SUM |
|---------------------------------------------------|--------|--------|------------|--------|
| |0>>> run | 1 | 0 | trip_count | 1 |
| |0>>> |_fib | 10946 | 1 | trip_count | 10946 |
| |0>>> |_fib | 4181 | 2 | trip_count | 4181 |
| |0>>> |_fib | 2584 | 3 | trip_count | 2584 |
| |0>>> |_fib | 1597 | 4 | trip_count | 1597 |
| |0>>> |_fib | 987 | 5 | trip_count | 987 |
| |0>>> |_fib | 610 | 6 | trip_count | 610 |
| |0>>> |_fib | 377 | 7 | trip_count | 377 |
| |0>>> |_fib | 233 | 8 | trip_count | 233 |
| |0>>> |_fib | 144 | 9 | trip_count | 144 |
| |0>>> |_fib | 89 | 10 | trip_count | 89 |
| |0>>> |_fib | 55 | 11 | trip_count | 55 |
| |0>>> |_fib | 34 | 12 | trip_count | 34 |
| |0>>> |_fib | 21 | 13 | trip_count | 21 |
| |0>>> |_fib | 13 | 14 | trip_count | 13 |
| |0>>> |_fib | 8 | 15 | trip_count | 8 |
| |0>>> |_fib | 5 | 16 | trip_count | 5 |
| |0>>> |_fib | 3 | 17 | trip_count | 3 |
| |0>>> |_fib | 2 | 18 | trip_count | 2 |
| |0>>> |_fib | 1 | 19 | trip_count | 1 |
| |0>>> |_fib | 1 | 20 | trip_count | 1 |
| |0>>> |_fib | 1 | 1 | trip_count | 1 |
| |0>>> |_fib | 2 | 2 | trip_count | 2 |
| |0>>> |_fib | 4 | 3 | trip_count | 4 |
| |0>>> |_fib | 8 | 4 | trip_count | 8 |
| |0>>> |_fib | 16 | 5 | trip_count | 16 |
| |0>>> |_fib | 32 | 6 | trip_count | 32 |
| |0>>> |_fib | 64 | 7 | trip_count | 64 |
| |0>>> |_fib | 128 | 8 | trip_count | 128 |
| |0>>> |_fib | 256 | 9 | trip_count | 256 |
| |0>>> |_fib | 512 | 10 | trip_count | 512 |
| |0>>> |_fib | 1024 | 11 | trip_count | 1024 |
| |0>>> |_fib | 2026 | 12 | trip_count | 2026 |
| |0>>> |_fib | 3632 | 13 | trip_count | 3632 |
| |0>>> |_fib | 5020 | 14 | trip_count | 5020 |
| |0>>> |_fib | 4760 | 15 | trip_count | 4760 |
| |0>>> |_fib | 2942 | 16 | trip_count | 2942 |
| |0>>> |_fib | 1152 | 17 | trip_count | 1152 |
| |0>>> |_fib | 274 | 18 | trip_count | 274 |
| |0>>> |_fib | 36 | 19 | trip_count | 36 |
| |0>>> |_fib | 2 | 20 | trip_count | 2 |
| |0>>> |_inefficient | 1 | 1 | trip_count | 1 |
|-------------------------------------------------------------------------------------------|
```
@@ -205,26 +205,26 @@ The results for both of the source-level instrumentation modes are identical to
| LABEL | COUNT | DEPTH | METRIC | SUM |
|---------------------------------------------------|--------|--------|------------|--------|
| |0>>> run | 1 | 0 | trip_count | 1 |
| |0>>> |_fib | 10946 | 1 | trip_count | 10946 |
| |0>>> |_fib | 4181 | 2 | trip_count | 4181 |
| |0>>> |_fib | 2584 | 3 | trip_count | 2584 |
| |0>>> |_fib | 1597 | 4 | trip_count | 1597 |
| |0>>> |_fib | 987 | 5 | trip_count | 987 |
| |0>>> |_fib | 610 | 6 | trip_count | 610 |
| |0>>> |_fib | 377 | 7 | trip_count | 377 |
| |0>>> |_fib | 233 | 8 | trip_count | 233 |
| |0>>> |_fib | 144 | 9 | trip_count | 144 |
| |0>>> |_fib | 89 | 10 | trip_count | 89 |
| |0>>> |_fib | 55 | 11 | trip_count | 55 |
| |0>>> |_fib | 34 | 12 | trip_count | 34 |
| |0>>> |_fib | 21 | 13 | trip_count | 21 |
| |0>>> |_fib | 13 | 14 | trip_count | 13 |
| |0>>> |_fib | 8 | 15 | trip_count | 8 |
| |0>>> |_fib | 5 | 16 | trip_count | 5 |
| |0>>> |_fib | 3 | 17 | trip_count | 3 |
| |0>>> |_fib | 2 | 18 | trip_count | 2 |
| |0>>> |_fib | 1 | 19 | trip_count | 1 |
| |0>>> |_fib | 1 | 20 | trip_count | 1 |
| |0>>> |_fib | 1 | 1 | trip_count | 1 |
| |0>>> |_fib | 2 | 2 | trip_count | 2 |
| |0>>> |_fib | 4 | 3 | trip_count | 4 |
| |0>>> |_fib | 8 | 4 | trip_count | 8 |
| |0>>> |_fib | 16 | 5 | trip_count | 16 |
| |0>>> |_fib | 32 | 6 | trip_count | 32 |
| |0>>> |_fib | 64 | 7 | trip_count | 64 |
| |0>>> |_fib | 128 | 8 | trip_count | 128 |
| |0>>> |_fib | 256 | 9 | trip_count | 256 |
| |0>>> |_fib | 512 | 10 | trip_count | 512 |
| |0>>> |_fib | 1024 | 11 | trip_count | 1024 |
| |0>>> |_fib | 2026 | 12 | trip_count | 2026 |
| |0>>> |_fib | 3632 | 13 | trip_count | 3632 |
| |0>>> |_fib | 5020 | 14 | trip_count | 5020 |
| |0>>> |_fib | 4760 | 15 | trip_count | 4760 |
| |0>>> |_fib | 2942 | 16 | trip_count | 2942 |
| |0>>> |_fib | 1152 | 17 | trip_count | 1152 |
| |0>>> |_fib | 274 | 18 | trip_count | 274 |
| |0>>> |_fib | 36 | 19 | trip_count | 36 |
| |0>>> |_fib | 2 | 20 | trip_count | 2 |
| |0>>> |_inefficient | 1 | 1 | trip_count | 1 |
|-------------------------------------------------------------------------------------------|
```
@@ -125,7 +125,7 @@ custom_push_region(const char* name)
### User API Output
```console
$ omnitrace -l --min-address-range=0 --min-address-range-loop=0 --min-instructions=8 -E custom_push_region -o -- ./user-api
$ omnitrace -l --min-instructions=8 -E custom_push_region -o -- ./user-api
...
$ export OMNITRACE_USE_TIMEMORY=ON
$ export OMNITRACE_USE_PID=OFF
@@ -150,63 +150,83 @@ Pushing custom region :: initialization
[./user-api.inst] Iterations: 100
[./user-api.inst] fibonacci(20)...
Pushing custom region :: thread_creation
Pushing custom region :: run(20) x 100
Pushing custom region :: thread_wait
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
Pushing custom region :: run(20) x 100
[./user-api.inst] fibonacci(20) x 4 = 3382500
[omnitrace][86267][0][omnitrace_finalize] finalizing...
[omnitrace][2637959][0] omnitrace : 2.716905 sec wall_clock, 1.216 mb peak_rss, 3.680000 sec cpu_clock, 135.4 % cpu_util [laps: 1]
[omnitrace][2637959][0] user-api.inst/thread-0 : 2.715708 sec wall_clock, 2.354223 sec thread_cpu_clock, 86.7 % thread_cpu_util, 1.216 mb peak_rss [laps: 1]
[omnitrace][2637959][0] user-api.inst/thread-1 : 0.329802 sec wall_clock, 0.329739 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.000 mb peak_rss [laps: 1]
[omnitrace][2637959][0] user-api.inst/thread-2 : 0.355981 sec wall_clock, 0.335795 sec thread_cpu_clock, 94.3 % thread_cpu_util, 0.528 mb peak_rss [laps: 1]
[omnitrace][2637959][0] user-api.inst/thread-3 : 0.341329 sec wall_clock, 0.331214 sec thread_cpu_clock, 97.0 % thread_cpu_util, 0.456 mb peak_rss [laps: 1]
[omnitrace][2637959][0] user-api.inst/thread-4 : 0.360631 sec wall_clock, 0.330374 sec thread_cpu_clock, 91.6 % thread_cpu_util, 0.600 mb peak_rss [laps: 1]
[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.json'...
[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.tree.json'...
[wall_clock]|0> Outputting 'omnitrace-example-output/wall_clock.txt'...
[omnitrace][86267][0] omnitrace : 5.190895 sec wall_clock, 2.748 mb peak_rss, 6.330000 sec cpu_clock, 121.9 % cpu_util [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-0 : 5.078713 sec wall_clock, 4.722415 sec thread_cpu_clock, 93.0 % thread_cpu_util, 1.276 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-1 : 0.322248 sec wall_clock, 0.322191 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.000 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-2 : 0.323255 sec wall_clock, 0.323194 sec thread_cpu_clock, 100.0 % thread_cpu_util, 0.000 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-3 : 0.323569 sec wall_clock, 0.323484 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.092 mb peak_rss [laps: 1]
[omnitrace][86267][0] user-api.inst/thread-4 : 0.324178 sec wall_clock, 0.324057 sec thread_cpu_clock, 100.0 % thread_cpu_util, 1.184 mb peak_rss [laps: 1]
[omnitrace][86267][0] Post-processing 51 cpu frequency and memory usage entries...
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.json'...
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.tree.json'...
[omnitrace][wall_clock]|0> Outputting 'omnitrace-user-api.inst-output/wall_clock.txt'...
[metadata::manager::finalize]> Outputting 'omnitrace-example-output/metadata.json' and 'omnitrace-example-output/functions.json'...
[omnitrace][manager::finalize][metadata]> Outputting 'omnitrace-user-api.inst-output/metadata.json' and 'omnitrace-user-api.inst-output/functions.json'...
[omnitrace][86267][0][omnitrace_finalize] Finalized
$ cat omnitrace-example-output/wall_clock.txt
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| REAL-CLOCK TIMER (I.E. WALL-CLOCK TIMER) |
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| LABEL | COUNT | DEPTH | METRIC | UNITS | SUM | MEAN | MIN | MAX | VAR | STDDEV | % SELF |
|-----------------------------------------------------------------------|---------|--------|------------|--------|----------|----------|----------|----------|----------|----------|--------|
| |0>>> ./user-api.inst | 1 | 0 | wall_clock | sec | 2.715611 | 2.715611 | 2.715611 | 2.715611 | 0.000000 | 0.000000 | 0.0 |
| |0>>> |_initialization | 1 | 1 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_thread_creation | 1 | 1 | wall_clock | sec | 0.000170 | 0.000170 | 0.000170 | 0.000170 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_thread_wait | 1 | 1 | wall_clock | sec | 0.360751 | 0.360751 | 0.360751 | 0.360751 | 0.000000 | 0.000000 | 0.0 |
| |1>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.329472 | 0.329472 | 0.329472 | 0.329472 | 0.000000 | 0.000000 | 100.0 |
| |3>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.331028 | 0.331028 | 0.331028 | 0.331028 | 0.000000 | 0.000000 | 100.0 |
| |2>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.335554 | 0.335554 | 0.335554 | 0.335554 | 0.000000 | 0.000000 | 100.0 |
| |4>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 0.330220 | 0.330220 | 0.330220 | 0.330220 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_run | 1 | 1 | wall_clock | sec | 2.354618 | 2.354618 | 2.354618 | 2.354618 | 0.000000 | 0.000000 | 0.0 |
| |0>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 2.354600 | 2.354600 | 2.354600 | 2.354600 | 0.000000 | 0.000000 | 48.3 |
| |0>>> |_fib | 1094600 | 3 | wall_clock | sec | 1.217671 | 0.000001 | 0.000000 | 0.000055 | 0.000000 | 0.000002 | 41.3 |
| |0>>> |_fib | 418100 | 4 | wall_clock | sec | 0.714197 | 0.000002 | 0.000000 | 0.000050 | 0.000000 | 0.000002 | 38.1 |
| |0>>> |_fib | 258400 | 5 | wall_clock | sec | 0.441874 | 0.000002 | 0.000000 | 0.000047 | 0.000000 | 0.000002 | 37.9 |
| |0>>> |_fib | 159700 | 6 | wall_clock | sec | 0.274224 | 0.000002 | 0.000000 | 0.000044 | 0.000000 | 0.000002 | 37.9 |
| |0>>> |_fib | 98700 | 7 | wall_clock | sec | 0.170399 | 0.000002 | 0.000000 | 0.000042 | 0.000000 | 0.000002 | 37.7 |
| |0>>> |_fib | 61000 | 8 | wall_clock | sec | 0.106093 | 0.000002 | 0.000000 | 0.000039 | 0.000000 | 0.000002 | 37.5 |
| |0>>> |_fib | 37700 | 9 | wall_clock | sec | 0.066316 | 0.000002 | 0.000000 | 0.000036 | 0.000000 | 0.000002 | 40.2 |
| |0>>> |_fib | 23300 | 10 | wall_clock | sec | 0.039640 | 0.000002 | 0.000000 | 0.000033 | 0.000000 | 0.000002 | 38.2 |
| |0>>> |_fib | 14400 | 11 | wall_clock | sec | 0.024504 | 0.000002 | 0.000000 | 0.000030 | 0.000000 | 0.000002 | 37.9 |
| |0>>> |_fib | 8900 | 12 | wall_clock | sec | 0.015219 | 0.000002 | 0.000000 | 0.000027 | 0.000000 | 0.000002 | 38.1 |
| |0>>> |_fib | 5500 | 13 | wall_clock | sec | 0.009417 | 0.000002 | 0.000000 | 0.000024 | 0.000000 | 0.000002 | 38.3 |
| |0>>> |_fib | 3400 | 14 | wall_clock | sec | 0.005806 | 0.000002 | 0.000000 | 0.000021 | 0.000000 | 0.000002 | 38.4 |
| |0>>> |_fib | 2100 | 15 | wall_clock | sec | 0.003576 | 0.000002 | 0.000000 | 0.000019 | 0.000000 | 0.000002 | 38.4 |
| |0>>> |_fib | 1300 | 16 | wall_clock | sec | 0.002201 | 0.000002 | 0.000000 | 0.000016 | 0.000000 | 0.000002 | 40.3 |
| |0>>> |_fib | 800 | 17 | wall_clock | sec | 0.001315 | 0.000002 | 0.000000 | 0.000014 | 0.000000 | 0.000002 | 42.1 |
| |0>>> |_fib | 500 | 18 | wall_clock | sec | 0.000762 | 0.000002 | 0.000000 | 0.000010 | 0.000000 | 0.000001 | 42.1 |
| |0>>> |_fib | 300 | 19 | wall_clock | sec | 0.000441 | 0.000001 | 0.000000 | 0.000008 | 0.000000 | 0.000001 | 47.8 |
| |0>>> |_fib | 200 | 20 | wall_clock | sec | 0.000230 | 0.000001 | 0.000000 | 0.000006 | 0.000000 | 0.000001 | 49.0 |
| |0>>> |_fib | 100 | 21 | wall_clock | sec | 0.000117 | 0.000001 | 0.000001 | 0.000003 | 0.000000 | 0.000000 | 84.5 |
| |0>>> |_fib | 100 | 22 | wall_clock | sec | 0.000018 | 0.000000 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> std::vector<std::thread, std::allocator<std::thread> >::~vector | 1 | 0 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| REAL-CLOCK TIMER (I.E. WALL-CLOCK TIMER) |
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| LABEL | COUNT | DEPTH | METRIC | UNITS | SUM | MEAN | MIN | MAX | VAR | STDDEV | % SELF |
|---------------------------------------------------------------------------------|--------|--------|------------|--------|----------|----------|----------|----------|----------|----------|--------|
| |0>>> ./user-api.inst | 1 | 0 | wall_clock | sec | 5.078521 | 5.078521 | 5.078521 | 5.078521 | 0.000000 | 0.000000 | 0.0 |
| |0>>> |_initialization | 1 | 1 | wall_clock | sec | 0.000004 | 0.000004 | 0.000004 | 0.000004 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_thread_creation | 1 | 1 | wall_clock | sec | 0.000159 | 0.000159 | 0.000159 | 0.000159 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_thread_wait | 1 | 1 | wall_clock | sec | 0.355307 | 0.355307 | 0.355307 | 0.355307 | 0.000000 | 0.000000 | 0.0 |
| |0>>> |_std::vector<std::thread, std::allocator<std::thread> >::begin | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::vector<std::thread, std::allocator<std::thread> >::end | 1 | 2 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_pthread_join | 4 | 2 | wall_clock | sec | 0.355257 | 0.088814 | 0.000001 | 0.333144 | 0.026559 | 0.162970 | 100.0 |
| |2>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000032 | 0.000032 | 0.000032 | 0.000032 | 0.000000 | 0.000000 | 100.0 |
| |1>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000036 | 0.000036 | 0.000036 | 0.000036 | 0.000000 | 0.000000 | 100.0 |
| |3>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000034 | 0.000034 | 0.000034 | 0.000034 | 0.000000 | 0.000000 | 100.0 |
| |4>>> |_start_thread | 1 | 3 | wall_clock | sec | 0.000039 | 0.000039 | 0.000039 | 0.000039 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_run | 1 | 1 | wall_clock | sec | 4.722993 | 4.722993 | 4.722993 | 4.722993 | 0.000000 | 0.000000 | 0.0 |
| |0>>> |_std::char_traits<char>::length | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::distance<char const*> | 1 | 2 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::operator+<char, std::char_traits<char>, std::allocator<char> > | 2 | 2 | wall_clock | sec | 0.000002 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_run(20) x 100 | 1 | 2 | wall_clock | sec | 4.722951 | 4.722951 | 4.722951 | 4.722951 | 0.000000 | 0.000000 | 0.0 |
| |0>>> |_run [{94,25}-{96,25}] | 1 | 3 | wall_clock | sec | 4.722925 | 4.722925 | 4.722925 | 4.722925 | 0.000000 | 0.000000 | 0.0 |
| |0>>> |_fib | 100 | 4 | wall_clock | sec | 4.722718 | 0.047227 | 0.046713 | 0.051987 | 0.000000 | 0.000625 | 0.0 |
| |0>>> |_fib | 200 | 5 | wall_clock | sec | 4.722302 | 0.023612 | 0.017827 | 0.034091 | 0.000032 | 0.005627 | 0.0 |
| |0>>> |_fib | 400 | 6 | wall_clock | sec | 4.721485 | 0.011804 | 0.006790 | 0.023003 | 0.000016 | 0.004024 | 0.0 |
| |0>>> |_fib | 800 | 7 | wall_clock | sec | 4.719858 | 0.005900 | 0.002564 | 0.016078 | 0.000006 | 0.002498 | 0.1 |
| |0>>> |_fib | 1600 | 8 | wall_clock | sec | 4.716572 | 0.002948 | 0.000977 | 0.011849 | 0.000002 | 0.001465 | 0.1 |
| |0>>> |_fib | 3200 | 9 | wall_clock | sec | 4.709918 | 0.001472 | 0.000371 | 0.008246 | 0.000001 | 0.000831 | 0.3 |
| |0>>> |_fib | 6400 | 10 | wall_clock | sec | 4.696775 | 0.000734 | 0.000140 | 0.005111 | 0.000000 | 0.000461 | 0.6 |
| |0>>> |_fib | 12800 | 11 | wall_clock | sec | 4.670093 | 0.000365 | 0.000050 | 0.003166 | 0.000000 | 0.000253 | 1.1 |
| |0>>> |_fib | 25600 | 12 | wall_clock | sec | 4.617496 | 0.000180 | 0.000017 | 0.001959 | 0.000000 | 0.000137 | 2.3 |
| |0>>> |_fib | 51200 | 13 | wall_clock | sec | 4.512671 | 0.000088 | 0.000004 | 0.001212 | 0.000000 | 0.000074 | 4.6 |
| |0>>> |_fib | 102400 | 14 | wall_clock | sec | 4.304142 | 0.000042 | 0.000000 | 0.000752 | 0.000000 | 0.000039 | 9.6 |
| |0>>> |_fib | 202600 | 15 | wall_clock | sec | 3.892580 | 0.000019 | 0.000000 | 0.000469 | 0.000000 | 0.000021 | 19.0 |
| |0>>> |_fib | 363200 | 16 | wall_clock | sec | 3.151143 | 0.000009 | 0.000000 | 0.000293 | 0.000000 | 0.000011 | 33.2 |
| |0>>> |_fib | 502000 | 17 | wall_clock | sec | 2.105217 | 0.000004 | 0.000000 | 0.000183 | 0.000000 | 0.000006 | 49.1 |
| |0>>> |_fib | 476000 | 18 | wall_clock | sec | 1.071652 | 0.000002 | 0.000000 | 0.000114 | 0.000000 | 0.000004 | 63.6 |
| |0>>> |_fib | 294200 | 19 | wall_clock | sec | 0.390193 | 0.000001 | 0.000000 | 0.000071 | 0.000000 | 0.000003 | 75.3 |
| |0>>> |_fib | 115200 | 20 | wall_clock | sec | 0.096190 | 0.000001 | 0.000000 | 0.000043 | 0.000000 | 0.000002 | 84.4 |
| |0>>> |_fib | 27400 | 21 | wall_clock | sec | 0.015020 | 0.000001 | 0.000000 | 0.000025 | 0.000000 | 0.000001 | 91.1 |
| |0>>> |_fib | 3600 | 22 | wall_clock | sec | 0.001336 | 0.000000 | 0.000000 | 0.000013 | 0.000000 | 0.000001 | 96.3 |
| |0>>> |_fib | 200 | 23 | wall_clock | sec | 0.000050 | 0.000000 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::char_traits<char>::length | 1 | 3 | wall_clock | sec | 0.000001 | 0.000001 | 0.000001 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::distance<char const*> | 1 | 3 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::operator+<char, std::char_traits<char>, std::allocator<char> > | 2 | 3 | wall_clock | sec | 0.000001 | 0.000001 | 0.000000 | 0.000001 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::operator& | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
| |0>>> std::vector<std::thread, std::allocator<std::thread> >::~vector | 1 | 0 | wall_clock | sec | 0.000045 | 0.000045 | 0.000045 | 0.000045 | 0.000000 | 0.000000 | 32.7 |
| |0>>> |_std::thread::~thread | 4 | 1 | wall_clock | sec | 0.000030 | 0.000007 | 0.000007 | 0.000009 | 0.000000 | 0.000001 | 31.2 |
| |0>>> |_std::thread::joinable | 4 | 2 | wall_clock | sec | 0.000021 | 0.000005 | 0.000005 | 0.000006 | 0.000000 | 0.000001 | 89.4 |
| |0>>> |_std::thread::id::id | 4 | 3 | wall_clock | sec | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::operator== | 4 | 3 | wall_clock | sec | 0.000001 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::allocator_traits<std::allocator<std::thread> >::deallocate | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
| |0>>> |_std::allocator<std::thread>::~allocator | 1 | 1 | wall_clock | sec | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 100.0 |
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
```
@@ -696,8 +696,10 @@ omnitrace_init_tooling_hidden()
if(get_state() != State::Finalized)
{
if(get_use_sampling()) sampling::shutdown();
if(thread_data<omnitrace_thread_bundle_t>::instance())
thread_data<omnitrace_thread_bundle_t>::instance()->stop();
auto& _thr_bundle = thread_data<omnitrace_thread_bundle_t>::instance();
if(_thr_bundle && _thr_bundle->get<comp::wall_clock>() &&
_thr_bundle->get<comp::wall_clock>()->get_is_running())
_thr_bundle->stop();
}
} };
(void) _thread_setup;
@@ -744,6 +746,7 @@ omnitrace_init_tooling_hidden()
_data.allocator.destroy(_v);
_data.allocator.deallocate(_v, 1);
_data.bundles.erase(_data.bundles.begin() + (i - 1));
break;
}
}
};
@@ -1122,7 +1125,6 @@ omnitrace_finalize_hidden(void)
if(itr && itr->get<comp::wall_clock>() &&
!itr->get<comp::wall_clock>()->get_is_running())
{
continue;
std::string _msg = JOIN("", *itr);
auto _pos = _msg.find(">>> ");
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
@@ -109,6 +109,8 @@ pthread_create_gotcha::wrapper::wrapper(routine_t _routine, void* _arg,
void*
pthread_create_gotcha::wrapper::operator()() const
{
using thread_bundle_data_t = thread_data<omnitrace_thread_bundle_t>;
if(is_shutdown && *is_shutdown)
{
if(m_promise) m_promise->set_value();
@@ -134,6 +136,10 @@ pthread_create_gotcha::wrapper::operator()() const
if(_tid >= 0)
{
get_execution_time(_tid)->second = comp::wall_clock::record();
auto& _thr_bundle = thread_bundle_data_t::instance();
if(_thr_bundle && _thr_bundle->get<comp::wall_clock>() &&
_thr_bundle->get<comp::wall_clock>()->get_is_running())
_thr_bundle->stop();
pthread_create_gotcha::shutdown(_tid);
}
set_thread_state(ThreadState::Completed);
@@ -146,6 +152,13 @@ pthread_create_gotcha::wrapper::operator()() const
{
_tid = threading::get_id();
threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str());
if(!thread_bundle_data_t::instances().at(_tid))
{
thread_data<omnitrace_thread_bundle_t>::construct(
TIMEMORY_JOIN("", get_exe_name(), "/thread-", threading::get_id()),
quirk::config<quirk::auto_start>{});
thread_bundle_data_t::instances().at(_tid)->start();
}
if(bundles && bundles_mutex)
{
std::unique_lock<std::mutex> _lk{ *bundles_mutex };
@@ -886,12 +886,15 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
omnitrace_add_python_test(
NAME python-source-check
COMMAND ${OMNITRACE_CAT_COMMAND}
COMMAND
${_PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/validate-timemory-json.py
-l run fib fib fib fib fib inefficient _sum -c 5 5 10 20 30 10 5 5 -d 0 1
2 3 4 5 1 2 -m trip_count -i
PYTHON_VERSION ${_VERSION}
FILE omnitrace-tests-output/python-source/${_VERSION}/trip_count.txt
PASS_REGEX
"(\\\| \\\|0>>> run \\\| 5).*(\\\| \\\|0>>> \\\|_fib \\\| 40).*(\\\| \\\|0>>> \\\|_fib \\\| 5).*(\\\| \\\|0>>> \\\|_inefficient \\\| 5).*(\\\| \\\|0>>> \\\|__sum \\\| 5)"
FILE omnitrace-tests-output/python-source/${_VERSION}/trip_count.json
DEPENDS python-source-${_VERSION}
PASS_REGEX
"omnitrace-tests-output/python-source/${_VERSION}/trip_count.json validated"
ENVIRONMENT "${_python_environment}")
else()
omnitrace_message(
@@ -0,0 +1,69 @@
#!/usr/bin/env python3
import sys
import json
import argparse
def validate_json(data, labels, counts, depths):
expected = []
for litr, citr, ditr in zip(labels, counts, depths):
entry = []
_label = litr
if ditr > 0:
_label = "{}|_{}".format(" " * (ditr - 1), litr)
entry = [_label, citr, ditr]
expected.append(entry)
for ditr, eitr in zip(data, expected):
_prefix = ditr["prefix"]
_depth = ditr["depth"]
_count = ditr["entry"]["laps"]
_idx = _prefix.find(">>>")
if _idx is not None:
_prefix = _prefix[(_idx + 4) :]
if _prefix != eitr[0]:
raise RuntimeError(f"Mismatched prefix: {_prefix} vs. {eitr[0]}")
if _count != eitr[1]:
raise RuntimeError(f"Mismatched depth: {_depth} vs. {eitr[2]}")
if _depth != eitr[2]:
raise RuntimeError(f"Mismatched depth: {_depth} vs. {eitr[2]}")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--metric", type=str, help="JSON metric", required=True)
parser.add_argument(
"-l", "--labels", nargs="+", type=str, help="Expected labels", default=[]
)
parser.add_argument(
"-c", "--counts", nargs="+", type=int, help="Expected counts", default=[]
)
parser.add_argument(
"-d", "--depths", nargs="+", type=int, help="Expected depths", default=[]
)
parser.add_argument("-i", "--input", type=str, help="Input file", required=True)
args = parser.parse_args()
if len(args.labels) != len(args.counts) or len(args.labels) != len(args.depths):
raise RuntimeError(
"The same number of labels, counts, and depths must be specified"
)
ret = 0
with open(args.input) as f:
data = json.load(f)
try:
validate_json(
data["timemory"][args.metric]["ranks"][0]["graph"],
args.labels,
args.counts,
args.depths,
)
except RuntimeError as e:
print(f"{e}")
ret = 1
if ret == 0:
print(f"{args.input} validated")
sys.exit(ret)