b5bdba12e4
* Fix roctracer_flush_activity
- invoke roctracer_flush_activity() before disabling domains
* create comp::roctracer::flush()
- real issue was the global state when roctracer_flush_activity() was called
* formatting
* Update lib/omnitrace/library/components/roctracer.hpp
- provide definition of comp::roctracer::flush when OMNITRACE_USE_ROCTRACER is not defined
* omnitrace.cfg -> perfetto.cfg
- rename provided perfetto config file (omnitrace.cfg) to perfetto.cfg to avoid confusion
* Update lib/core
- gpu.hpp: defines for OMNITRACE_USE_{HIP,ROCTRACER,ROCPROFILER,ROCM_SMI}
- gpu.cpp
- include core/hip_runtime.hpp
- fix serialization of hipDeviceProp_t
- add hip_runtime.hpp
- ensure proper inclusion of hip_runtime.h
- add rccl.hpp
- ensure proper inclusion of rccl.h
* Update lib/omnitrace/library
- rcclp.cpp
- update includes for rccl
- roctracer.hpp
- update includes for hip_runtime
- components/comm_data.hpp
- update includes for rccl
- components/rcclp.hpp
- update includes for rccl
* Update bin/omnitrace-avail/avail.cpp
- update includes for hip_runtime
* Update examples/rccl/CMakeLists.txt
- fix find_package for rccl when CI enabled
* Update CMakeLists.txt
- set cmake policy CMP0135 to NEW for cmake >= 3.24
- Enable DOWNLOAD_EXTRACT_TIMESTAMP with ExternalProject_Add + URL download method
* Update timemory submodule
* Update pybind11 submodule
* Update pybind11 submodule
* Update lib/core/rccl.hpp
- include rccl.h only if OMNITRACE_USE_RCCL > 0
* Update lib/core/{gpu,hip_runtime}.hpp
* Update lib/core/gpu.cpp
- reintroduce some ppdefs
* Update lib/core/gpu.cpp
- fix ifdef on OMNITRACE_HIP_VERSION
* Update lib/core/gpu.cpp
- fix static assert for OMNITRACE_HIP_VERSION_MINOR when HIP version 4.x or older (unreliable minor versions)
* Update lib/core/gpu.cpp
- fix ifdef on OMNITRACE_HIP_VERSION
* Update lib/core/config.cpp
- disable OMNITRACE_PERFETTO_COMBINE_TRACES by default
* Update lib/core/perfetto.cpp
- if unable to open the perfetto temp file, fall back to returning the result of ReadTraceBlocking()
* Update lib/core/config.*
- flush tmpfile before closing
[ROCm/rocprofiler-systems commit: 7bc50f5a0a]
98 línte
3.2 KiB
CMake
98 línte
3.2 KiB
CMake
# -------------------------------------------------------------------------------------- #
#
# ROCm tests
#
# -------------------------------------------------------------------------------------- #
|
|
|
|
# Comma-separated event list handed to the rocprofiler tests through the
# OMNITRACE_ROCM_EVENTS environment variable. It is assembled one event per line
# so additions and removals show up as single-line diffs; the resulting value is
# a single string with no whitespace.
#
# NOTE(review): the ":device=0" suffix presumably restricts that counter to
# device 0 -- confirm against the OMNITRACE_ROCM_EVENTS documentation.
string(
    CONCAT OMNITRACE_ROCM_EVENTS_TEST
           "GRBM_COUNT,"
           "GPUBusy,"
           "SQ_WAVES,"
           "SQ_INSTS_VALU,"
           "VALUInsts,"
           "TCC_HIT_sum,"
           "TA_TA_BUSY[0]:device=0,"
           "TA_TA_BUSY[11]:device=0")
|
|
|
|
# Registers the "transpose" test for the `transpose` target with GPU support
# enabled. MPI usage and the process count come from TRANSPOSE_USE_MPI and
# NUM_PROCS. OMNITRACE_CRITICAL_TRACE=ON is appended to the base environment.
#
# NOTE(review): "-E uniform_int_distribution" presumably excludes matching
# functions from instrumentation -- confirm against the instrumenter's CLI help.
omnitrace_add_test(
    NAME transpose
    TARGET transpose
    MPI ${TRANSPOSE_USE_MPI}
    GPU ON
    NUM_PROCS ${NUM_PROCS}
    # arguments for the binary-rewrite variant of the test
    REWRITE_ARGS
        -e
        -v 2
        --print-instructions
        -E uniform_int_distribution
    # arguments for the runtime-instrumentation variant of the test
    RUNTIME_ARGS
        -e
        -v 1
        --label file line return args
        -E uniform_int_distribution
    ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=ON"
)
|
|
|
|
# Registers "transpose-two-kernels": the `transpose` target run as a single
# non-MPI process with the arguments "1 2 2". The SKIP_REWRITE and SKIP_RUNTIME
# options are passed, and the environment turns off critical trace as well as
# the roctracer HSA activity and HSA API settings.
omnitrace_add_test(
    SKIP_REWRITE
    SKIP_RUNTIME
    NAME transpose-two-kernels
    TARGET transpose
    MPI OFF
    GPU ON
    NUM_PROCS 1
    RUN_ARGS
        1
        2
        2
    ENVIRONMENT
        "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_ROCTRACER_HSA_ACTIVITY=OFF;OMNITRACE_ROCTRACER_HSA_API=OFF"
)
|
|
|
|
# Registers "transpose-loops" (label "loops") for the `transpose` target. The
# SKIP_BASELINE and SKIP_RUNTIME options are passed. The rewrite step is marked
# as failed when its output contains
# "0 instrumented loops in procedure transpose".
#
# NOTE(review): "-l" and "-i 8" are presumably the loop-instrumentation switch
# and a minimum-instruction threshold -- confirm against the instrumenter's CLI
# help.
omnitrace_add_test(
    SKIP_BASELINE
    SKIP_RUNTIME
    NAME transpose-loops
    TARGET transpose
    LABELS "loops"
    MPI ${TRANSPOSE_USE_MPI}
    GPU ON
    NUM_PROCS ${NUM_PROCS}
    REWRITE_ARGS
        -e
        -v 2
        --label return args
        -l
        -i 8
        -E uniform_int_distribution
    RUN_ARGS
        2
        100
        50
    ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF"
    REWRITE_FAIL_REGEX "0 instrumented loops in procedure transpose"
)
|
|
|
|
# rocprofiler-specific variants of the transpose test; registered only when
# OMNITRACE_USE_ROCPROFILER is truthy.
if(OMNITRACE_USE_ROCPROFILER)
    # Pass regex shared by both tests below: the listed rocprof-device-0-*.txt
    # names must appear in the output in this order, with arbitrary text
    # between them. Built once so the two tests cannot drift apart.
    string(
        CONCAT _transpose_rocprof_pass_regex
               "rocprof-device-0-GRBM_COUNT.txt(.*)"
               "rocprof-device-0-GPUBusy.txt(.*)"
               "rocprof-device-0-SQ_WAVES.txt(.*)"
               "rocprof-device-0-SQ_INSTS_VALU.txt(.*)"
               "rocprof-device-0-VALUInsts.txt(.*)"
               "rocprof-device-0-TCC_HIT_sum.txt(.*)"
               "rocprof-device-0-TA_TA_BUSY_0.txt(.*)"
               "rocprof-device-0-TA_TA_BUSY_11.txt")

    # Collects the events in OMNITRACE_ROCM_EVENTS_TEST with the rest of the
    # base environment unchanged apart from OMNITRACE_CRITICAL_TRACE=OFF.
    omnitrace_add_test(
        SKIP_BASELINE
        SKIP_RUNTIME
        NAME transpose-rocprofiler
        TARGET transpose
        LABELS "rocprofiler"
        MPI ${TRANSPOSE_USE_MPI}
        GPU ON
        NUM_PROCS ${NUM_PROCS}
        REWRITE_ARGS
            -e
            -v 2
            -E uniform_int_distribution
        ENVIRONMENT
            "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}"
        REWRITE_RUN_PASS_REGEX "${_transpose_rocprof_pass_regex}"
    )

    # Same as above but with OMNITRACE_USE_ROCTRACER=OFF in the environment;
    # the run is additionally failed if "roctracer.txt" shows up in the output.
    #
    # NOTE(review): OMNITRACE_ABORT_FAIL_REGEX is passed as literal text inside
    # the fail regex; presumably omnitrace_add_test treats it as a placeholder
    # -- confirm in its definition.
    omnitrace_add_test(
        SKIP_BASELINE
        SKIP_RUNTIME
        NAME transpose-rocprofiler-no-roctracer
        TARGET transpose
        LABELS "rocprofiler"
        MPI ${TRANSPOSE_USE_MPI}
        GPU ON
        NUM_PROCS ${NUM_PROCS}
        REWRITE_ARGS
            -e
            -v 2
            -E uniform_int_distribution
        ENVIRONMENT
            "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_ROCTRACER=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}"
        REWRITE_RUN_PASS_REGEX "${_transpose_rocprof_pass_regex}"
        REWRITE_RUN_FAIL_REGEX "roctracer.txt|OMNITRACE_ABORT_FAIL_REGEX"
    )
endif()
|