Reorganization and critical trace support (#17)

* Roctracer wall clock integration (#16)

* Integrates roctracer values into wall-clock

* Fixed scoping + timemory roctracer

* Fixed data race in roctracer

* Synchronized HIP API on main thread

- Cache hip activity callbacks and execute on main thread
- Minor updates to transpose

* Debugging + MPI + transpose updates

* PTL + HSA and timemory + kernel timing

- PTL usage fixed HSA + timemory issues because we could control the thread destruction
- Fixed laps counting in roctracer callbacks

* Ignore select HIP API types

- The following API types are ignored because there appears to be a bug
  which causes their "end" callback to be labeled as "begin":
- hipDeviceEnablePeerAccess
- hipImportExternalMemory
- hipDestroyExternalMemory

* Tweaks to PTL config

* Timemory update + pid-prefix w/ mpi headers

- %pid%- prefix with mpi headers
- timemory submodule update

* CMake + critical trace + reorganize library source

- clang-tidy tweaks
- cmake function updates to use hosttrace_ prefix
- update gitignore
- cmake HOSTTRACE_MAX_THREADS option
- Formatting.cmake
- cleaned up MacroUtilities.cmake
- PTL submodule + usage
- tweak to Findroctracer.cmake
- MT transpose
- Updated PTL submodule
- Updated timemory submodule
- fix to hosttrace return value type if type not found
- reorganized library source code
- support for critical trace

* Remove bits/stdint-uintn.h headers

* Rename + config + depth + critical path

- rename hosttrace_timemory_data to instrumentation_bundles
- rename hosttrace_bundle_t to main_bundle_t
- rename bundle_t to instrumentation_bundle_t
- rework of configuration setup
- critical_trace write directly to file option
- tweaked depth calculation
- updated timemory submodule
- improved parallel support in roctracer callbacks
- working critical_trace
- perfetto device-critical-trace and host-critical-trace categories
- made transpose example parallel
- made parallel-overhead example a bit uneven
- relocated LTO activation

* Fixed duplicates in perfetto critical-trace

* reworked critical trace support

- substantial perf improvement (30-45 min -> 30 sec)
- changes to configuration (new and removed options)

* Removed "%pid%-" output prefix in mpi_gotcha

* Update timemory submodule

[ROCm/rocprofiler-systems commit: 752424efc2]
Dieser Commit ist enthalten in:
Jonathan R. Madsen
2021-11-23 02:53:14 -06:00
committet von GitHub
Ursprung cdd2707058
Commit efb6d766af
52 geänderte Dateien mit 7785 neuen und 2394 gelöschten Zeilen
@@ -18,6 +18,7 @@ modernize-*,\
-modernize-use-using,\
-modernize-use-auto,\
-modernize-concat-nested-namespaces,\
-modernize-use-nodiscard,\
performance-*,\
readability-*,\
-readability-function-size,\
@@ -1,6 +1,6 @@
parse:
additional_commands:
checkout_git_submodule:
hosttrace_checkout_git_submodule:
flags:
- RECURSIVE
kwargs:
@@ -33,3 +33,6 @@
/build*
/.vscode
/.cache
/.clangd
/compile_commands.json
@@ -10,3 +10,6 @@
[submodule "external/dyninst"]
path = external/dyninst
url = https://github.com/jrmadsen/dyninst.git
[submodule "external/PTL"]
path = external/PTL
url = https://github.com/jrmadsen/PTL.git
+108 -23
Datei anzeigen
@@ -50,49 +50,124 @@ include(BuildSettings) # compiler flags
set(CMAKE_CXX_STANDARD
17
CACHE STRING "CXX language standard")
add_option(CMAKE_CXX_STANDARD_REQUIRED "Require C++ language standard" ON)
add_option(CMAKE_CXX_EXTENSIONS "Compiler specific language extensions" OFF)
add_option(CMAKE_INSTALL_RPATH_USE_LINK_PATH "Enable rpath to linked libraries" ON)
add_option(HOSTTRACE_USE_CLANG_TIDY "Enable clang-tidy" OFF)
add_option(HOSTTRACE_USE_MPI "Enable MPI support" OFF)
add_option(HOSTTRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF)
add_option(HOSTTRACE_USE_ROCTRACER "Enable roctracer support" ON)
add_option(HOSTTRACE_BUILD_DYNINST "Build dyninst from submodule" OFF)
add_option(HOSTTRACE_USE_MPI_HEADERS
"Enable wrapping MPI functions w/o enabling MPI dependency" OFF)
hosttrace_add_feature(CMAKE_CXX_STANDARD "CXX language standard")
hosttrace_add_option(CMAKE_CXX_STANDARD_REQUIRED "Require C++ language standard" ON)
hosttrace_add_option(CMAKE_CXX_EXTENSIONS "Compiler specific language extensions" OFF)
hosttrace_add_option(CMAKE_INSTALL_RPATH_USE_LINK_PATH "Enable rpath to linked libraries"
ON)
hosttrace_add_option(HOSTTRACE_USE_CLANG_TIDY "Enable clang-tidy" OFF)
hosttrace_add_option(HOSTTRACE_USE_MPI "Enable MPI support" OFF)
hosttrace_add_option(HOSTTRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF)
hosttrace_add_option(HOSTTRACE_USE_ROCTRACER "Enable roctracer support" ON)
hosttrace_add_option(HOSTTRACE_BUILD_DYNINST "Build dyninst from submodule" OFF)
hosttrace_add_option(HOSTTRACE_USE_MPI_HEADERS
"Enable wrapping MPI functions w/o enabling MPI dependency" OFF)
# Derive the default for HOSTTRACE_MAX_THREADS from the processor count of the
# configuring machine (8 threads per detected processor).
include(ProcessorCount)
processorcount(HOSTTRACE_PROCESSOR_COUNT)

# ProcessorCount stores 0 when it cannot determine the number of processors. Without
# this guard the default thread limit would silently become 0, so fall back to treating
# the machine as having a single processor and tell the user how to override it.
if(HOSTTRACE_PROCESSOR_COUNT GREATER 0)
    math(EXPR HOSTTRACE_THREAD_COUNT "8 * ${HOSTTRACE_PROCESSOR_COUNT}")
else()
    message(
        WARNING
            "Unable to determine the processor count. Defaulting HOSTTRACE_MAX_THREADS to 8. Set HOSTTRACE_MAX_THREADS explicitly if this is too small."
        )
    set(HOSTTRACE_THREAD_COUNT 8)
endif()

# Cache entry: an existing user-provided value is never overwritten (no FORCE).
set(HOSTTRACE_MAX_THREADS
    "${HOSTTRACE_THREAD_COUNT}"
    CACHE
        STRING
        "Maximum number of threads in the host application. Likely only needs to be increased if host app does not use thread-pool but creates many threads"
    )
hosttrace_add_feature(
    HOSTTRACE_MAX_THREADS
    "Maximum number of total threads supported in the host application (default: 8 * nproc)"
    )
# ensure synced
set(TIMEMORY_USE_MPI
${HOSTTRACE_USE_MPI}
CACHE BOOL "Enable MPI support" FORCE)
# default visibility settings
set(CMAKE_C_VISIBILITY_PRESET "default")
set(CMAKE_CXX_VISIBILITY_PRESET "default")
set(CMAKE_VISIBILITY_INLINES_HIDDEN OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
include(Formatting) # format target
include(Packages) # finds third-party libraries
# Establish the HIP version variables. With roctracer enabled they come from the HIP
# package (if it can be found); otherwise they are zeroed.
# NOTE(review): presumably these values are substituted into defines.hpp.in by the
# configure_file() call that follows -- confirm against the template.
if(HOSTTRACE_USE_ROCTRACER)
    find_package(HIP QUIET)
    # Clear the kfdwrapper library variable for every HIP release newer than 4.3.
    # Comparing major and minor independently ("major >= 4 AND minor > 3") wrongly
    # excluded releases such as 5.0 - 5.3, so compare them as an ordered pair. If HIP
    # was not found, the version variables are undefined and the condition is false.
    # NOTE(review): the reason the library is dropped for HIP > 4.3 is not visible
    # here -- confirm.
    if(HIP_VERSION_MAJOR GREATER 4 OR (HIP_VERSION_MAJOR EQUAL 4 AND HIP_VERSION_MINOR
                                                                     GREATER 3))
        set(roctracer_kfdwrapper_LIBRARY)
    endif()
else()
    set(HIP_VERSION "0.0.0")
    set(HIP_VERSION_MAJOR 0)
    set(HIP_VERSION_MINOR 0)
    set(HIP_VERSION_PATCH 0)
endif()
configure_file(${PROJECT_SOURCE_DIR}/include/library/defines.hpp.in
${PROJECT_BINARY_DIR}/include/library/defines.hpp @ONLY)
hosttrace_activate_clang_tidy()
# custom visibility settings
set(CMAKE_C_VISIBILITY_PRESET "hidden")
set(CMAKE_CXX_VISIBILITY_PRESET "hidden")
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
if(HOSTTRACE_BUILD_LTO)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()
# ------------------------------------------------------------------------------#
#
# hosttrace-library target
#
# ------------------------------------------------------------------------------#
add_library(
hosttrace-library SHARED
${CMAKE_CURRENT_LIST_DIR}/src/library.cpp ${CMAKE_CURRENT_LIST_DIR}/src/libmisc.cpp
${CMAKE_CURRENT_LIST_DIR}/include/library.hpp ${perfetto_DIR}/sdk/perfetto.cc)
set(library_sources
${CMAKE_CURRENT_LIST_DIR}/src/library.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/config.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/critical_trace.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/fork_gotcha.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/hosttrace_component.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/mpi_gotcha.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/perfetto.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/ptl.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/thread_data.cpp
${CMAKE_CURRENT_LIST_DIR}/src/library/timemory.cpp
${perfetto_DIR}/sdk/perfetto.cc)
set(library_headers
${CMAKE_CURRENT_LIST_DIR}/include/library.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/api.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/config.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/common.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/critical_trace.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/debug.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/fork_gotcha.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/hosttrace_component.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/mpi_gotcha.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/perfetto.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/ptl.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/state.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/thread_data.hpp
${CMAKE_CURRENT_LIST_DIR}/include/library/timemory.hpp
${perfetto_DIR}/sdk/perfetto.h)
if(NOT TIMEMORY_USE_PERFETTO)
endif()
add_library(hosttrace-library SHARED ${library_sources} ${library_headers})
if(HOSTTRACE_USE_ROCTRACER)
target_sources(
hosttrace-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include/roctracer.hpp
${CMAKE_CURRENT_LIST_DIR}/src/roctracer.cpp)
hosttrace-library
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include/library/roctracer.hpp
${CMAKE_CURRENT_LIST_DIR}/src/library/roctracer.cpp
${CMAKE_CURRENT_LIST_DIR}/include/library/roctracer_callbacks.hpp
${CMAKE_CURRENT_LIST_DIR}/src/library/roctracer_callbacks.cpp)
endif()
target_include_directories(hosttrace-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
target_include_directories(hosttrace-library SYSTEM PRIVATE ${perfetto_DIR}/sdk)
target_compile_definitions(
@@ -101,10 +176,12 @@ target_compile_definitions(
target_link_libraries(
hosttrace-library
PRIVATE hosttrace::hosttrace-threading
PRIVATE hosttrace::hosttrace-headers
hosttrace::hosttrace-threading
hosttrace::hosttrace-compile-options
hosttrace::hosttrace-roctracer
hosttrace::hosttrace-mpi
hosttrace::hosttrace-ptl
$<BUILD_INTERFACE:timemory::timemory-headers>
$<BUILD_INTERFACE:timemory::timemory-gotcha>
$<BUILD_INTERFACE:timemory::timemory-cxx-shared>
@@ -134,13 +211,13 @@ add_executable(
hosttrace-exe
${_EXCLUDE} ${CMAKE_CURRENT_LIST_DIR}/src/hosttrace.cpp
${CMAKE_CURRENT_LIST_DIR}/include/hosttrace.hpp
${CMAKE_CURRENT_LIST_DIR}/src/hosttrace-details.cpp)
target_include_directories(hosttrace-exe PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
${CMAKE_CURRENT_LIST_DIR}/src/hosttrace/details.cpp)
target_link_libraries(
hosttrace-exe
PRIVATE hosttrace::hosttrace-dyninst hosttrace::hosttrace-compile-options
PRIVATE hosttrace::hosttrace-headers
hosttrace::hosttrace-dyninst
hosttrace::hosttrace-compile-options
$<BUILD_INTERFACE:timemory::timemory-headers>
$<IF:$<BOOL:${HOSTTRACE_USE_SANITIZER}>,hosttrace::hosttrace-sanitizer,>)
@@ -211,3 +288,11 @@ add_subdirectory(tests)
# ------------------------------------------------------------------------------#
include(ConfigCPack)
# ------------------------------------------------------------------------------#
#
# config info
#
# ------------------------------------------------------------------------------#
hosttrace_print_features()
@@ -68,7 +68,7 @@ endif()
# ----------------------------------------------------------------------------------------#
# extra flags for debug information in debug or optimized binaries
#
add_interface_library(
hosttrace_add_interface_library(
hosttrace-compile-debuginfo
"Attempts to set best flags for more expressive profiling information in debug or optimized binaries"
)
@@ -108,7 +108,7 @@ endif()
# ----------------------------------------------------------------------------------------#
# non-debug optimizations
#
add_interface_library(hosttrace-compile-extra "Extra optimization flags")
hosttrace_add_interface_library(hosttrace-compile-extra "Extra optimization flags")
if(NOT HOSTTRACE_USE_COVERAGE)
add_target_flag_if_avail(
hosttrace-compile-extra "-finline-functions" "-funroll-loops" "-ftree-vectorize"
@@ -130,21 +130,16 @@ endif()
#
add_cxx_flag_if_avail("-faligned-new")
if(HOSTTRACE_BUILD_LTO)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()
hosttrace_save_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
set(CMAKE_CXX_FLAGS "-flto=thin ${CMAKE_CXX_FLAGS}")
add_interface_library(hosttrace-lto "Adds link-time-optimization flags")
hosttrace_add_interface_library(hosttrace-lto "Adds link-time-optimization flags")
add_target_flag_if_avail(hosttrace-lto "-flto=thin")
if(NOT cxx_hosttrace_lto_flto_thin)
set(CMAKE_CXX_FLAGS "-flto ${CMAKE_CXX_FLAGS}")
add_target_flag_if_avail(hosttrace-lto "-flto")
if(NOT cxx_hosttrace_lto_flto)
add_disabled_interface(hosttrace-lto)
set(hosttrace_BUILD_LTO OFF)
set(HOSTTRACE_BUILD_LTO OFF)
else()
target_link_options(hosttrace-lto INTERFACE -flto)
endif()
@@ -161,8 +156,9 @@ hosttrace_restore_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
# ----------------------------------------------------------------------------------------#
# print compilation timing reports (Clang compiler)
#
add_interface_library(hosttrace-compile-timing
"Adds compiler flags which report compilation timing metrics")
hosttrace_add_interface_library(
hosttrace-compile-timing
"Adds compiler flags which report compilation timing metrics")
if(CMAKE_CXX_COMPILER_IS_CLANG)
add_target_flag_if_avail(hosttrace-compile-timing "-ftime-trace")
if(NOT cxx_hosttrace_compile_timing_ftime_trace)
@@ -176,15 +172,10 @@ if(HOSTTRACE_USE_COMPILE_TIMING)
target_link_libraries(hosttrace-compile-options INTERFACE hosttrace-compile-timing)
endif()
if(NOT cxx_hosttrace_compile_timing_ftime_report
AND NOT cxx_hosttrace_compile_timing_ftime_trace)
add_disabled_interface(hosttrace-compile-timing)
endif()
# ----------------------------------------------------------------------------------------#
# developer build flags
#
add_interface_library(hosttrace-develop-options "Adds developer compiler flags")
hosttrace_add_interface_library(hosttrace-develop-options "Adds developer compiler flags")
if(HOSTTRACE_BUILD_DEVELOPER)
add_target_flag_if_avail(
hosttrace-develop-options
@@ -195,21 +186,15 @@ endif()
# ----------------------------------------------------------------------------------------#
# visibility build flags
#
add_interface_library(hosttrace-default-visibility
"Adds -fvisibility=default compiler flag")
add_interface_library(hosttrace-hidden-visibility
"Adds -fvisibility=hidden compiler flag")
hosttrace_add_interface_library(hosttrace-default-visibility
"Adds -fvisibility=default compiler flag")
hosttrace_add_interface_library(hosttrace-hidden-visibility
"Adds -fvisibility=hidden compiler flag")
add_target_flag_if_avail(hosttrace-default-visibility "-fvisibility=default")
add_target_flag_if_avail(hosttrace-hidden-visibility "-fvisibility=hidden"
"-fvisibility-inlines-hidden")
foreach(_TYPE default hidden)
if(NOT cxx_hosttrace_${_TYPE}_visibility_fvisibility_${_TYPE})
add_disabled_interface(hosttrace-${_TYPE}-visibility)
endif()
endforeach()
# ----------------------------------------------------------------------------------------#
# developer build flags
#
@@ -235,9 +220,9 @@ set(HOSTTRACE_SANITIZER_TYPES
alignment)
set_property(CACHE HOSTTRACE_SANITIZER_TYPE PROPERTY STRINGS
"${HOSTTRACE_SANITIZER_TYPES}")
add_interface_library(hosttrace-sanitizer-compile-options
"Adds compiler flags for sanitizers")
add_interface_library(
hosttrace_add_interface_library(hosttrace-sanitizer-compile-options
"Adds compiler flags for sanitizers")
# Interface library for the sanitizer selected via HOSTTRACE_SANITIZER_TYPE. The
# description names the actual compiler flag, which is spelled -fsanitize=<type>.
hosttrace_add_interface_library(
    hosttrace-sanitizer
    "Adds compiler flags to enable ${HOSTTRACE_SANITIZER_TYPE} sanitizer (-fsanitize=${HOSTTRACE_SANITIZER_TYPE})"
    )
@@ -248,8 +233,9 @@ add_target_flag(hosttrace-sanitizer-compile-options ${COMMON_SANITIZER_FLAGS})
foreach(_TYPE ${HOSTTRACE_SANITIZER_TYPES})
set(_FLAG "-fsanitize=${_TYPE}")
add_interface_library(hosttrace-${_TYPE}-sanitizer
"Adds compiler flags to enable ${_TYPE} sanitizer (${_FLAG})")
hosttrace_add_interface_library(
hosttrace-${_TYPE}-sanitizer
"Adds compiler flags to enable ${_TYPE} sanitizer (${_FLAG})")
add_target_flag(hosttrace-${_TYPE}-sanitizer ${_FLAG})
target_link_libraries(hosttrace-${_TYPE}-sanitizer
INTERFACE hosttrace-sanitizer-compile-options)
@@ -273,7 +259,6 @@ if(HOSTTRACE_USE_SANITIZER)
endforeach()
else()
set(HOSTTRACE_USE_SANITIZER OFF)
inform_empty_interface(hosttrace-sanitizer "${HOSTTRACE_SANITIZER_TYPE} sanitizer")
endif()
if(MSVC)
@@ -31,8 +31,8 @@ if("${LIBNAME}" STREQUAL "")
string(TOLOWER "${PROJECT_NAME}" LIBNAME)
endif()
add_interface_library(${LIBNAME}-compile-options
"Adds the standard set of compiler flags used by timemory")
hosttrace_add_interface_library(
${LIBNAME}-compile-options "Adds the standard set of compiler flags used by timemory")
# ----------------------------------------------------------------------------------------#
# macro converting string to list
@@ -0,0 +1,60 @@
include_guard(DIRECTORY)
# ----------------------------------------------------------------------------------------#
#
# Clang Tidy
#
# ----------------------------------------------------------------------------------------#
# clang-tidy
# hosttrace_activate_clang_tidy()
#
# Does nothing unless HOSTTRACE_USE_CLANG_TIDY is enabled. Otherwise it searches for the
# clang-tidy executable and either:
#
# * sets CMAKE_CXX_CLANG_TIDY and CLANG_TIDY_DEFINITIONS in the caller's scope, or
# * warns and sets HOSTTRACE_USE_CLANG_TIDY to OFF when clang-tidy is not found.
#
# This is a macro on purpose: the variables above must be set in the caller's scope.
macro(HOSTTRACE_ACTIVATE_CLANG_TIDY)
    if(HOSTTRACE_USE_CLANG_TIDY)
        find_program(CLANG_TIDY_COMMAND NAMES clang-tidy)
        hosttrace_add_feature(CLANG_TIDY_COMMAND "Path to clang-tidy command")
        if(NOT CLANG_TIDY_COMMAND)
            # Prefer the timemory helper when it is available, but do not hard-fail
            # with "unknown command" if this macro runs before it has been defined.
            if(COMMAND timemory_message)
                timemory_message(
                    WARNING "HOSTTRACE_USE_CLANG_TIDY is ON but clang-tidy is not found!")
            else()
                message(
                    WARNING "HOSTTRACE_USE_CLANG_TIDY is ON but clang-tidy is not found!")
            endif()
            set(HOSTTRACE_USE_CLANG_TIDY OFF)
        else()
            set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_COMMAND})

            # Create a preprocessor definition that depends on .clang-tidy content so the
            # compile command will change when .clang-tidy changes. This ensures that a
            # subsequent build re-runs clang-tidy on all sources even if they do not
            # otherwise need to be recompiled. Nothing actually uses this definition. We
            # add it to targets on which we run clang-tidy just to get the build
            # dependency on the .clang-tidy file.
            #
            # Inside a macro, CMAKE_CURRENT_LIST_DIR refers to the list file of the
            # *caller*, so fall back to the project root when no .clang-tidy sits next
            # to the caller. file(SHA1) is a fatal error on a missing file, hence the
            # existence check.
            set(_hosttrace_clang_tidy_cfg "${CMAKE_CURRENT_LIST_DIR}/.clang-tidy")
            if(NOT EXISTS "${_hosttrace_clang_tidy_cfg}")
                set(_hosttrace_clang_tidy_cfg "${PROJECT_SOURCE_DIR}/.clang-tidy")
            endif()
            if(EXISTS "${_hosttrace_clang_tidy_cfg}")
                file(SHA1 "${_hosttrace_clang_tidy_cfg}" clang_tidy_sha1)
                set(CLANG_TIDY_DEFINITIONS "CLANG_TIDY_SHA1=${clang_tidy_sha1}")
                unset(clang_tidy_sha1)
            endif()
            unset(_hosttrace_clang_tidy_cfg)
        endif()
    endif()
endmacro()
# ------------------------------------------------------------------------------#
#
# clang-format target
#
# ------------------------------------------------------------------------------#
# Locate clang-format, preferring the version-11 executables over an unversioned one.
find_program(HOSTTRACE_CLANG_FORMAT_EXE NAMES clang-format-11 clang-format-mp-11
                                              clang-format)

if(HOSTTRACE_CLANG_FORMAT_EXE)
    # Collect every file to be formatted. The globs are evaluated at configure time,
    # so files added later are only picked up after CMake is re-run.
    file(GLOB_RECURSE sources "${PROJECT_SOURCE_DIR}/src/*.cpp")
    file(GLOB_RECURSE headers "${PROJECT_SOURCE_DIR}/include/*.hpp")
    file(GLOB_RECURSE examples "${PROJECT_SOURCE_DIR}/examples/*.cpp"
         "${PROJECT_SOURCE_DIR}/examples/*.hpp")

    # Project-specific target which rewrites the files in place (-i). VERBATIM makes
    # the argument escaping identical on every platform/generator.
    add_custom_target(
        format-hosttrace
        COMMAND ${HOSTTRACE_CLANG_FORMAT_EXE} -i ${sources} ${headers} ${examples}
        COMMENT "Running C++ formatter ${HOSTTRACE_CLANG_FORMAT_EXE}..."
        VERBATIM)

    # Umbrella "format" target: create it only if nothing else has defined it yet,
    # then hook the project-specific target into it.
    if(NOT TARGET format)
        add_custom_target(format)
    endif()
    add_dependencies(format format-hosttrace)
else()
    message(
        AUTHOR_WARNING
            "clang-format could not be found. format build target not available.")
endif()
Datei-Diff unterdrückt, da er zu groß ist Diff laden
Datei-Diff unterdrückt, da er zu groß ist Diff laden
@@ -100,10 +100,6 @@ if(roctracer_FOUND)
if(roctracer_kfdwrapper_LIBRARY)
list(APPEND roctracer_LIBRARIES ${roctracer_kfdwrapper_LIBRARY})
target_compile_definitions(
roctracer::roctracer
INTERFACE
HOSTTRACE_ROCTRACER_LIBKFDWRAPPER=\"${roctracer_kfdwrapper_LIBRARY}\")
target_link_libraries(roctracer::roctracer
INTERFACE ${roctracer_kfdwrapper_LIBRARY})
target_link_libraries(roctracer::roctx INTERFACE ${roctracer_kfdwrapper_LIBRARY})
@@ -7,14 +7,19 @@ include_guard(DIRECTORY)
#
# ########################################################################################
add_interface_library(hosttrace-headers
"Provides minimal set of include flags to compile with hosttrace")
add_interface_library(hosttrace-threading "Enables multithreading support")
add_interface_library(
hosttrace_add_interface_library(
hosttrace-headers "Provides minimal set of include flags to compile with hosttrace")
hosttrace_add_interface_library(hosttrace-threading "Enables multithreading support")
hosttrace_add_interface_library(
hosttrace-dyninst
"Provides flags and libraries for Dyninst (dynamic instrumentation)")
add_interface_library(hosttrace-roctracer "Provides flags and libraries for roctracer")
add_interface_library(hosttrace-mpi "Provides MPI or MPI headers")
hosttrace_add_interface_library(hosttrace-roctracer
"Provides flags and libraries for roctracer")
hosttrace_add_interface_library(hosttrace-mpi "Provides MPI or MPI headers")
hosttrace_add_interface_library(hosttrace-ptl "Enables PTL support (tasking)")
target_include_directories(hosttrace-headers INTERFACE ${PROJECT_SOURCE_DIR}/include
${PROJECT_BINARY_DIR}/include)
# include threading because of rooflines
target_link_libraries(hosttrace-headers INTERFACE hosttrace-threading)
@@ -80,7 +85,7 @@ endif()
# ----------------------------------------------------------------------------------------#
if(HOSTTRACE_BUILD_DYNINST)
checkout_git_submodule(
hosttrace_checkout_git_submodule(
RELATIVE_PATH external/dyninst
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://github.com/jrmadsen/dyninst.git
@@ -145,7 +150,7 @@ else()
hosttrace-dyninst INTERFACE DYNINST_API_RT="${HOSTTRACE_DYNINST_API_RT}")
endif()
add_rpath(${Dyninst_LIBRARIES})
hosttrace_add_rpath(${Dyninst_LIBRARIES})
target_link_libraries(hosttrace-dyninst INTERFACE Dyninst::Dyninst)
else() # updated Dyninst CMake system was not found
set(_BOOST_COMPONENTS atomic system thread date_time)
@@ -204,7 +209,7 @@ else()
endif()
endif()
add_rpath(${DYNINST_LIBRARIES} ${Boost_LIBRARIES})
hosttrace_add_rpath(${DYNINST_LIBRARIES} ${Boost_LIBRARIES})
target_link_libraries(hosttrace-dyninst INTERFACE ${DYNINST_LIBRARIES}
${Boost_LIBRARIES})
foreach(
@@ -242,7 +247,7 @@ endif()
# ----------------------------------------------------------------------------------------#
set(perfetto_DIR ${PROJECT_SOURCE_DIR}/external/perfetto)
checkout_git_submodule(
hosttrace_checkout_git_submodule(
RELATIVE_PATH external/perfetto
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://android.googlesource.com/platform/external/perfetto
@@ -256,7 +261,7 @@ checkout_git_submodule(
# ----------------------------------------------------------------------------------------#
if(HOSTTRACE_BUILD_DEVICETRACE)
checkout_git_submodule(
hosttrace_checkout_git_submodule(
RELATIVE_PATH external/elfio
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://github.com/jrmadsen/ELFIO.git
@@ -267,63 +272,10 @@ endif()
# ----------------------------------------------------------------------------------------#
#
# Clang Tidy
# timemory submodule
#
# ----------------------------------------------------------------------------------------#
# clang-tidy
# hosttrace_activate_clang_tidy()
#
# Does nothing unless HOSTTRACE_USE_CLANG_TIDY is enabled. Otherwise it looks up the
# clang-tidy executable: when found, CMAKE_CXX_CLANG_TIDY and CLANG_TIDY_DEFINITIONS are
# set in the caller's scope (this is a macro); when not found, a warning is emitted and
# HOSTTRACE_USE_CLANG_TIDY is set to OFF.
macro(HOSTTRACE_ACTIVATE_CLANG_TIDY)
if(HOSTTRACE_USE_CLANG_TIDY)
find_program(CLANG_TIDY_COMMAND NAMES clang-tidy)
add_feature(CLANG_TIDY_COMMAND "Path to clang-tidy command")
if(NOT CLANG_TIDY_COMMAND)
timemory_message(
WARNING "HOSTTRACE_USE_CLANG_TIDY is ON but clang-tidy is not found!")
set(HOSTTRACE_USE_CLANG_TIDY OFF)
else()
set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_COMMAND})
# Create a preprocessor definition that depends on .clang-tidy content so the
# compile command will change when .clang-tidy changes. This ensures that a
# subsequent build re-runs clang-tidy on all sources even if they do not
# otherwise need to be recompiled. Nothing actually uses this definition. We
# add it to targets on which we run clang-tidy just to get the build
# dependency on the .clang-tidy file.
# NOTE: inside a macro, CMAKE_CURRENT_LIST_DIR is the directory of the list file that
# invokes the macro, so .clang-tidy is looked up relative to the caller.
file(SHA1 ${CMAKE_CURRENT_LIST_DIR}/.clang-tidy clang_tidy_sha1)
set(CLANG_TIDY_DEFINITIONS "CLANG_TIDY_SHA1=${clang_tidy_sha1}")
unset(clang_tidy_sha1)
endif()
endif()
endmacro()
# ------------------------------------------------------------------------------#
#
# clang-format target
#
# ------------------------------------------------------------------------------#
# Locate clang-format; the version-11 executable names are listed before the
# unversioned one.
find_program(HOSTTRACE_CLANG_FORMAT_EXE NAMES clang-format-11 clang-format-mp-11
clang-format)
if(HOSTTRACE_CLANG_FORMAT_EXE)
# Non-recursive globs: only files directly inside src/ and include/ are collected,
# whereas examples/ is searched recursively.
file(GLOB sources ${PROJECT_SOURCE_DIR}/src/*.cpp)
file(GLOB headers ${PROJECT_SOURCE_DIR}/include/*.hpp)
file(GLOB_RECURSE examples ${PROJECT_SOURCE_DIR}/examples/*.cpp
${PROJECT_SOURCE_DIR}/examples/*.hpp)
# "format" target: runs clang-format in place (-i) on all collected files.
add_custom_target(
format
${HOSTTRACE_CLANG_FORMAT_EXE} -i ${sources} ${headers} ${examples}
COMMENT "Running ${HOSTTRACE_CLANG_FORMAT_EXE}...")
else()
# Without clang-format no "format" target is created; only an author warning is issued.
message(
AUTHOR_WARNING
"clang-format could not be found. format build target not available.")
endif()
# ----------------------------------------------------------------------------------------#
# configure submodule
# ----------------------------------------------------------------------------------------#
set(TIMEMORY_INSTALL_HEADERS
OFF
CACHE BOOL "Disable timemory header install")
@@ -365,7 +317,7 @@ set(TIMEMORY_TLS_MODEL
"global-dynamic"
CACHE STRING "Thread-local static model" FORCE)
checkout_git_submodule(
hosttrace_checkout_git_submodule(
RELATIVE_PATH external/timemory
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://github.com/NERSC/timemory.git
@@ -384,3 +336,41 @@ add_subdirectory(external/timemory)
hosttrace_restore_variables(BUILD_CONFIG VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS
CMAKE_POSITION_INDEPENDENT_CODE)
# ----------------------------------------------------------------------------------------#
#
# PTL (Parallel Tasking Library) submodule
#
# ----------------------------------------------------------------------------------------#
# timemory might provide PTL::ptl-shared
# Build PTL from the bundled submodule only when no PTL::ptl-shared target exists yet.
if(NOT TARGET PTL::ptl-shared)
    # make sure the submodule sources are present
    hosttrace_checkout_git_submodule(
        RELATIVE_PATH external/PTL
        WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
        REPO_URL https://github.com/jrmadsen/PTL.git
        REPO_BRANCH master)

    # remember the current build configuration so it can be put back afterwards
    hosttrace_save_variables(
        BUILD_CONFIG
        VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS CMAKE_POSITION_INDEPENDENT_CODE
                  CMAKE_CXX_VISIBILITY_PRESET CMAKE_VISIBILITY_INLINES_HIDDEN)

    # PTL feature switches: everything optional is turned off
    set(PTL_DEVELOPER_INSTALL OFF)
    set(PTL_BUILD_EXAMPLES OFF)
    set(PTL_USE_GPU OFF)
    set(PTL_USE_TBB OFF)

    # build configuration applied while the PTL subdirectory is processed:
    # shared library only, position-independent code, hidden inlines
    set(BUILD_STATIC_LIBS OFF)
    set(BUILD_SHARED_LIBS ON)
    set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
    set(CMAKE_POSITION_INDEPENDENT_CODE ON)

    add_subdirectory(external/PTL)

    # put the saved build configuration back
    hosttrace_restore_variables(
        BUILD_CONFIG
        VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS CMAKE_POSITION_INDEPENDENT_CODE
                  CMAKE_CXX_VISIBILITY_PRESET CMAKE_VISIBILITY_INLINES_HIDDEN)
endif()

# expose PTL to consumers through the hosttrace-ptl interface library
target_link_libraries(hosttrace-ptl INTERFACE PTL::ptl-shared)
@@ -38,7 +38,10 @@ main(int argc, char** argv)
std::vector<std::thread> threads{};
for(size_t i = 0; i < nthread; ++i)
threads.emplace_back(&run, nitr, nfib);
{
size_t _nitr = ((i % 2) == 1) ? (nitr - (0.1 * nitr)) : (nitr + (0.1 * nitr));
threads.emplace_back(&run, _nitr, nfib);
}
for(auto& itr : threads)
itr.join();
@@ -29,6 +29,7 @@ THE SOFTWARE.
#include <fstream>
#include <iomanip>
#include <iostream>
#include <thread>
#include <vector>
#define HIP_API_CALL(CALL) \
@@ -88,8 +89,8 @@ run(int rank, int argc, char** argv)
{
(void) argc;
(void) argv;
unsigned int M = 4960;
unsigned int N = 4960;
unsigned int M = 4960 * 2;
unsigned int N = 4960 * 2;
std::cout << "[" << rank << "] M: " << M << " N: " << N << std::endl;
size_t size = sizeof(int) * M * N;
@@ -102,29 +103,30 @@ run(int rank, int argc, char** argv)
HIP_API_CALL(hipMalloc(&in, size));
HIP_API_CALL(hipMalloc(&out, size));
check_hip_error();
HIP_API_CALL(hipMemset(in, 0, size));
HIP_API_CALL(hipMemset(out, 0, size));
HIP_API_CALL(hipMemcpy(in, matrix, size, hipMemcpyHostToDevice));
HIP_API_CALL(hipDeviceSynchronize());
check_hip_error();
hipDeviceProp_t props;
HIP_API_CALL(hipGetDeviceProperties(&props, 0));
dim3 grid(M / 32, N / 32, 1);
dim3 block(32, 32, 1); // transpose_a
// warmup
hipLaunchKernelGGL(transpose_a, grid, block, 0, 0, in, out, M, N);
check_hip_error();
t1 = std::chrono::high_resolution_clock::now();
const unsigned times = 10000;
for(size_t i = 0; i < times; i++)
{
hipLaunchKernelGGL(transpose_a, grid, block, 0, 0, in, out, M, N);
}
check_hip_error();
auto _func = [&](hipStream_t stream) {
for(size_t i = 0; i < times / 2; i++)
{
transpose_a<<<grid, block, 0, stream>>>(in, out, M, N);
check_hip_error();
}
HIP_API_CALL(hipStreamSynchronize(stream));
};
hipStream_t _stream{};
HIP_API_CALL(hipStreamCreate(&_stream));
std::thread _t{ _func, _stream };
_t.join();
_func(0);
HIP_API_CALL(hipDeviceSynchronize());
t2 = std::chrono::high_resolution_clock::now();
double time =
@@ -136,14 +138,12 @@ run(int rank, int argc, char** argv)
int* out_matrix = (int*) malloc(size);
HIP_API_CALL(hipMemcpy(out_matrix, out, size, hipMemcpyDeviceToHost));
check_hip_error();
// cpu_transpose(matrix, out_matrix, M, N);
verify(matrix, out_matrix, M, N);
HIP_API_CALL(hipFree(in));
HIP_API_CALL(hipFree(out));
check_hip_error();
free(matrix);
free(out_matrix);
@@ -171,12 +171,32 @@ do_a2a(int rank)
int
main(int argc, char** argv)
{
int rank = 0;
int rank = 0;
int nthreads = 2;
if(argc > 1) nthreads = atoi(argv[1]);
#if defined(USE_MPI)
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif
if(rank == 0) run(rank, argc, argv);
// this is a temporary workaround in hosttrace when HIP + MPI is enabled
int ndevice = 0;
int devid = rank;
HIP_API_CALL(hipGetDeviceCount(&ndevice));
if(ndevice > 0)
{
devid = rank % ndevice;
HIP_API_CALL(hipSetDevice(devid));
}
if(rank == devid && rank < ndevice)
{
std::vector<std::thread> _threads{};
for(int i = 1; i < nthreads; ++i)
_threads.emplace_back(run, rank, argc, argv);
run(rank, argc, argv);
for(auto& itr : _threads)
itr.join();
}
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
do_a2a(rank);
Submodul projects/rocprofiler-systems/external/PTL hinzugefügt bei dd1b67829c
Submodul projects/rocprofiler-systems/external/timemory aktualisiert: 11183bbdd7...c040fe7022
@@ -1,27 +1,30 @@
// MIT License
//
// Copyright (c) 2020, The Regents of the University of California,
// through Lawrence Berkeley National Laboratory (subject to receipt of any
// required approvals from the U.S. Dept. of Energy). All rights reserved.
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
@@ -274,7 +277,7 @@ struct function_signature
bool m_info_end = false;
location_t m_row = { 0, 0 };
location_t m_col = { 0, 0 };
string_t m_return = "void";
string_t m_return = {};
string_t m_name = {};
string_t m_params = "()";
string_t m_file = {};
@@ -318,7 +321,7 @@ struct function_signature
string_t get() const
{
std::stringstream ss;
if(use_return_info) ss << m_return << " ";
if(use_return_info && !m_return.empty()) ss << m_return << " ";
ss << m_name;
if(use_args_info) ss << m_params;
if(m_loop && m_info_beg)
@@ -1,275 +1,112 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#if !defined(TIMEMORY_USE_PERFETTO)
# include <perfetto.h>
# define PERFETTO_CATEGORIES \
perfetto::Category("host").SetDescription("Host-side function tracing"), \
perfetto::Category("device").SetDescription("Device-side function tracing")
#else
# define PERFETTO_CATEGORIES \
perfetto::Category("host").SetDescription("Host-side function tracing"), \
perfetto::Category("device").SetDescription("Device-side function tracing")
perfetto::Category("timemory")
.SetDescription("Events from the timemory API")
# define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
#endif
// this always needs to be included first
// clang-format off
#include "library/perfetto.hpp"
// clang-format on
#include "library/timemory.hpp"
#include "library/roctracer.hpp"
#include "library/api.hpp"
#include "library/fork_gotcha.hpp"
#include "library/mpi_gotcha.hpp"
#include "library/api.hpp"
#include "library/common.hpp"
#include "library/state.hpp"
#include "library/config.hpp"
#include "library/thread_data.hpp"
#include "library/ptl.hpp"
#include "library/debug.hpp"
#include "library/critical_trace.hpp"
#include "timemory/macros/language.hpp"
#include "timemory/utility/utility.hpp"
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <memory>
#include <mutex>
#include <string>
#include <sys/types.h>
#include <thread>
#include <unistd.h>
#include <utility>
#include <vector>
#include "timemory/api.hpp"
#include "timemory/backends/mpi.hpp"
#include "timemory/backends/process.hpp"
#include "timemory/backends/threading.hpp"
#include "timemory/components.hpp"
#include "timemory/components/gotcha/mpip.hpp"
#include "timemory/components/papi/papi_tuple.hpp"
#include "timemory/config.hpp"
#include "timemory/environment.hpp"
#include "timemory/manager.hpp"
#include "timemory/mpl/apply.hpp"
#include "timemory/operations.hpp"
#include "timemory/runtime.hpp"
#include "timemory/settings.hpp"
#include "timemory/storage.hpp"
#include "timemory/variadic.hpp"
#include "roctracer.hpp"
// forward decl of the API
extern "C"
template <critical_trace::Device DevID, critical_trace::Phase PhaseID,
bool UpdateStack = true>
inline void
add_critical_trace(int64_t _tid, size_t _cpu_cid, size_t _gpu_cid, size_t _parent_cid,
int64_t _ts_beg, int64_t _ts_val, size_t _hash, uint16_t _depth,
uint16_t _prio = 0)
{
void hosttrace_push_trace(const char* name) TIMEMORY_VISIBILITY("default");
void hosttrace_pop_trace(const char* name) TIMEMORY_VISIBILITY("default");
void hosttrace_trace_init(const char*, bool, const char*)
TIMEMORY_VISIBILITY("default");
void hosttrace_trace_finalize(void) TIMEMORY_VISIBILITY("default");
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
TIMEMORY_VISIBILITY("default");
void hosttrace_trace_set_mpi(bool use, bool attached) TIMEMORY_VISIBILITY("default");
}
if(!get_use_critical_trace()) return;
//--------------------------------------------------------------------------------------//
// clang-format off
// these are used to create unique type mutexes
struct critical_insert {};
struct cpu_cid_stack {};
// clang-format on
// same sort of functionality as python's " ".join([...])
#if !defined(JOIN)
# define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
#endif
using tim::type_mutex;
using auto_lock_t = tim::auto_lock_t;
static constexpr auto num_mutexes = max_supported_threads;
static auto _update_freq = critical_trace::get_update_frequency();
#define HOSTTRACE_DEBUG(...) \
if(get_debug()) \
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
//--------------------------------------------------------------------------------------//
namespace audit = tim::audit;
namespace comp = tim::component;
namespace quirk = tim::quirk;
namespace threading = tim::threading;
namespace scope = tim::scope;
namespace dmp = tim::dmp;
namespace process = tim::process;
namespace units = tim::units;
namespace trait = tim::trait;
// Component used to wrap fork(). It only declares the audit hooks; their
// definitions are not part of this header.
struct fork_gotcha : comp::base<fork_gotcha, void>
{
// alias for comp::gotcha_data, the first parameter of every audit overload
using gotcha_data_t = comp::gotcha_data;
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
// called right before fork() (selected by the audit::incoming tag)
void audit(const gotcha_data_t& _data, audit::incoming);
// called right after fork() (selected by the audit::outgoing tag);
// _pid is the value returned by fork()
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
};
// Component used to wrap MPI_Init, MPI_Init_thread and MPI_Finalize. It only
// declares the audit hooks; their definitions are not part of this header.
// The overloads are distinguished by the audit::incoming / audit::outgoing tag
// and by the argument list of the wrapped function.
struct mpi_gotcha : comp::base<mpi_gotcha, void>
{
// alias for comp::gotcha_data, the first parameter of every audit overload
using gotcha_data_t = comp::gotcha_data;
TIMEMORY_DEFAULT_OBJECT(mpi_gotcha)
// called right before MPI_Init with that function's arguments
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***);
// called right before MPI_Init_thread with that function's arguments
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*);
// called right after MPI_Init and MPI_Init_thread with the return value
void audit(const gotcha_data_t& _data, audit::outgoing, int _retval);
// called right before MPI_Finalize (which takes no arguments)
void audit(const gotcha_data_t& _data, audit::incoming);
};
// timemory api struct: an empty tag type derived from tim::concepts::api.
// It carries no state and is only used as a template argument.
struct hosttrace : tim::concepts::api
{};
// timemory component which calls hosttrace functions
// (used in gotcha wrappers)
// Only declarations live here; start(), stop() and set_prefix() are defined
// elsewhere.
struct hosttrace_component : tim::component::base<hosttrace_component, void>
{
void start();
void stop();
void set_prefix(const char*);
private:
// non-owning C-string pointer, null until assigned
// NOTE(review): presumably assigned by set_prefix() and read by start()/stop(),
// in which case the pointed-to string must outlive those calls -- confirm
// against the definitions.
const char* m_prefix = nullptr;
};
// papi_tuple specialised for the single PAPI_TOT_INS preset
using papi_tot_ins = comp::papi_tuple<PAPI_TOT_INS>;
// gotcha wrappers whose only component is fork_gotcha / mpi_gotcha and whose
// API tag is hosttrace
// NOTE(review): the leading 4 is presumably the number of wrappable function
// slots -- confirm against the timemory gotcha documentation.
using fork_gotcha_t = comp::gotcha<4, tim::component_tuple<fork_gotcha>, hosttrace>;
using mpi_gotcha_t = comp::gotcha<4, tim::component_tuple<mpi_gotcha>, hosttrace>;
// process-level bundle: timing, memory and instruction-count components plus
// roctracer, the user bundle and both gotcha wrappers
using hosttrace_bundle_t =
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
comp::cpu_util, comp::roctracer, papi_tot_ins,
comp::user_global_bundle, fork_gotcha_t, mpi_gotcha_t>;
// per-thread bundle: wall clock, thread CPU clock/utilisation and PAPI_TOT_INS
using hosttrace_thread_bundle_t =
tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
comp::thread_cpu_util, papi_tot_ins>;
// bundle tagged with the hosttrace API; both components are listed as pointer
// types (wall_clock*, user_global_bundle*)
using bundle_t =
tim::component_bundle<hosttrace, comp::wall_clock*, comp::user_global_bundle*>;
// ring-buffer allocator for bundle_t instances
using bundle_allocator_t = tim::data::ring_buffer_allocator<bundle_t>;
//--------------------------------------------------------------------------------------//
#if !defined(TIMEMORY_USE_PERFETTO)
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
#endif
#if defined(CUSTOM_DATA_SOURCE)
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
{
public:
void OnSetup(const SetupArgs&) override
if constexpr(PhaseID != critical_trace::Phase::NONE)
{
// Use this callback to apply any custom configuration to your data source
// based on the TraceConfig in SetupArgs.
PRINT_HERE("%s", "setup");
// unique lock per thread
auto& _mtx = type_mutex<critical_insert, hosttrace, num_mutexes>(_tid);
auto_lock_t _lk{ _mtx };
auto& _critical_trace = critical_trace::get(_tid);
_critical_trace->emplace_back(
critical_trace::entry{ _prio, DevID, PhaseID, _depth, _tid, _cpu_cid,
_gpu_cid, _parent_cid, _ts_beg, _ts_val, _hash });
}
void OnStart(const StartArgs&) override
if constexpr(UpdateStack)
{
// This notification can be used to initialize the GPU driver, enable
// counters, etc. StartArgs will contain the DataSourceDescriptor,
// which can be extended.
PRINT_HERE("%s", "start");
// unique lock per thread
auto& _mtx = type_mutex<cpu_cid_stack, hosttrace, num_mutexes>(_tid);
if constexpr(PhaseID == critical_trace::Phase::NONE)
{
auto_lock_t _lk{ _mtx };
get_cpu_cid_stack(_tid)->emplace_back(_cpu_cid);
}
else if constexpr(PhaseID == critical_trace::Phase::BEGIN)
{
auto_lock_t _lk{ _mtx };
get_cpu_cid_stack(_tid)->emplace_back(_cpu_cid);
}
else if constexpr(PhaseID == critical_trace::Phase::END)
{
auto_lock_t _lk{ _mtx };
get_cpu_cid_stack(_tid)->pop_back();
if(_gpu_cid == 0 && _cpu_cid % _update_freq == (_update_freq - 1))
critical_trace::update(_tid);
}
}
void OnStop(const StopArgs&) override
{
// Undo any initialization done in OnStart.
PRINT_HERE("%s", "stop");
}
// Data sources can also have per-instance state.
int my_custom_state = 0;
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
//--------------------------------------------------------------------------------------//
// Lifecycle state of hosttrace. The enumerator values are written out
// explicitly; they are the same values the compiler assigns implicitly when
// only the first enumerator is initialised to zero.
enum class State : unsigned short
{
    DelayedInit = 0,
    PreInit     = 1,
    Active      = 2,
    Finalized   = 3
};
// Forward declarations of the global accessors; all are defined elsewhere.
// whether debug output is enabled
bool
get_debug();
// returns a non-const reference, so callers can both read and assign the State
State&
get_state();
// returns a reference to the owning pointer of the main bundle; the pointer
// itself may be null
// NOTE(review): who allocates/resets it is not visible here.
std::unique_ptr<hosttrace_bundle_t>&
get_main_bundle();
// whether the perfetto backend is in use
bool
get_use_perfetto();
// whether the timemory backend is in use
bool
get_use_timemory();
//--------------------------------------------------------------------------------------//
// Static-only holder of one std::unique_ptr<Tp> per thread slot.
// Tp is the stored type; MaxThreads (default 1024) fixes the number of slots
// because the storage is a std::array.
template <typename Tp, size_t MaxThreads = 1024>
struct hosttrace_thread_data
{
// number of slots in the instance array
static constexpr size_t max_supported_threads = MaxThreads;
using instance_array_t = std::array<std::unique_ptr<Tp>, max_supported_threads>;
// creates the Tp for a slot, forwarding the arguments to its constructor
template <typename... Args>
static void construct(Args&&...);
// the slot at index threading::get_id()
static std::unique_ptr<Tp>& instance();
// the whole array of slots
static instance_array_t& instances();
};
template <typename Tp, size_t MaxThreads>
template <typename... Args>
void
hosttrace_thread_data<Tp, MaxThreads>::construct(Args&&... _args)
{
static thread_local bool _v = [&_args...]() {
instances().at(threading::get_id()) =
std::make_unique<Tp>(std::forward<Args>(_args)...);
return true;
}();
(void) _v;
tim::consume_parameters(_tid, _cpu_cid, _gpu_cid, _parent_cid, _ts_beg, _ts_val,
_hash, _depth, _prio);
}
// Returns the slot of the shared instance array at index threading::get_id().
// The lookup goes through std::array::at(), so an index that is not smaller
// than MaxThreads throws std::out_of_range instead of reading out of bounds.
template <typename Tp, size_t MaxThreads>
std::unique_ptr<Tp>&
hosttrace_thread_data<Tp, MaxThreads>::instance()
{
    auto& _slots = instances();
    return _slots.at(threading::get_id());
}
// Returns the array holding every slot. The array is a function-local static,
// so it is value-initialised (all pointers null) on first use and there is
// exactly one array per <Tp, MaxThreads> instantiation.
template <typename Tp, size_t MaxThreads>
typename hosttrace_thread_data<Tp, MaxThreads>::instance_array_t&
hosttrace_thread_data<Tp, MaxThreads>::instances()
{
    static instance_array_t _storage{};
    return _storage;
}
//--------------------------------------------------------------------------------------//
// there are currently some strange things that happen with vector<bundle_t> so using
// vector<bundle_t*> and timemory's ring_buffer_allocator to create contiguous memory-page
// aligned instances of the bundle
struct hosttrace_timemory_data
{
// number of elements in instance_array_t
static constexpr size_t max_supported_threads = 1024;
using instance_array_t = std::array<hosttrace_timemory_data, max_supported_threads>;
// allocator for the bundle_t objects
bundle_allocator_t allocator{};
// raw, non-owning-by-type pointers to bundles
// NOTE(review): presumably every pointer here was obtained from `allocator`
// and must be released through it -- confirm at the use sites.
std::vector<bundle_t*> bundles{};
// array of max_supported_threads instances of this struct; defined elsewhere
static instance_array_t& instances();
};
//--------------------------------------------------------------------------------------//
@@ -0,0 +1,44 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include <timemory/compat/macros.h>
// forward decl of the API
// All functions have C linkage and are tagged TIMEMORY_VISIBILITY("default").
// NOTE(review): the visibility tag presumably keeps the symbols exported when
// the library is built with hidden default visibility -- confirm in
// timemory/compat/macros.h.
extern "C"
{
// push/pop a trace region identified by `name`
// NOTE(review): presumably calls must be paired with matching names -- confirm.
void hosttrace_push_trace(const char* name) TIMEMORY_VISIBILITY("default");
void hosttrace_pop_trace(const char* name) TIMEMORY_VISIBILITY("default");
// initialisation entry point; the parameters are unnamed in this declaration
// NOTE(review): document the three arguments once confirmed against the definition.
void hosttrace_trace_init(const char*, bool, const char*)
TIMEMORY_VISIBILITY("default");
// finalisation entry point
void hosttrace_trace_finalize(void) TIMEMORY_VISIBILITY("default");
// passes an environment variable name/value pair to the library
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
TIMEMORY_VISIBILITY("default");
// passes the MPI flags (`use`, `attached`) to the library
void hosttrace_trace_set_mpi(bool use, bool attached) TIMEMORY_VISIBILITY("default");
}
@@ -0,0 +1,50 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include <timemory/api.hpp>
#include <timemory/backends/dmp.hpp>
#include <timemory/backends/process.hpp>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <memory>
#include <mutex>
#include <string>
#include <sys/types.h>
#include <thread>
#include <unistd.h>
#include <utility>
#include <vector>
// timemory api struct: an empty tag type derived from tim::concepts::api.
// It carries no state and is only used as a template argument.
struct hosttrace : tim::concepts::api
{};
@@ -0,0 +1,163 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/api.hpp"
#include "library/common.hpp"
#include "library/fork_gotcha.hpp"
#include "library/mpi_gotcha.hpp"
#include "library/roctracer.hpp"
#include "library/state.hpp"
#include "library/timemory.hpp"
#include <timemory/backends/threading.hpp>
#include <string_view>
// bundle of components around hosttrace_init and hosttrace_finalize
// (timing, memory and instruction-count components plus roctracer, the user
// bundle and the fork/MPI gotcha wrappers)
using main_bundle_t =
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
comp::cpu_util, comp::roctracer, papi_tot_ins,
comp::user_global_bundle, fork_gotcha_t, mpi_gotcha_t>;
// bundle of components used in instrumentation
// (tagged with the hosttrace API; both components are listed as pointer types)
using instrumentation_bundle_t =
tim::component_bundle<hosttrace, comp::wall_clock*, comp::user_global_bundle*>;
// allocator for instrumentation_bundle_t
using bundle_allocator_t = tim::data::ring_buffer_allocator<instrumentation_bundle_t>;
// bundle of components around each thread
// comp::peak_rss is part of the tuple only when TIMEMORY_RUSAGE_THREAD is
// defined and greater than zero, so the tuple's arity depends on that macro.
using hosttrace_thread_bundle_t =
tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
comp::thread_cpu_util,
#if defined(TIMEMORY_RUSAGE_THREAD) && TIMEMORY_RUSAGE_THREAD > 0
comp::peak_rss,
#endif
papi_tot_ins>;
//
// Initialization routines
//
// NOTE(review): this header names std::function, std::ostream and (further
// down) std::atomic, std::unique_ptr and std::vector, but directly includes
// only <string_view> besides library/timemory headers; consider including
// <functional>, <ostream>, <atomic>, <memory> and <vector> explicitly.
// sets up the configuration; defined elsewhere
void
configure_settings();
// writes configuration settings to _os; _filter is a predicate over a
// std::string_view
// NOTE(review): presumably _filter receives the setting name and returns true
// for settings that should be printed -- confirm against the definition.
void
print_config_settings(std::ostream& _os,
std::function<bool(const std::string_view&)>&& _filter);
// returns a non-const reference, so callers can assign the executable name
std::string&
get_exe_name();
//
// User-configurable settings
//
// Getters declared with a value return type hand back a copy. The three that
// return a non-const reference (get_use_pid, get_backend, get_sample_rate)
// let the caller modify the referenced object. All are defined elsewhere.
std::string
get_config_file();
bool
get_debug();
bool
get_use_perfetto();
bool
get_use_timemory();
// mutable reference
bool&
get_use_pid();
bool
get_use_mpip();
bool
get_use_critical_trace();
bool
get_roctracer_timeline_profile();
bool
get_roctracer_flat_profile();
bool
get_trace_hsa_api();
bool
get_trace_hsa_activity();
bool
get_critical_trace_debug();
bool
get_critical_trace_serialize_names();
// NOTE(review): units of the two perfetto sizes are not visible here -- confirm.
size_t
get_perfetto_shmem_size_hint();
size_t
get_perfetto_buffer_size();
uint64_t
get_critical_trace_update_freq();
uint64_t
get_critical_trace_num_threads();
std::string
get_trace_hsa_api_types();
// mutable reference
std::string&
get_backend();
std::string
get_perfetto_output_filename();
int64_t
get_critical_trace_count();
// mutable reference
size_t&
get_sample_rate();
int64_t
get_critical_trace_per_row();
//
// Runtime configuration data
//
// Every accessor in this group returns a non-const reference to its object.
// current State; assignable through the returned reference
State&
get_state();
// owning pointer to the main bundle; the pointer itself may be null
std::unique_ptr<main_bundle_t>&
get_main_bundle();
// atomic 64-bit counter
// NOTE(review): presumably the source of new CPU correlation ids -- confirm.
std::atomic<uint64_t>&
get_cpu_cid();
// owning pointer to a vector of 64-bit ids selected by _tid, which defaults
// to threading::get_id()
// NOTE(review): whether the pointer is guaranteed non-null for a given _tid
// is not visible here -- callers dereference it, so confirm.
std::unique_ptr<std::vector<uint64_t>>&
get_cpu_cid_stack(int64_t _tid = threading::get_id());
@@ -0,0 +1,209 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/config.hpp"
#include "library/thread_data.hpp"
#include "timemory/tpls/cereal/cereal/cereal.hpp"
#include <cstdint>
#include <cstdlib>
#include <ostream>
#include <string>
#include <vector>
namespace critical_trace
{
// Device a critical-trace entry executed on. The values are spelled out
// explicitly and equal the implicit numbering (0, 1, 2); entries archive this
// enum, so the numbers should stay stable.
enum class Device : short
{
    NONE = 0,
    CPU  = 1,
    GPU  = 2,
};
// Phase of a critical-trace entry: unspecified (NONE), start (BEGIN), stop
// (END) or DELTA. The values are spelled out explicitly and equal the implicit
// numbering (0..3); entries archive this enum, so the numbers should stay
// stable.
enum class Phase : short
{
    NONE  = 0,
    BEGIN = 1,
    END   = 2,
    DELTA = 3,
};
// One record of the critical trace.
//
// No constructors, destructor or assignment operators are declared on purpose
// (rule of zero). Every member is a scalar, so the implicitly generated
// special members are exactly what the explicitly defaulted ones were, and the
// struct stays an aggregate. That matters because entries are created with a
// brace-enclosed list of all eleven fields in declaration order: since C++20 a
// class with any user-declared constructor is no longer an aggregate and such
// an initialization would stop compiling.
//
// Do not reorder the data members: aggregate initialization is positional.
struct entry
{
    uint16_t priority   = 0;            // priority value (for sorting)
    Device   device     = Device::CPU;  // which device it executed on
    Phase    phase      = Phase::NONE;  // start / stop / unspecified
    uint16_t depth      = 0;            // call-stack depth
    int64_t  tid        = 0;            // thread id it was registered on
    uint64_t cpu_cid    = 0;            // CPU correlation id
    uint64_t gpu_cid    = 0;            // GPU correlation id
    uint64_t parent_cid = 0;            // parent CPU correlation id
    int64_t  begin_ns   = 0;            // timestamp of start
    int64_t  end_ns     = 0;            // timestamp of end
    size_t   hash       = 0;            // hash for name

    // comparison operators; ==, < and > are defined elsewhere and the
    // remaining three are derived from them
    bool operator==(const entry& rhs) const;
    bool operator!=(const entry& rhs) const { return !(*this == rhs); }
    bool operator<(const entry& rhs) const;
    bool operator>(const entry& rhs) const;
    bool operator<=(const entry& rhs) const { return !(*this > rhs); }
    bool operator>=(const entry& rhs) const { return !(*this < rhs); }

    // accumulates rhs into this entry (defined elsewhere)
    entry& operator+=(const entry& rhs);

    // queries on a single entry (defined elsewhere)
    size_t  get_hash() const;
    int64_t get_timestamp() const;
    int64_t get_cost() const;

    // relations between this entry and rhs; the overloads taking _tid
    // additionally receive a thread id (defined elsewhere)
    bool    is_bounded(const entry& rhs) const;
    int64_t get_overlap(const entry& rhs) const;
    int64_t get_independent(const entry& rhs) const;
    int64_t get_overlap(const entry& rhs, int64_t _tid) const;
    int64_t get_independent(const entry& rhs, int64_t _tid) const;
    bool    is_bounded(const entry& rhs, int64_t _tid) const;

    // textual output of the entry; used by operator<< below
    void write(std::ostream& _os) const;

    static bool is_delta(const entry&, const std::string_view&);

    // streams the entry by delegating to write()
    friend std::ostream& operator<<(std::ostream& _os, const entry& _v)
    {
        _v.write(_os);
        return _os;
    }

    // archive hook (the definition follows the struct)
    template <typename Archive>
    void serialize(Archive& ar, unsigned int);
};
// Archives every data member of the entry as a name-value pair, in declaration
// order. When get_critical_trace_serialize_names() returns true an additional
// "name" pair is archived: the demangled identifier looked up for `hash`, or
// an empty string when hash is zero.
template <typename Archive>
void
entry::serialize(Archive& ar, unsigned int)
{
    namespace cereal = tim::cereal;

    ar(cereal::make_nvp("priority", priority),
       cereal::make_nvp("device", device),
       cereal::make_nvp("phase", phase),
       cereal::make_nvp("depth", depth),
       cereal::make_nvp("tid", tid),
       cereal::make_nvp("cpu_cid", cpu_cid),
       cereal::make_nvp("gpu_cid", gpu_cid),
       cereal::make_nvp("parent_cid", parent_cid),
       cereal::make_nvp("begin_ns", begin_ns),
       cereal::make_nvp("end_ns", end_ns),
       cereal::make_nvp("hash", hash));

    // the readable name is optional output
    if(!get_critical_trace_serialize_names()) return;

    std::string _label{};
    if(hash != 0) _label = tim::demangle(tim::get_hash_identifier(hash));
    ar(cereal::make_nvp("name", _label));
}
struct call_chain : private std::vector<entry>
{
using base_type = std::vector<entry>;
using base_type::at;
using base_type::back;
using base_type::begin;
using base_type::cbegin;
using base_type::cend;
using base_type::clear;
using base_type::emplace_back;
using base_type::empty;
using base_type::end;
using base_type::erase;
using base_type::front;
using base_type::pop_back;
using base_type::push_back;
using base_type::rbegin;
using base_type::rend;
using base_type::reserve;
using base_type::size;
size_t get_hash() const;
int64_t get_cost(int64_t _tid = -1) const;
int64_t get_overlap(int64_t _tid = -1) const;
int64_t get_independent(int64_t _tid = -1) const;
static std::vector<call_chain>& get_top_chains();
bool operator==(const call_chain& rhs) const;
bool operator!=(const call_chain& rhs) const { return !(*this == rhs); }
friend std::ostream& operator<<(std::ostream& _os, const call_chain& _v)
{
size_t _n = 0;
for(const auto& itr : _v)
_os << " [" << _n++ << "] " << itr << "\n";
return _os;
}
template <typename Archive>
void serialize(Archive& ar, unsigned int)
{
namespace cereal = tim::cereal;
ar(cereal::make_nvp("call_chain", static_cast<base_type&>(*this)));
}
template <Device DevT>
void generate_perfetto(std::set<entry>& _used) const;
template <bool BoolV = true, typename FuncT>
bool query(FuncT&&) const;
};
// set of label strings
// NOTE(review): std::unordered_set, std::set and std::unique_ptr are used in
// this header without a direct <unordered_set>, <set> or <memory> include;
// they currently arrive transitively.
using hash_ids = std::unordered_set<std::string>;
// update frequency of the critical trace (defined elsewhere)
uint64_t
get_update_frequency();
// owning pointer to the call_chain selected by _tid, which defaults to
// threading::get_id(); returned by reference
std::unique_ptr<call_chain>&
get(int64_t _tid = threading::get_id());
// registers a single label and returns a size_t
// NOTE(review): presumably the returned value is the hash later stored in
// entry::hash -- confirm against the definition.
size_t
add_hash_id(const std::string& _label);
// registers a whole set of labels
void
add_hash_id(const hash_ids&);
// update / compute step for the chain selected by _tid, which defaults to
// threading::get_id() (both defined elsewhere)
void
update(int64_t _tid = threading::get_id());
void
compute(int64_t _tid = threading::get_id());
// empty tag type
struct id
{};
} // namespace critical_trace
@@ -0,0 +1,80 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include <cstdio>
#include <timemory/api.hpp>
#include <timemory/backends/dmp.hpp>
#include <timemory/backends/process.hpp>
#include <timemory/utility/utility.hpp>
// Forward declarations of the two predicates used as conditions by the
// HOSTTRACE_DEBUG and HOSTTRACE_CT_DEBUG macros; both are defined elsewhere.
bool
get_debug();
bool
get_critical_trace_debug();
// HOSTTRACE_CONDITIONAL_PRINT(COND, fmt, ...)
//
// When COND is true, prints a printf-style message to stderr prefixed with
// "[hosttrace][<id>][<thread id>] ". <id> is tim::dmp::rank() when
// TIMEMORY_USE_MPI is defined and tim::process::get_id() otherwise; the thread
// id comes from tim::threading::get_id(). stderr is flushed first, then both
// fprintf calls and the final flush run while holding
// tim::type_mutex<decltype(std::cerr)>().
//
// NOTE(review): the macro expands to a bare `if(COND) { ... }` with no
// do { } while(0) wrapper, so `if(x) HOSTTRACE_DEBUG(...); else ...` does not
// compile and the trailing semicolon at call sites is an empty statement.
// Check every call site before changing the expansion.
// NOTE(review): "%li" assumes tim::threading::get_id() yields a long -- confirm.
// NOTE(review): std::cerr is named but <iostream> is not included directly.
// NOTE(review): the expansion declares a local named _lk, which would shadow a
// caller variable of the same name used in the format arguments.
#if defined(TIMEMORY_USE_MPI)
# define HOSTTRACE_CONDITIONAL_PRINT(COND, ...) \
if(COND) \
{ \
fflush(stderr); \
tim::auto_lock_t _lk{ tim::type_mutex<decltype(std::cerr)>() }; \
fprintf(stderr, "[hosttrace][%i][%li] ", static_cast<int>(tim::dmp::rank()), \
tim::threading::get_id()); \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
#else
# define HOSTTRACE_CONDITIONAL_PRINT(COND, ...) \
if(COND) \
{ \
fflush(stderr); \
tim::auto_lock_t _lk{ tim::type_mutex<decltype(std::cerr)>() }; \
fprintf(stderr, "[hosttrace][%i][%li] ", \
static_cast<int>(tim::process::get_id()), tim::threading::get_id()); \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
#endif
// HOSTTRACE_CONDITIONAL_BASIC_PRINT(COND, fmt, ...)
// Same locking and flushing as HOSTTRACE_CONDITIONAL_PRINT, but the prefix is
// only "[hosttrace] " (no rank/process id and no thread id).
// NOTE(review): also expands to a bare `if(COND) { ... }`; see the dangling-else
// caveat above.
#define HOSTTRACE_CONDITIONAL_BASIC_PRINT(COND, ...) \
if(COND) \
{ \
fflush(stderr); \
tim::auto_lock_t _lk{ tim::type_mutex<decltype(std::cerr)>() }; \
fprintf(stderr, "[hosttrace] "); \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
// Convenience wrappers around HOSTTRACE_CONDITIONAL_PRINT:
//   HOSTTRACE_DEBUG    - prints only when get_debug() is true
//   HOSTTRACE_PRINT    - always prints
//   HOSTTRACE_CT_DEBUG - prints only when get_critical_trace_debug() is true
#define HOSTTRACE_DEBUG(...) HOSTTRACE_CONDITIONAL_PRINT(get_debug(), __VA_ARGS__)
#define HOSTTRACE_PRINT(...) HOSTTRACE_CONDITIONAL_PRINT(true, __VA_ARGS__)
#define HOSTTRACE_CT_DEBUG(...) \
HOSTTRACE_CONDITIONAL_PRINT(get_critical_trace_debug(), __VA_ARGS__)
@@ -0,0 +1,42 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
// The @NAME@ tokens below are placeholders.
// NOTE(review): presumably substituted by CMake's configure_file() when this
// template is turned into the real header -- confirm in the build scripts.
// Formatting is disabled because the unsubstituted tokens are not valid C++.
// clang-format off
// HIP version as a quoted string and as three separate numeric components
#define HOSTTRACE_HIP_VERSION_STRING "@HIP_VERSION@"
#define HOSTTRACE_HIP_VERSION_MAJOR @HIP_VERSION_MAJOR@
#define HOSTTRACE_HIP_VERSION_MINOR @HIP_VERSION_MINOR@
#define HOSTTRACE_HIP_VERSION_PATCH @HIP_VERSION_PATCH@
// clang-format on
// Path of the roctracer kfdwrapper library: the configured value when
// HOSTTRACE_USE_ROCTRACER is defined, otherwise a fixed /opt/rocm location.
// NOTE(review): the fallback is an absolute, installation-specific path.
#if defined(HOSTTRACE_USE_ROCTRACER)
# define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "@roctracer_kfdwrapper_LIBRARY@"
#else
# define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "/opt/rocm/roctracer/lib/libkfdwrapper64.so"
#endif
@@ -0,0 +1,71 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/debug.hpp"
#include <dlfcn.h>
#include <string>
#include <timemory/environment.hpp>
// Move-only owner of a dlopen() handle.
//
// The library path is obtained from tim::get_env<std::string>(_env, _fname, _store),
// i.e. `_env` names the environment variable and `_fname` is passed as the default
// value. When the resulting path is non-empty the library is opened with `_flags`.
// A failed dlopen() is reported through HOSTTRACE_DEBUG and leaves `handle` null.
// The destructor calls dlclose() on a non-null handle.
//
// Ownership of the handle is transferred on move: the moved-from object has its
// handle reset to nullptr so that the handle is closed exactly once.
struct dynamic_library
{
    dynamic_library()                       = delete;
    dynamic_library(const dynamic_library&) = delete;
    dynamic_library& operator=(const dynamic_library&) = delete;

    // Takes over the handle of `_rhs`; `_rhs` no longer owns anything afterwards.
    dynamic_library(dynamic_library&& _rhs) noexcept
    : flags{ _rhs.flags }
    , handle{ _rhs.handle }
    {
        envname.swap(_rhs.envname);
        filename.swap(_rhs.filename);
        _rhs.handle = nullptr;
    }

    // Releases the currently held handle (if any) and takes over the handle of
    // `_rhs`. Self-assignment is a no-op.
    dynamic_library& operator=(dynamic_library&& _rhs) noexcept
    {
        if(this != &_rhs)
        {
            if(handle) dlclose(handle);
            envname.swap(_rhs.envname);
            filename.swap(_rhs.filename);
            flags       = _rhs.flags;
            handle      = _rhs.handle;
            _rhs.handle = nullptr;
        }
        return *this;
    }

    // _env   : name of the environment variable holding the library path
    // _fname : value handed to tim::get_env as the default path
    // _flags : flags forwarded to dlopen()
    // _store : forwarded unchanged as the third argument of tim::get_env
    dynamic_library(const char* _env, const char* _fname,
                    int _flags = (RTLD_NOW | RTLD_GLOBAL), bool _store = false)
    : envname{ _env }
    , filename{ tim::get_env<std::string>(_env, _fname, _store) }
    , flags{ _flags }
    {
        if(!filename.empty())
        {
            handle = dlopen(filename.c_str(), flags);
            if(!handle)
            {
                HOSTTRACE_DEBUG("%s\n", dlerror());
            }
            dlerror();  // Clear any existing error
        }
    }

    ~dynamic_library()
    {
        if(handle) dlclose(handle);
    }

    std::string envname  = {};       // environment variable that was queried
    std::string filename = {};       // resolved library path (may be empty)
    int         flags    = 0;        // flags passed to dlopen()
    void*       handle   = nullptr;  // dlopen() result; nullptr when not loaded
};
@@ -0,0 +1,48 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/common.hpp"
#include "library/timemory.hpp"
// Component used to wrap fork(). It declares one audit() overload for the entry of
// the wrapped call and one for its exit; the definitions are not in this header.
struct fork_gotcha : comp::base<fork_gotcha, void>
{
using gotcha_data_t = comp::gotcha_data;
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
// this will get called right before fork
void audit(const gotcha_data_t& _data, audit::incoming);
// this will get called right after fork with the return value
// (_pid is the value returned by the wrapped fork() call)
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
};
// gotcha type bundling fork_gotcha.
// NOTE(review): the leading 4 is presumably the maximum number of functions this
// gotcha can wrap -- confirm against timemory's comp::gotcha documentation.
using fork_gotcha_t = comp::gotcha<4, tim::component_tuple<fork_gotcha>, hosttrace>;
@@ -0,0 +1,43 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/timemory.hpp"
// timemory component which calls hosttrace functions
// (used in gotcha wrappers)
// Only the interface is declared here; start(), stop() and set_prefix() are
// defined elsewhere.
struct hosttrace_component : comp::base<hosttrace_component, void>
{
void start();
void stop();
// NOTE(review): presumably records the label used by start()/stop() -- confirm
// against the definition.
void set_prefix(const char*);
private:
// Raw, non-owning C-string pointer; null until assigned.
// NOTE(review): the pointed-to string must presumably outlive this component --
// verify against callers of set_prefix().
const char* m_prefix = nullptr;
};
@@ -0,0 +1,54 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/common.hpp"
#include "library/timemory.hpp"
// Component used to wrap MPI_Init, MPI_Init_thread and MPI_Finalize. Each audit()
// overload below corresponds to the entry or exit of one of those calls; the
// definitions are not in this header.
struct mpi_gotcha : comp::base<mpi_gotcha, void>
{
using gotcha_data_t = comp::gotcha_data;
TIMEMORY_DEFAULT_OBJECT(mpi_gotcha)
// this will get called right before MPI_Init with that function's arguments
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***);
// this will get called right before MPI_Init_thread with that function's arguments
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*);
// this will get called right after MPI_Init and MPI_Init_thread with the return value
void audit(const gotcha_data_t& _data, audit::outgoing, int _retval);
// this will get called right before MPI_Finalize
void audit(const gotcha_data_t& _data, audit::incoming);
};
// gotcha type bundling mpi_gotcha.
// NOTE(review): the leading 4 is presumably the maximum number of functions this
// gotcha can wrap -- confirm against timemory's comp::gotcha documentation.
using mpi_gotcha_t = comp::gotcha<4, tim::component_tuple<mpi_gotcha>, hosttrace>;
@@ -0,0 +1,91 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
// This header must be the one that defines PERFETTO_CATEGORIES, so stop the build
// if something included earlier has already defined it.
#if defined(PERFETTO_CATEGORIES)
#    error "PERFETTO_CATEGORIES is already defined. Please include \"" __FILE__ "\" before including any timemory files"
#endif

#if defined(TIMEMORY_USE_PERFETTO)
// With TIMEMORY_USE_PERFETTO this header only provides the category list (the
// hosttrace categories plus a "timemory" category) and republishes it as
// TIMEMORY_PERFETTO_CATEGORIES. It neither includes <perfetto.h> nor invokes
// PERFETTO_DEFINE_CATEGORIES in this configuration.
#    define PERFETTO_CATEGORIES                                                          \
        perfetto::Category("host").SetDescription("Host-side function tracing"),         \
            perfetto::Category("device").SetDescription("Device-side function tracing"), \
            perfetto::Category("host-critical-trace")                                    \
                .SetDescription("Host-side critical traces"),                            \
            perfetto::Category("device-critical-trace")                                  \
                .SetDescription("Device-side critical traces"),                          \
            perfetto::Category("timemory")                                               \
                .SetDescription("Events from the timemory API")
#    define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
#else
// Without TIMEMORY_USE_PERFETTO this header includes perfetto itself, defines the
// hosttrace categories and registers them.
#    include <perfetto.h>
#    define PERFETTO_CATEGORIES                                                          \
        perfetto::Category("host").SetDescription("Host-side function tracing"),         \
            perfetto::Category("device").SetDescription("Device-side function tracing"), \
            perfetto::Category("host-critical-trace")                                    \
                .SetDescription("Host-side critical traces"),                            \
            perfetto::Category("device-critical-trace")                                  \
                .SetDescription("Device-side critical traces")
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
#endif
// Optional perfetto data source, compiled only when CUSTOM_DATA_SOURCE is defined.
// Every callback below only emits a marker through PRINT_HERE; no tracing state is
// configured or modified here.
#if defined(CUSTOM_DATA_SOURCE)
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
{
public:
void OnSetup(const SetupArgs&) override
{
// Use this callback to apply any custom configuration to your data source
// based on the TraceConfig in SetupArgs.
PRINT_HERE("%s", "setup");
}
void OnStart(const StartArgs&) override
{
// This notification can be used to initialize the GPU driver, enable
// counters, etc. StartArgs will contain the DataSourceDescriptor,
// which can be extended.
PRINT_HERE("%s", "start");
}
void OnStop(const StopArgs&) override
{
// Undo any initialization done in OnStart.
PRINT_HERE("%s", "stop");
}
// Data sources can also have per-instance state.
// (not read or written by any of the callbacks above)
int my_custom_state = 0;
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
@@ -0,0 +1,55 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "PTL/PTL.hh"
#include "timemory/macros/attributes.hpp"
#include <mutex>
// Accessors for the PTL tasking objects, declared in two parallel groups:
// one for roctracer and one for the critical trace. Each group exposes a mutex,
// a thread pool and a task group by reference. The definitions are not in this
// header.
// NOTE(review): the returned references presumably refer to objects with static
// storage duration -- confirm against the definitions.
namespace tasking
{
// roctracer group
std::mutex&
get_roctracer_mutex();
PTL::ThreadPool&
get_roctracer_thread_pool();
PTL::TaskGroup<void>&
get_roctracer_task_group();
// critical-trace group
std::mutex&
get_critical_trace_mutex();
PTL::ThreadPool&
get_critical_trace_thread_pool();
PTL::TaskGroup<void>&
get_critical_trace_task_group();
} // namespace tasking
@@ -1,3 +1,30 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
@@ -0,0 +1,96 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/config.hpp"
#include "library/debug.hpp"
#include "library/dynamic_library.hpp"
#include "library/perfetto.hpp"
#include "library/ptl.hpp"
#include "library/roctracer.hpp"
#include <roctracer.h>
#include <roctracer_ext.h>
#include <roctracer_hcc.h>
#include <roctracer_hip.h>
#define AMD_INTERNAL_BUILD 1
#include <ext/hsa_rt_utils.hpp>
#include <roctracer_hsa.h>
#include <iostream>
#include <memory>
// Macro to check ROC-tracer calls status.
//
// Evaluates `call` exactly once. When the result is non-zero, the text returned by
// roctracer_error_string() followed by " in: " and the stringified call is written
// to std::cerr as one newline-terminated line and the stream is flushed.
//
// The status is kept in a uniquely named local so that a variable called `err` in
// the calling scope can safely appear inside `call`, and the argument is
// parenthesized so it is always treated as a single expression.
#define ROCTRACER_CALL(call)                                                             \
    do                                                                                   \
    {                                                                                    \
        const int hosttrace_roctracer_call_status_ = (call);                             \
        if(hosttrace_roctracer_call_status_ != 0)                                        \
        {                                                                                \
            std::cerr << roctracer_error_string() << " in: " << #call << '\n'            \
                      << std::flush;                                                     \
        }                                                                                \
    } while(0)
// Timer type from the HSA runtime utilities and its timestamp type
using hsa_timer_t = hsa_rt_utils::Timer;
using timestamp_t = hsa_timer_t::timestamp_t;
// timemory bundle combining roctracer_data and wall_clock (with the explicit_pop
// quirk), and a second bundle holding roctracer_data only
using roctracer_bundle_t = tim::component_bundle<hosttrace, comp::roctracer_data,
comp::wall_clock, quirk::explicit_pop>;
using roctracer_hsa_bundle_t = tim::component_bundle<hosttrace, comp::roctracer_data>;
// list of (name, callable) pairs
using roctracer_functions_t = std::vector<std::pair<std::string, std::function<void()>>>;
// Returns a reference to the unique_ptr holding the HSA timer (defined elsewhere).
// NOTE(review): the pointer may presumably be null before setup -- confirm.
std::unique_ptr<hsa_timer_t>&
get_hsa_timer();
// HSA API callback function
void
hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
// HSA activity callback function
void
hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg);
// NOTE(review): presumably runs the HIP activity callbacks cached for thread
// `_tid` -- confirm against the definition.
void
hip_exec_activity_callbacks(int64_t _tid);
// HIP API callback function
void
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
// Activity tracing callback
void
hip_activity_callback(const char* begin, const char* end, void*);
// Returns a mutable reference to a flag.
// NOTE(review): presumably true once roctracer has been set up -- confirm.
bool&
roctracer_is_setup();
// Mutable lists of named routines.
// NOTE(review): presumably executed when roctracer is set up / torn down --
// confirm against the code that consumes these lists.
roctracer_functions_t&
roctracer_setup_routines();
roctracer_functions_t&
roctracer_tear_down_routines();
@@ -0,0 +1,38 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
// Identifies which state hosttrace is currently in. The enumerators are
// numbered consecutively starting at zero.
enum class State : unsigned short
{
    DelayedInit = 0,
    PreInit     = 1,
    Active      = 2,
    Finalized   = 3
};
@@ -0,0 +1,126 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include "library/config.hpp"
#include <array>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <type_traits>
// Number of per-thread slots reserved in the fixed-size arrays declared in this
// header. HOSTTRACE_MAX_THREADS may be supplied externally (e.g. as a compile
// definition); it defaults to 1024 when not defined.
#if !defined(HOSTTRACE_MAX_THREADS)
# define HOSTTRACE_MAX_THREADS 1024
#endif
static constexpr size_t max_supported_threads = HOSTTRACE_MAX_THREADS;
// Fixed-size table of per-thread Tp instances, indexed by threading::get_id().
//
// Tp         : type stored in each slot (held through std::unique_ptr)
// Tag        : not used in the body; distinct tags yield distinct instantiations
//              and therefore distinct static tables
// MaxThreads : number of slots; indexing uses std::array::at(), so a thread id
//              >= MaxThreads throws std::out_of_range
template <typename Tp, typename Tag = void, size_t MaxThreads = max_supported_threads>
struct hosttrace_thread_data
{
using instance_array_t = std::array<std::unique_ptr<Tp>, MaxThreads>;
// tag type selecting the overloads which construct instances
using construct_on_init = std::true_type;
// creates the calling thread's instance from the given arguments and stores it in
// instances(); guarded by a thread_local static so it runs once per thread (per
// argument-type pack)
template <typename... Args>
static void construct(Args&&...);
// the calling thread's slot of instances(); null if nothing was constructed
static std::unique_ptr<Tp>& instance();
// the shared table; all slots start out null
static instance_array_t& instances();
// calls construct() with the given arguments, then returns the calling thread's
// slot of instances()
template <typename... Args>
static std::unique_ptr<Tp>& instance(construct_on_init, Args&&...);
// returns a table in which every slot has been populated; note that this table is
// a separate static object from the one returned by instances()
template <typename... Args>
static instance_array_t& instances(construct_on_init, Args&&...);
};
// Creates the calling thread's Tp from `_args` and stores it in the slot of
// instances() selected by threading::get_id(). The work is the initializer of a
// thread_local static, so for a given argument-type pack it runs once per thread
// and later calls on that thread do nothing.
template <typename Tp, typename Tag, size_t MaxThreads>
template <typename... Args>
void
hosttrace_thread_data<Tp, Tag, MaxThreads>::construct(Args&&... _args)
{
    static thread_local const bool _constructed =
        (instances().at(threading::get_id()) =
             std::make_unique<Tp>(std::forward<Args>(_args)...),
         true);
    (void) _constructed;
}
// Returns the calling thread's slot of the shared table. The slot is looked up
// with std::array::at(), so an out-of-range thread id throws std::out_of_range.
template <typename Tp, typename Tag, size_t MaxThreads>
std::unique_ptr<Tp>&
hosttrace_thread_data<Tp, Tag, MaxThreads>::instance()
{
    auto& _table = instances();
    return _table.at(threading::get_id());
}
// Returns the shared table of per-thread slots. The table is a function-local
// static whose slots are all value-initialized (null) on first use.
template <typename Tp, typename Tag, size_t MaxThreads>
typename hosttrace_thread_data<Tp, Tag, MaxThreads>::instance_array_t&
hosttrace_thread_data<Tp, Tag, MaxThreads>::instances()
{
    static instance_array_t _table{};
    return _table;
}
// Ensures the calling thread's instance has been constructed from `_args` (via
// construct()) and then returns the calling thread's slot of the shared table.
template <typename Tp, typename Tag, size_t MaxThreads>
template <typename... Args>
std::unique_ptr<Tp>&
hosttrace_thread_data<Tp, Tag, MaxThreads>::instance(construct_on_init, Args&&... _args)
{
    construct(std::forward<Args>(_args)...);
    auto& _table = instances();
    return _table.at(threading::get_id());
}
// Returns a table in which every one of the MaxThreads slots holds a Tp
// constructed from `_args`. The table is built once (function-local static, per
// argument-type pack); arguments of later calls are ignored.
//
// The arguments are passed to each constructor as lvalues rather than forwarded:
// the same pack is consumed MaxThreads times, and forwarding an rvalue would let
// the first construction move from it, leaving the remaining slots to be built
// from moved-from values.
//
// NOTE(review): this table is a separate static object from the one returned by
// the argument-less instances(); confirm that callers do not expect them to be
// the same storage.
template <typename Tp, typename Tag, size_t MaxThreads>
template <typename... Args>
typename hosttrace_thread_data<Tp, Tag, MaxThreads>::instance_array_t&
hosttrace_thread_data<Tp, Tag, MaxThreads>::instances(construct_on_init, Args&&... _args)
{
    static auto _v = [&]() {
        auto _internal = instance_array_t{};
        for(auto& _slot : _internal)
            _slot = std::make_unique<Tp>(_args...);
        return _internal;
    }();
    return _v;
}
//--------------------------------------------------------------------------------------//
// there are currently some strange things that happen with
// vector<instrumentation_bundle_t> so using vector<instrumentation_bundle_t*> and
// timemory's ring_buffer_allocator to create contiguous memory-page aligned instances of
// the bundle
//
// Holds an allocator together with a vector of raw bundle pointers; instances()
// exposes a fixed-size array with max_supported_threads entries (defined
// elsewhere).
// NOTE(review): the entries of `bundles` are presumably obtained from `allocator`
// and the array is presumably indexed by thread id -- confirm against the
// definition and callers.
struct instrumentation_bundles
{
using instance_array_t = std::array<instrumentation_bundles, max_supported_threads>;
bundle_allocator_t allocator{};
std::vector<instrumentation_bundle_t*> bundles{};
static instance_array_t& instances();
};
@@ -0,0 +1,63 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include <timemory/api.hpp>
#include <timemory/backends/mpi.hpp>
#include <timemory/backends/process.hpp>
#include <timemory/backends/threading.hpp>
#include <timemory/components.hpp>
#include <timemory/components/gotcha/mpip.hpp>
#include <timemory/components/papi/papi_tuple.hpp>
#include <timemory/config.hpp>
#include <timemory/environment.hpp>
#include <timemory/manager.hpp>
#include <timemory/mpl/apply.hpp>
#include <timemory/operations.hpp>
#include <timemory/runtime.hpp>
#include <timemory/settings.hpp>
#include <timemory/storage.hpp>
#include <timemory/variadic.hpp>
// Short aliases for the timemory namespaces used throughout the library
namespace audit = tim::audit;
namespace comp = tim::component;
namespace quirk = tim::quirk;
namespace threading = tim::threading;
namespace scope = tim::scope;
namespace dmp = tim::dmp;
namespace process = tim::process;
namespace units = tim::units;
namespace trait = tim::trait;
// same sort of functionality as python's " ".join([...])
// (defined only if JOIN is not already a macro; forwards all arguments to
// tim::mpl::apply<std::string>::join)
#if !defined(JOIN)
# define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
#endif
// PAPI tuple component parameterized on the PAPI_TOT_INS event
using papi_tot_ins = comp::papi_tuple<PAPI_TOT_INS>;
@@ -0,0 +1,29 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
@@ -1,27 +1,30 @@
// MIT License
//
// Copyright (c) 2020, The Regents of the University of California,
// through Lawrence Berkeley National Laboratory (subject to receipt of any
// required approvals from the U.S. Dept. of Energy). All rights reserved.
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "hosttrace.hpp"
@@ -1183,7 +1186,7 @@ main(int argc, char** argv)
auto mpie_fini_args = hosttrace_call_expr("HOSTTRACE_MPI_FINALIZE", "OFF");
auto trace_call_args =
hosttrace_call_expr("HOSTTRACE_COMPONENTS", default_components);
auto use_mpi_call_args = hosttrace_call_expr("HOSTTRACE_USE_MPI", "ON");
auto use_mpi_call_args = hosttrace_call_expr("HOSTTRACE_USE_PID", "ON");
auto use_mpip_call_args = hosttrace_call_expr(
"HOSTTRACE_USE_MPIP", (binary_rewrite && use_mpi && use_mpip) ? "ON" : "OFF");
auto none_call_args = hosttrace_call_expr();
@@ -1777,7 +1780,7 @@ main(int argc, char** argv)
const auto& outf = outfile;
if(outf.find('/') != string_t::npos)
{
auto outdir = outf.substr(0, outf.find_last_of('/') - 1);
auto outdir = outf.substr(0, outf.find_last_of('/'));
tim::makedir(outdir);
}
@@ -1,26 +1,30 @@
// MIT License
//
// Copyright (c) 2020, The Regents of the University of California,
// through Lawrence Berkeley National Laboratory (subject to receipt of any
// required approvals from the U.S. Dept. of Energy). All rights reserved.
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "hosttrace.hpp"
@@ -47,7 +51,7 @@ get_loop_file_line_info(module_t* mutatee_module, procedure_t* f, flow_graph_t*
char fname[MUTNAMELEN];
char mname[MUTNAMELEN];
const char* typeName = nullptr;
std::string typeName = {};
mutatee_module->getName(mname, MUTNAMELEN);
@@ -73,8 +77,6 @@ get_loop_file_line_info(module_t* mutatee_module, procedure_t* f, flow_graph_t*
{
typeName = returnType->getName();
}
else
typeName = "void";
auto params = f->getParams();
std::vector<string_t> _params;
@@ -148,8 +150,8 @@ get_func_file_line_info(module_t* mutatee_module, procedure_t* f)
char fname[MUTNAMELEN];
char mname[MUTNAMELEN];
int row1, col1, row2, col2;
string_t filename;
string_t typeName;
string_t filename = {};
string_t typeName = {};
mutatee_module->getName(mname, MUTNAMELEN);
@@ -164,8 +166,6 @@ get_func_file_line_info(module_t* mutatee_module, procedure_t* f)
{
typeName = returnType->getName();
}
else
typeName = "void";
auto params = f->getParams();
std::vector<string_t> _params;
+186 -152
Datei anzeigen
@@ -1,51 +1,39 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library.hpp"
// Reports whether debugging output was requested via the HOSTTRACE_DEBUG
// environment variable (defaults to false). The environment is queried only
// the first time this function is called.
bool
get_debug()
{
    static const bool _debug_enabled = []() {
        return tim::get_env("HOSTTRACE_DEBUG", false);
    }();
    return _debug_enabled;
}
// Accessor for the library state. The state lives in a function-local static
// which starts out as State::PreInit; callers receive a mutable reference.
State&
get_state()
{
    static State _current_state = State::PreInit;
    return _current_state;
}
bool
get_use_perfetto()
{
// if using timemory, default to perfetto being off
static auto _default_v = !tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", false, false);
// explicit env control for using perfetto
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_PERFETTO", _default_v);
return _v;
}
bool
get_use_timemory()
{
// default to opposite of whether perfetto setting
// to use both timemory and perfetto, both HOSTTRACE_USE_TIMEMORY and
// HOSTTRACE_USE_PERFETTO must be true
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", !get_use_perfetto());
return _v;
}
//--------------------------------------------------------------------------------------//
#include "library/config.hpp"
#include "library/critical_trace.hpp"
#include "library/thread_data.hpp"
#include <string_view>
namespace
{
// Mutable accessor for the sampling rate. The initial value is read from the
// HOSTTRACE_SAMPLE_RATE environment variable (default: 1) on first call.
size_t&
get_sample_rate()
{
    static size_t _rate = tim::get_env<size_t>("HOSTTRACE_SAMPLE_RATE", 1);
    return _rate;
}
std::vector<bool>&
get_sample_data()
{
@@ -53,17 +41,6 @@ get_sample_data()
return _v;
}
// Mutable accessor for the "use MPI" flag. Without TIMEMORY_USE_MPI the flag
// starts out false; otherwise the initial value is taken from the
// HOSTTRACE_USE_MPI environment variable (default: false).
bool&
get_use_mpi()
{
#if !defined(TIMEMORY_USE_MPI)
    static bool _mpi_flag = false;
#else
    static bool _mpi_flag = tim::get_env("HOSTTRACE_USE_MPI", false);
#endif
    return _mpi_flag;
}
void
setup_gotchas()
{
@@ -83,16 +60,17 @@ setup_gotchas()
mpi_gotcha_t::template configure<0, int, int*, char***>("MPI_Init");
mpi_gotcha_t::template configure<1, int, int*, char***, int, int*>(
"MPI_Init_thread");
#if defined(HOSTTRACE_USE_MPI_HEADERS)
mpi_gotcha_t::template configure<3, int>("MPI_Finalize");
#endif
};
}
auto
ensure_finalization()
ensure_finalization(bool _static_init = false)
{
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
if(!_static_init)
{
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
}
return scope::destructor{ []() { hosttrace_trace_finalize(); } };
}
@@ -103,45 +81,6 @@ get_trace_session()
return _session;
}
auto
get_perfetto_output_filename()
{
static auto _v = []() {
// default name: perfetto-trace.<pid>.proto or perfetto-trace.<rank>.proto
auto _default_fname = tim::settings::compose_output_filename(
JOIN('.', "perfetto-trace", (get_use_mpi()) ? "%rank%" : "%pid%"), "proto");
// have the default display the full path to the output file
return tim::get_env<std::string>(
"HOSTTRACE_OUTPUT_FILE",
JOIN('/', tim::get_env<std::string>("PWD", ".", false), _default_fname));
}();
auto _tmp = _v;
auto _replace = [&_tmp](const std::string& _key, auto&& _val) {
auto _pos = _tmp.find(_key);
if(_pos != std::string::npos)
_tmp.replace(_pos, _key.length(), std::to_string(_val()));
};
_replace("%pid%", []() { return process::get_id(); });
_replace("%rank%", []() { return tim::mpi::rank(); });
// backwards compatibility
_replace("%p", []() { return process::get_id(); });
return _tmp;
}
auto&
get_backend()
{
// select inprocess, system, or both (i.e. all)
static auto _v = tim::get_env_choice<std::string>(
"HOSTTRACE_BACKEND",
tim::get_env("HOSTTRACE_BACKEND_SYSTEM", false, false)
? "system" // if HOSTTRACE_BACKEND_SYSTEM is true, default to system.
: "inprocess", // Otherwise, default to inprocess
{ "inprocess", "system", "all" });
return _v;
}
auto
is_system_backend()
{
@@ -150,10 +89,10 @@ is_system_backend()
}
auto&
get_timemory_data()
get_instrumentation_bundles()
{
static thread_local auto& _v =
hosttrace_timemory_data::instances().at(threading::get_id());
instrumentation_bundles::instances().at(threading::get_id());
return _v;
}
@@ -166,6 +105,17 @@ get_functors()
return _v;
}
// Per-thread lookup table keyed by a 64-bit id; each entry stores a
// (64-bit id, 16-bit value) tuple. Returns a mutable reference to the calling
// thread's own (thread_local) map, which starts out empty.
auto&
get_cpu_cid_parents()
{
    using parent_entry_t = std::tuple<uint64_t, uint16_t>;
    using parent_map_t   = std::unordered_map<uint64_t, parent_entry_t>;

    static thread_local parent_map_t _parents{};
    return _parents;
}
using Device = critical_trace::Device;
using Phase = critical_trace::Phase;
bool
hosttrace_init_tooling()
{
@@ -184,26 +134,17 @@ hosttrace_init_tooling()
return false;
}
// always initialize timemory because gotcha wrappers are always used
tim::settings::flamegraph_output() = false;
tim::settings::cout_output() = false;
tim::settings::file_output() = true;
tim::settings::enable_signal_handler() = true;
tim::settings::collapse_processes() = false;
tim::settings::collapse_threads() = false;
tim::settings::max_thread_bookmarks() = 1;
tim::settings::global_components() = tim::get_env<std::string>(
"HOSTTRACE_COMPONENTS", "wall_clock", get_use_timemory());
int _threadpool_verbose = (get_debug()) ? 4 : -1;
tasking::get_roctracer_thread_pool().set_verbose(_threadpool_verbose);
tasking::get_critical_trace_thread_pool().set_verbose(_threadpool_verbose);
// enable timestamp directories when perfetto + mpi is activated
if(get_use_perfetto() && get_use_mpi()) tim::settings::time_output() = true;
// below will effectively do:
// get_cpu_cid_stack(0)->emplace_back(-1);
// plus query some env variables
add_critical_trace<Device::CPU, Phase::NONE>(0, -1, 0, 0, 0, 0, 0, 0);
auto _cmd = tim::read_command_line(process::get_id());
auto _exe = (_cmd.empty()) ? "hosttrace" : _cmd.front();
auto _pos = _exe.find_last_of('/');
if(_pos < _exe.length() - 1) _exe = _exe.substr(_pos + 1);
tim::timemory_init({ _exe }, "hosttrace-");
// configure the settings
configure_settings();
if(get_sample_rate() < 1) get_sample_rate() = 1;
get_sample_data().reserve(512);
@@ -218,16 +159,18 @@ hosttrace_init_tooling()
if(_comps.size() == 1 && _comps.find(TIMEMORY_WALL_CLOCK) != _comps.end())
{
// using wall_clock directly is lower overhead than using it via user_bundle
bundle_t::get_initializer() = [](bundle_t& _bundle) {
_bundle.initialize<comp::wall_clock>();
};
instrumentation_bundle_t::get_initializer() =
[](instrumentation_bundle_t& _bundle) {
_bundle.initialize<comp::wall_clock>();
};
}
else if(!_comps.empty())
{
// use user_bundle for other than wall-clock
bundle_t::get_initializer() = [](bundle_t& _bundle) {
_bundle.initialize<comp::user_global_bundle>();
};
instrumentation_bundle_t::get_initializer() =
[](instrumentation_bundle_t& _bundle) {
_bundle.initialize<comp::user_global_bundle>();
};
}
else
{
@@ -252,9 +195,8 @@ hosttrace_init_tooling()
if(get_use_perfetto())
{
// environment settings
auto shmem_size_hint =
tim::get_env<size_t>("HOSTTRACE_SHMEM_SIZE_HINT_KB", 40960);
auto buffer_size = tim::get_env<size_t>("HOSTTRACE_BUFFER_SIZE_KB", 1024000);
auto shmem_size_hint = get_perfetto_shmem_size_hint();
auto buffer_size = get_perfetto_buffer_size();
auto* buffer_config = cfg.add_buffers();
buffer_config->set_size_kb(buffer_size);
@@ -276,6 +218,7 @@ hosttrace_init_tooling()
(void) get_perfetto_output_filename();
}
auto _exe = get_exe_name();
static auto _thread_init = [_exe]() {
hosttrace_thread_data<hosttrace_thread_bundle_t>::construct(
TIMEMORY_JOIN("", _exe, "/thread-", threading::get_id()),
@@ -285,10 +228,11 @@ hosttrace_init_tooling()
} };
(void) _dtor;
};
// functors for starting and stopping timemory
static auto _push_timemory = [](const char* name) {
_thread_init();
auto& _data = get_timemory_data();
auto& _data = get_instrumentation_bundles();
// this generates a hash for the raw string array
auto _hash = tim::add_hash_id(tim::string_view_t{ name });
auto* _bundle = _data.allocator.allocate(1);
@@ -311,7 +255,7 @@ hosttrace_init_tooling()
};
static auto _pop_timemory = [](const char* name) {
auto& _data = get_timemory_data();
auto& _data = get_instrumentation_bundles();
if(_data.bundles.empty())
{
HOSTTRACE_DEBUG("[%s] skipped %s :: empty bundle stack\n",
@@ -358,8 +302,22 @@ hosttrace_init_tooling()
if(dmp::rank() == 0)
{
tim::print_env(std::cerr,
[](const std::string& _v) { return _v.find("HOSTTRACE_") == 0; });
// generic filter for filtering relevant options
auto _is_hosttrace_option = [](const auto& _v) {
#if !defined(HOSTTRACE_USE_ROCTRACER)
if(_v.find("HOSTTRACE_ROCTRACER_") == 0) return false;
#endif
if(!get_use_critical_trace() && _v.find("HOSTTRACE_CRITICAL_TRACE_") == 0)
return false;
return (_v.find("HOSTTRACE_") == 0) ||
((_v.find("TIMEMORY_") != 0) && (_v.find("SIGNAL_") != 0));
};
tim::print_env(std::cerr, [_is_hosttrace_option](const std::string& _v) {
return _is_hosttrace_option(_v);
});
print_config_settings(std::cerr, _is_hosttrace_option);
}
if(get_use_perfetto() && !is_system_backend())
@@ -421,6 +379,21 @@ extern "C"
auto _enabled = (_sample_idx++ % _sample_rate == 0);
get_sample_data().emplace_back(_enabled);
if(_enabled) get_functors().first(name);
if(get_use_critical_trace())
{
auto _ts = comp::wall_clock::record();
auto _cid = get_cpu_cid()++;
uint16_t _depth = (get_cpu_cid_stack()->empty())
? get_cpu_cid_stack(0)->size()
: get_cpu_cid_stack()->size() - 1;
auto _parent_cid = (get_cpu_cid_stack()->empty())
? get_cpu_cid_stack(0)->back()
: get_cpu_cid_stack()->back();
get_cpu_cid_parents().emplace(_cid, std::make_tuple(_parent_cid, _depth));
add_critical_trace<Device::CPU, Phase::BEGIN>(
threading::get_id(), _cid, 0, _parent_cid, _ts, 0,
critical_trace::add_hash_id(name), _depth);
}
}
void hosttrace_pop_trace(const char* name)
@@ -434,6 +407,20 @@ extern "C"
if(_sample_data.back()) get_functors().second(name);
_sample_data.pop_back();
}
if(get_use_critical_trace())
{
if(get_cpu_cid_stack() && !get_cpu_cid_stack()->empty())
{
auto _ts = comp::wall_clock::record();
auto _cid = get_cpu_cid_stack()->back();
uint64_t _parent_cid = 0;
uint16_t _depth = 0;
std::tie(_parent_cid, _depth) = get_cpu_cid_parents().at(_cid);
add_critical_trace<Device::CPU, Phase::END>(
threading::get_id(), _cid, 0, _parent_cid, _ts, _ts,
critical_trace::add_hash_id(name), _depth);
}
}
}
else
{
@@ -463,15 +450,19 @@ extern "C"
comp::roctracer::tear_down();
#endif
// join extra thread(s) used by roctracer
HOSTTRACE_DEBUG("[%s] waiting for all roctracer tasks to complete...\n",
__FUNCTION__);
tasking::get_roctracer_task_group().join();
// stop the main bundle and report the high-level metrics
if(get_main_bundle())
{
get_main_bundle()->stop();
int64_t _id = (get_use_mpi()) ? dmp::rank() : process::get_id();
std::stringstream _ss{};
_ss << "[" << __FUNCTION__ << "][" << _id << "] " << *get_main_bundle()
<< "\n";
std::cerr << _ss.str();
std::string _msg = JOIN("", *get_main_bundle());
auto _pos = _msg.find(">>> ");
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
HOSTTRACE_PRINT("%s\n", _msg.c_str());
get_main_bundle().reset();
}
@@ -484,14 +475,15 @@ extern "C"
if(itr && itr->get<comp::wall_clock>() &&
!itr->get<comp::wall_clock>()->get_is_running())
{
std::stringstream _ss{};
_ss << *itr << "\n";
std::cerr << _ss.str();
std::string _msg = JOIN("", *itr);
auto _pos = _msg.find(">>> ");
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
HOSTTRACE_PRINT("%s\n", _msg.c_str());
}
}
// ensure that all the MT instances are flushed
for(auto& itr : hosttrace_timemory_data::instances())
for(auto& itr : instrumentation_bundles::instances())
{
while(!itr.bundles.empty())
{
@@ -503,6 +495,44 @@ extern "C"
}
}
if(get_use_critical_trace())
{
// increase the thread-pool size
tasking::get_critical_trace_thread_pool().initialize_threadpool(
get_critical_trace_num_threads());
for(size_t i = 0; i < max_supported_threads; ++i)
{
using critical_trace_hash_data =
hosttrace_thread_data<critical_trace::hash_ids, critical_trace::id>;
if(critical_trace_hash_data::instances().at(i))
critical_trace::add_hash_id(
*critical_trace_hash_data::instances().at(i));
}
for(size_t i = 0; i < max_supported_threads; ++i)
{
using critical_trace_chain_data =
hosttrace_thread_data<critical_trace::call_chain>;
if(critical_trace_chain_data::instances().at(i))
critical_trace::update(i); // launch update task
}
// make sure outstanding hash tasks completed before compute
HOSTTRACE_PRINT("[%s] waiting for all critical trace tasks to complete...\n",
__FUNCTION__);
tasking::get_critical_trace_task_group().join();
// launch compute task
HOSTTRACE_PRINT("[%s] launching critical trace compute task...\n",
__FUNCTION__);
critical_trace::compute();
}
tasking::get_critical_trace_task_group().join();
bool _perfetto_output_error = false;
if(get_use_perfetto() && !is_system_backend())
{
@@ -530,20 +560,27 @@ extern "C"
static_cast<double>(trace_data.size()) / units::KB,
static_cast<double>(trace_data.size()) / units::MB,
static_cast<double>(trace_data.size()) / units::GB);
std::ofstream output{};
output.open(get_perfetto_output_filename(), std::ios::out | std::ios::binary);
if(!output)
std::ofstream ofs{};
if(!tim::filepath::open(ofs, get_perfetto_output_filename(),
std::ios::out | std::ios::binary))
{
fprintf(stderr, "[%s]> Error opening '%s'...\n", __FUNCTION__,
get_perfetto_output_filename().c_str());
_perfetto_output_error = true;
}
else
output.write(&trace_data[0], trace_data.size());
output.close();
ofs.write(&trace_data[0], trace_data.size());
ofs.close();
}
// these should be destroyed before timemory is finalized, especially the
// roctracer thread-pool
tasking::get_roctracer_thread_pool().destroy_threadpool();
tasking::get_critical_trace_thread_pool().destroy_threadpool();
HOSTTRACE_DEBUG("Finalizing timemory...\n");
tim::timemory_finalize();
HOSTTRACE_DEBUG("Finalizing timemory... Done\n");
if(_perfetto_output_error)
throw std::runtime_error("Unable to create perfetto output file");
@@ -564,20 +601,17 @@ extern "C"
{
auto& _main_bundle = get_main_bundle();
_main_bundle->start();
#if defined(TIMEMORY_USE_MPI)
tim::set_env("HOSTTRACE_USE_MPI", "ON", 1);
get_use_mpi() = true;
#endif
get_state() = State::DelayedInit;
get_use_pid() = true;
get_state() = State::DelayedInit;
}
}
}
std::unique_ptr<hosttrace_bundle_t>&
std::unique_ptr<main_bundle_t>&
get_main_bundle()
{
static auto _v =
(setup_gotchas(), std::make_unique<hosttrace_bundle_t>(
(setup_gotchas(), std::make_unique<main_bundle_t>(
"hosttrace", quirk::config<quirk::auto_start>{}));
return _v;
}
@@ -587,5 +621,5 @@ namespace
// if static objects are destroyed randomly (relatively uncommon behavior)
// this might call finalization before perfetto ends the tracing session
// but static variable in hosttrace_init_tooling is more likely
auto _ensure_finalization = ensure_finalization();
auto _ensure_finalization = ensure_finalization(true);
} // namespace
@@ -0,0 +1,528 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/config.hpp"
#include "library/debug.hpp"
#include "library/thread_data.hpp"
#include "timemory/backends/dmp.hpp"
#include "timemory/backends/process.hpp"
#include "timemory/settings/types.hpp"
#include "timemory/utility/argparse.hpp"
#include <array>
#include <cstdint>
#include <cstdlib>
#include <numeric>
#include <ostream>
#include <string>
#include <timemory/environment.hpp>
#include <timemory/settings.hpp>
using settings = tim::settings;
namespace
{
// Returns the shared timemory settings instance, making sure that
// configure_settings() has been invoked before the instance is handed out.
auto
get_config()
{
    // the comma expression runs configure_settings() exactly once, when this
    // function-local static is initialized
    static auto _once = (configure_settings(), true);
    // silence unused-variable diagnostics; this must precede the return
    // statement, otherwise it is unreachable
    (void) _once;
    return settings::shared_instance();
}
// Registers one setting by calling insert<TYPE, TYPE>() on a variable named
// `_config`, which must be in scope at the point of expansion.
//   TYPE          - value type of the setting
//   ENV_NAME      - passed twice to insert(): as its first and second argument
//   DESCRIPTION   - human-readable description of the setting
//   INITIAL_VALUE - initial value of the setting
// The trailing argument passed to insert() is always an empty
// std::vector<std::string>.
#define HOSTTRACE_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE) \
_config->insert<TYPE, TYPE>(ENV_NAME, ENV_NAME, DESCRIPTION, INITIAL_VALUE, \
std::vector<std::string>{})
} // namespace
// Registers every HOSTTRACE_* setting with the shared timemory settings
// instance, applies hosttrace's fixed timemory defaults, reads the configuration
// file(s), records the executable name, and initializes timemory.
//
// Only the first call does any work; later calls return immediately.
// The order of the registrations matters: several defaults are derived from
// settings registered earlier in this function.
void
configure_settings()
{
// one-shot guard: everything below executes on the first call only
static bool _once = false;
if(_once) return;
_once = true;
static auto _config = settings::shared_instance();
// auto* _config = settings::instance();
// if using timemory, default to perfetto being off
auto _default_perfetto_v =
!tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", false, false);
// default configuration file: $HOME/hosttrace.cfg ("." is used when HOME is unset)
auto _default_config_file =
JOIN("/", tim::get_env<std::string>("HOME", "."), "hosttrace.cfg");
auto _system_backend = tim::get_env("HOSTTRACE_BACKEND_SYSTEM", false, false);
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_CONFIG_FILE",
"Configuration file of hosttrace and timemory settings",
_default_config_file);
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_DEBUG", "Enable debugging output",
_config->get_debug());
// when hosttrace debugging is on, set TIMEMORY_DEBUG_SETTINGS=1 in the environment
auto _hosttrace_debug = _config->get<bool>("HOSTTRACE_DEBUG");
if(_hosttrace_debug) tim::set_env("TIMEMORY_DEBUG_SETTINGS", "1", 0);
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_USE_PERFETTO", "Enable perfetto backend",
_default_perfetto_v);
// timemory defaults to the opposite of the perfetto setting registered just above
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_USE_TIMEMORY", "Enable timemory backend",
!_config->get<bool>("HOSTTRACE_USE_PERFETTO"));
HOSTTRACE_CONFIG_SETTING(
bool, "HOSTTRACE_USE_PID",
"Enable tagging filenames with process identifier (either MPI rank or pid)",
true);
HOSTTRACE_CONFIG_SETTING(
size_t, "HOSTTRACE_SAMPLE_RATE",
"Counts every function call (N), only record function if (N % <VALUE> == 0)", 1);
// the backend value is read from the environment first and then used as the
// initial value of the registered HOSTTRACE_BACKEND setting
auto _backend = tim::get_env_choice<std::string>(
"HOSTTRACE_BACKEND",
(_system_backend)
? "system" // if HOSTTRACE_BACKEND_SYSTEM is true, default to system.
: "inprocess", // Otherwise, default to inprocess
{ "inprocess", "system", "all" }, false);
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_BACKEND",
"Specify the perfetto backend to activate. Options are: "
"'inprocess', 'system', or 'all'",
_backend);
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_CRITICAL_TRACE",
"Enable generation of the critical trace", false);
// the two roctracer profile settings start from timemory's own
// timeline/flat profile values
HOSTTRACE_CONFIG_SETTING(
bool, "HOSTTRACE_ROCTRACER_TIMELINE_PROFILE",
"Create unique entries for every kernel with timemory backend",
_config->get_timeline_profile());
HOSTTRACE_CONFIG_SETTING(
bool, "HOSTTRACE_ROCTRACER_FLAT_PROFILE",
"Ignore hierarchy in all kernels entries with timemory backend",
_config->get_flat_profile());
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_ROCTRACER_HSA_ACTIVITY",
"Enable HSA activity tracing support", false);
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_ROCTRACER_HSA_API",
"Enable HSA API tracing support", false);
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_ROCTRACER_HSA_API_TYPES",
"HSA API type to collect", "");
// the critical-trace debugging options default to the HOSTTRACE_DEBUG value
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_CRITICAL_TRACE_DEBUG",
"Enable debugging for critical trace", _hosttrace_debug);
HOSTTRACE_CONFIG_SETTING(
bool, "HOSTTRACE_CRITICAL_TRACE_SERIALIZE_NAMES",
"Include names in serialization of critical trace (mainly for debugging)",
_hosttrace_debug);
HOSTTRACE_CONFIG_SETTING(size_t, "HOSTTRACE_SHMEM_SIZE_HINT_KB",
"Hint for shared-memory buffer size in perfetto (in KB)",
40960);
HOSTTRACE_CONFIG_SETTING(size_t, "HOSTTRACE_BUFFER_SIZE_KB",
"Size of perfetto buffer (in KB)", 1024000);
HOSTTRACE_CONFIG_SETTING(int64_t, "HOSTTRACE_CRITICAL_TRACE_COUNT",
"Number of critical trace to export (0 == all)", 0);
HOSTTRACE_CONFIG_SETTING(uint64_t, "HOSTTRACE_CRITICAL_TRACE_BUFFER_COUNT",
"Number of critical trace records to store in thread-local "
"memory before submitting to shared buffer",
2000);
// NOTE(review): std::thread::hardware_concurrency() is permitted to return 0,
// in which case this default becomes 0 threads -- confirm that is acceptable
HOSTTRACE_CONFIG_SETTING(
uint64_t, "HOSTTRACE_CRITICAL_TRACE_NUM_THREADS",
"Number of threads to use when generating the critical trace",
std::min<uint64_t>(8, std::thread::hardware_concurrency()));
HOSTTRACE_CONFIG_SETTING(
int64_t, "HOSTTRACE_CRITICAL_TRACE_PER_ROW",
"How many critical traces per row in perfetto (0 == all in one row)", 0);
HOSTTRACE_CONFIG_SETTING(
std::string, "HOSTTRACE_COMPONENTS",
"List of components to collect via timemory (see timemory-avail)", "wall_clock");
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_OUTPUT_FILE", "Perfetto filename",
"");
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_SETTINGS_DESC",
"Provide descriptions when printing settings", false);
// fixed timemory defaults applied by hosttrace; these are assigned before the
// configuration file(s) are read below
_config->get_flamegraph_output() = false;
_config->get_cout_output() = false;
_config->get_file_output() = true;
_config->get_json_output() = true;
_config->get_tree_output() = true;
_config->get_enable_signal_handler() = true;
_config->get_collapse_processes() = false;
_config->get_collapse_threads() = false;
_config->get_stack_clearing() = false;
_config->get_time_output() = true;
_config->get_timing_precision() = 6;
// HOSTTRACE_CONFIG_FILE is passed through tim::delimit with ";:" (presumably
// splitting on either character); every resulting entry is read
for(auto&& itr :
tim::delimit(_config->get<std::string>("HOSTTRACE_CONFIG_FILE"), ";:"))
{
HOSTTRACE_CONDITIONAL_BASIC_PRINT(true, "Reading config file %s\n", itr.c_str());
_config->read(itr);
}
// forward the hosttrace component list to timemory's global components
_config->get_global_components() = _config->get<std::string>("HOSTTRACE_COMPONENTS");
// always initialize timemory because gotcha wrappers are always used
// the executable name is the first command-line entry ("exe" when the command
// line is empty) with any leading directory portion removed
auto _cmd = tim::read_command_line(process::get_id());
auto _exe = (_cmd.empty()) ? "exe" : _cmd.front();
auto _pos = _exe.find_last_of('/');
if(_pos < _exe.length() - 1) _exe = _exe.substr(_pos + 1);
get_exe_name() = _exe;
scope::get_fields()[scope::flat::value] = tim::settings::flat_profile();
scope::get_fields()[scope::timeline::value] = tim::settings::timeline_profile();
// insert a "--" entry after the first command-line entry unless a "--" entry
// is already present somewhere in the command line
bool _found_sep = false;
for(const auto& itr : _cmd)
{
if(itr == "--") _found_sep = true;
}
if(!_found_sep && _cmd.size() > 1) _cmd.insert(_cmd.begin() + 1, "--");
using argparser_t = tim::argparse::argument_parser;
argparser_t _parser{ _exe };
tim::timemory_init(_cmd, _parser, "hosttrace-");
// after timemory has been initialized: turn on the suppress_parsing and
// suppress_config settings and make the output suffix follow HOSTTRACE_USE_PID
settings::suppress_parsing() = true;
settings::suppress_config() = true;
settings::use_output_suffix() = _config->get<bool>("HOSTTRACE_USE_PID");
}
// Prints a boxed table of the configuration settings accepted by `_filter`.
//
//   _os     - stream the table is written to; its format flags are restored to
//             their original value before returning
//   _filter - called with the key of every setting; only settings for which it
//             returns true are printed
//
// Each row has the form "# NAME = VALUE #". When debugging is enabled or
// HOSTTRACE_SETTINGS_DESC is true the row is extended to
// "# NAME = VALUE [ DESCRIPTION ] #". Columns are left-aligned and padded to
// the widest entry of that column.
void
print_config_settings(std::ostream&                                  _os,
                      std::function<bool(const std::string_view&)>&& _filter)
{
    // remember the caller's format flags so they can be restored on exit
    const auto _flags = _os.flags();

    constexpr size_t nfields = 3;
    using str_array_t        = std::array<std::string, nfields>;
    std::vector<str_array_t>    _data{};
    std::array<size_t, nfields> _widths{};
    _widths.fill(0);

    // collect { name, value, description } for every accepted setting and track
    // the maximum width of each column
    for(const auto& itr : *get_config())
    {
        if(_filter(itr.first))
        {
            auto _disp = itr.second->get_display(std::ios::boolalpha);
            _data.emplace_back(str_array_t{ _disp.at("name"), _disp.at("value"),
                                            _disp.at("description") });
            for(size_t i = 0; i < nfields; ++i)
                _widths.at(i) =
                    std::max<size_t>(_widths.at(i), _data.back().at(i).length());
        }
    }

    // The comparator must be a strict weak ordering (comp(a, a) == false), so
    // each criterion only decides when the two sides actually differ.
    std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) {
        constexpr auto _npos  = std::string::npos;
        const auto&    _lname = lhs.at(0);
        const auto&    _rname = rhs.at(0);
        // HOSTTRACE_CONFIG_FILE always first
        const bool _lhs_cfg = _lname.find("HOSTTRACE_CONFIG") != _npos;
        const bool _rhs_cfg = _rname.find("HOSTTRACE_CONFIG") != _npos;
        if(_lhs_cfg != _rhs_cfg) return _lhs_cfg;
        // HOSTTRACE_USE_* prioritized (smaller match position sorts first)
        const auto _lhs_use = _lname.find("HOSTTRACE_USE_");
        const auto _rhs_use = _rname.find("HOSTTRACE_USE_");
        if(_lhs_use != _rhs_use) return _lhs_use < _rhs_use;
        // length sort followed by alphabetical sort
        return (_lname.length() == _rname.length())
                   ? (_lname < _rname)
                   : (_lname.length() < _rname.length());
    });

    bool _print_desc = get_debug() || get_config()->get<bool>("HOSTTRACE_SETTINGS_DESC");

    // width of the horizontal rule; the description column (and its decoration)
    // is excluded when descriptions are not printed
    auto tot_width = std::accumulate(_widths.begin(), _widths.end(), 0);
    if(!_print_desc) tot_width -= _widths.back() + 4;

    std::stringstream _spacer{};
    _spacer.fill('-');
    _spacer << "#" << std::setw(tot_width + 11) << ""
            << "#";
    _os << _spacer.str() << "\n";

    for(const auto& itr : _data)
    {
        _os << "# ";
        for(size_t i = 0; i < nfields; ++i)
        {
            // every column is left-aligned
            _os << std::left << std::setw(_widths.at(i)) << itr.at(i) << " ";
            // stop after the value column when descriptions are disabled
            if(!_print_desc && i == 1) break;
            switch(i)
            {
                case 0: _os << "= "; break;
                case 1: _os << "[ "; break;
                case 2: _os << "]"; break;
            }
        }
        _os << " #\n";
    }
    _os << _spacer.str() << "\n";

    // flags(), not setf(): setf() would only OR the saved bits into the current
    // flags and leave std::left set on the caller's stream
    _os.flags(_flags);
}
// Mutable accessor for the stored executable name. The string lives in a
// function-local static and is empty until a caller assigns to it.
std::string&
get_exe_name()
{
    static std::string _exe_name{};
    return _exe_name;
}
std::string
get_config_file()
{
static auto _v = get_config()->find("HOSTTRACE_CONFIG_FILE");
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
}
bool
get_debug()
{
static auto _v = get_config()->find("HOSTTRACE_DEBUG");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_use_perfetto()
{
static auto _v = get_config()->find("HOSTTRACE_USE_PERFETTO");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_use_timemory()
{
static auto _v = get_config()->find("HOSTTRACE_USE_TIMEMORY");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool&
get_use_pid()
{
static auto _v = get_config()->find("HOSTTRACE_USE_PID");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
// Reports the value of the HOSTTRACE_USE_MPIP environment variable (default:
// false). Unlike its neighbours this is read straight from the environment,
// once, on first call.
bool
get_use_mpip()
{
    static const bool _mpip_enabled = []() {
        return tim::get_env("HOSTTRACE_USE_MPIP", false, false);
    }();
    return _mpip_enabled;
}
bool
get_use_critical_trace()
{
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_critical_trace_debug()
{
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_DEBUG");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_critical_trace_serialize_names()
{
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_SERIALIZE_NAMES");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_roctracer_timeline_profile()
{
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_TIMELINE_PROFILE");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_roctracer_flat_profile()
{
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_FLAT_PROFILE");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_trace_hsa_api()
{
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_HSA_API");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool
get_trace_hsa_activity()
{
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_HSA_ACTIVITY");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
int64_t
get_critical_trace_per_row()
{
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_PER_ROW");
return static_cast<tim::tsettings<int64_t>&>(*_v->second).get();
}
size_t
get_perfetto_shmem_size_hint()
{
static auto _v = get_config()->find("HOSTTRACE_SHMEM_SIZE_HINT_KB");
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
}
size_t
get_perfetto_buffer_size()
{
static auto _v = get_config()->find("HOSTTRACE_BUFFER_SIZE_KB");
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
}
// Returns the value of the "HOSTTRACE_CRITICAL_TRACE_BUFFER_COUNT" configuration
// entry as a uint64_t. Unlike the iterator-caching getters, the VALUE is cached on
// the first call, so later changes to the setting are not observed here.
// NOTE(review): the function name says "update_freq" while the key says
// "BUFFER_COUNT" -- presumably intentional; confirm.
uint64_t
get_critical_trace_update_freq()
{
    static uint64_t _cached =
        get_config()->get<uint64_t>("HOSTTRACE_CRITICAL_TRACE_BUFFER_COUNT");
    return _cached;
}
// Returns the value of the "HOSTTRACE_CRITICAL_TRACE_NUM_THREADS" configuration
// entry as a uint64_t. The value is read on the first call and cached in a
// function-local static, so later changes to the setting are not observed here.
uint64_t
get_critical_trace_num_threads()
{
    static uint64_t _cached =
        get_config()->get<uint64_t>("HOSTTRACE_CRITICAL_TRACE_NUM_THREADS");
    return _cached;
}
// Returns a copy of the "HOSTTRACE_ROCTRACER_HSA_API_TYPES" configuration value.
// The string is read on the first call and cached in a function-local static, so
// later changes to the setting are not observed here.
std::string
get_trace_hsa_api_types()
{
    static std::string _cached =
        get_config()->get<std::string>("HOSTTRACE_ROCTRACER_HSA_API_TYPES");
    return _cached;
}
std::string&
get_backend()
{
// select inprocess, system, or both (i.e. all)
static auto _v = get_config()->find("HOSTTRACE_BACKEND");
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
}
std::string
get_perfetto_output_filename()
{
static auto _v = get_config()->find("HOSTTRACE_OUTPUT_FILE");
static auto& _t = static_cast<tim::tsettings<std::string>&>(*_v->second);
if(_t.get().empty())
{
// default name: perfetto-trace.<pid>.proto or perfetto-trace.<rank>.proto
auto _default_fname = settings::compose_output_filename(
"perfetto-trace", "proto", get_use_pid(),
(tim::dmp::is_initialized()) ? tim::dmp::rank() : process::get_id());
auto _pid_patch = std::string{ "/" } + std::to_string(tim::process::get_id()) +
"-perfetto-trace";
auto _dpos = _default_fname.find(_pid_patch);
if(_dpos != std::string::npos)
_default_fname =
_default_fname.replace(_dpos, _pid_patch.length(), "/perfetto-trace");
// have the default display the full path to the output file
_t.set(tim::get_env<std::string>(
"HOSTTRACE_OUTPUT_FILE",
JOIN('/', tim::get_env<std::string>("PWD", ".", false), _default_fname),
false));
}
return _t.get();
}
size_t&
get_sample_rate()
{
static auto _v = get_config()->find("HOSTTRACE_SAMPLE_RATE");
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
}
int64_t
get_critical_trace_count()
{
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_COUNT");
return static_cast<tim::tsettings<int64_t>&>(*_v->second).get();
}
// Returns a mutable reference to the process-wide State value, which starts out
// as State::PreInit. Callers assign through the returned reference to change it.
State&
get_state()
{
    static State _state{ State::PreInit };
    return _state;
}
// Returns a reference to a single, process-wide atomic 64-bit value that is
// initialized to zero on first use.
std::atomic<uint64_t>&
get_cpu_cid()
{
    static std::atomic<uint64_t> _cid{ 0 };
    return _cid;
}
// Returns the vector<uint64_t> slot at index `_tid` from the per-thread instance
// storage tagged with the local hosttrace_cpu_cid_stack type.
//
// The first time each thread calls this function, a thread-local initializer
// invokes thread_data_t::construct(): when the `_tid` of that first call is > 0,
// the vector is seeded with a copy of the instance at index 0; otherwise it is
// seeded with an empty vector.
// NOTE(review): construct() presumably creates the slot for the calling thread --
// confirm against hosttrace_thread_data.
std::unique_ptr<std::vector<uint64_t>>&
get_cpu_cid_stack(int64_t _tid)
{
    struct hosttrace_cpu_cid_stack
    {};
    using thread_data_t =
        hosttrace_thread_data<std::vector<uint64_t>, hosttrace_cpu_cid_stack>;

    static auto&             _v       = thread_data_t::instances();
    static thread_local auto _v_check = [_tid]() {
        thread_data_t::construct((_tid > 0) ? *thread_data_t::instances().at(0)
                                            : std::vector<uint64_t>{});
        return true;
    }();
    // mark the initializer flag as used *before* returning; in the original this
    // statement followed the return and was unreachable
    (void) _v_check;
    return _v.at(_tid);
}
Datei-Diff unterdrückt, da er zu groß ist Diff laden
@@ -0,0 +1,46 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/fork_gotcha.hpp"
#include "library/config.hpp"
#include "library/debug.hpp"
// Audit hook invoked before the wrapped fork call runs: emits a debug warning and,
// when get_debug() is true, a demangled backtrace (depth argument: 16).
void
fork_gotcha::audit(const gotcha_data_t&, audit::incoming)
{
    // message grammar fixed ("may result is" -> "may result in a")
    HOSTTRACE_DEBUG(
        "Warning! Calling fork() within an OpenMPI application using libfabric "
        "may result in a segmentation fault\n");
    TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
}
// Audit hook invoked after the wrapped call returns: logs the wrapped function's
// identifier (tool_id) together with the returned PID.
void
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
{
    HOSTTRACE_DEBUG("%s() return PID %i\n", _data.tool_id.c_str(),
                    static_cast<int>(_pid));
}
@@ -0,0 +1,50 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/hosttrace_component.hpp"
#include "library/api.hpp"
// Pushes a trace region named after the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::start()
{
    if(!m_prefix) return;
    hosttrace_push_trace(m_prefix);
}
// Pops the trace region named after the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::stop()
{
    if(!m_prefix) return;
    hosttrace_pop_trace(m_prefix);
}
void
hosttrace_component::set_prefix(const char* _prefix)
{
m_prefix = _prefix;
}
// timemory storage initialization for hosttrace_component.
// NOTE(review): the macro is provided by timemory; its expansion is not visible here.
TIMEMORY_INITIALIZE_STORAGE(hosttrace_component)
@@ -1,14 +1,40 @@
#include "library.hpp"
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
//
// This file contains miscellaneous function definitions related to timemory
// placed in separate file so that, during development, the long compile-times
// arising from compiling timemory's gotcha wrappers are reduced
//
#include "library/mpi_gotcha.hpp"
#include "library/config.hpp"
#include "library/debug.hpp"
#include "library/hosttrace_component.hpp"
namespace
{
// index handed to comp::deactivate_mpip(); the max() value acts as the
// "not activated" sentinel (the original defined this variable twice)
uint64_t mpip_index = std::numeric_limits<uint64_t>::max();
// tool_id recorded by the MPI init audit; popped as a trace region at finalization
std::string mpi_init_string = {};
// this ensures hosttrace_trace_finalize is called before MPI_Finalize
void
@@ -19,10 +45,12 @@ hosttrace_mpi_set_attr()
return MPI_SUCCESS;
};
static auto _mpi_fini = [](MPI_Comm, int, void*, void*) {
HOSTTRACE_DEBUG("MPI Comm attribute finalize\n");
if(mpip_index != std::numeric_limits<uint64_t>::max())
comp::deactivate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>(
mpip_index);
hosttrace_pop_trace("MPI_Finalize()");
if(!mpi_init_string.empty()) hosttrace_pop_trace(mpi_init_string.c_str());
mpi_init_string = {};
hosttrace_trace_finalize();
return MPI_SUCCESS;
};
@@ -37,26 +65,15 @@ hosttrace_mpi_set_attr()
}
} // namespace
// Audit hook invoked before the wrapped fork call runs: emits a debug warning and,
// when get_debug() is true, a demangled backtrace (depth argument: 16).
void
fork_gotcha::audit(const gotcha_data_t&, audit::incoming)
{
    // message grammar fixed ("may result is" -> "may result in a")
    HOSTTRACE_DEBUG(
        "Warning! Calling fork() within an OpenMPI application using libfabric "
        "may result in a segmentation fault\n");
    TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
}
// Audit hook invoked after the wrapped call returns: logs the wrapped function's
// identifier (tool_id) together with the returned PID.
void
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
{
    HOSTTRACE_DEBUG("%s() return PID %i\n", _data.tool_id.c_str(),
                    static_cast<int>(_pid));
}
// Audit hook invoked before the wrapped (int*, char***) MPI init call runs.
// When initialization was delayed (State::DelayedInit), the state is moved back to
// State::PreInit and the wrapped function's tool_id is recorded in mpi_init_string.
void
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***)
{
    HOSTTRACE_DEBUG("[%s] %s(int*, char***)\n", __FUNCTION__, _data.tool_id.c_str());

    // a single check: the original also carried a one-line variant of this test
    // that reset the state first, which made this block unreachable and left
    // mpi_init_string unset
    if(get_state() == ::State::DelayedInit)
    {
        get_state()     = ::State::PreInit;
        mpi_init_string = _data.tool_id;
    }
}
void
@@ -64,7 +81,11 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***, in
{
HOSTTRACE_DEBUG("[%s] %s(int*, char***, int, int*)\n", __FUNCTION__,
_data.tool_id.c_str());
if(get_state() == ::State::DelayedInit) get_state() = ::State::PreInit;
if(get_state() == ::State::DelayedInit)
{
get_state() = ::State::PreInit;
mpi_init_string = _data.tool_id;
}
}
void
@@ -80,7 +101,7 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval)
// being activated unwaringly during runtime instrumentation because that
// will result in double instrumenting the MPI functions (unless the MPI functions
// were excluded via a regex expression)
if(tim::get_env("HOSTTRACE_USE_MPIP", false, false))
if(get_use_mpip())
{
HOSTTRACE_DEBUG("[%s] Activating MPI wrappers...\n", __FUNCTION__);
comp::configure_mpip<tim::component_tuple<hosttrace_component>, hosttrace>();
@@ -98,39 +119,9 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming)
if(mpip_index != std::numeric_limits<uint64_t>::max())
comp::deactivate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>(
mpip_index);
hosttrace_pop_trace("MPI_Finalize()");
if(!mpi_init_string.empty()) hosttrace_pop_trace(mpi_init_string.c_str());
mpi_init_string = {};
hosttrace_trace_finalize();
}
// Pushes a trace region named after the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::start()
{
    if(!m_prefix) return;
    hosttrace_push_trace(m_prefix);
}
// Pops the trace region named after the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::stop()
{
    if(!m_prefix) return;
    hosttrace_pop_trace(m_prefix);
}
void
hosttrace_component::set_prefix(const char* _prefix)
{
m_prefix = _prefix;
}
// Returns a reference to the single, value-initialized instance_array_t shared by
// all callers (function-local static, created on first use).
hosttrace_timemory_data::instance_array_t&
hosttrace_timemory_data::instances()
{
    static instance_array_t _instances{};
    return _instances;
}
// Static storage definitions for perfetto track events and timemory components.
// NOTE(review): mpi_gotcha appears in both TIMEMORY_INITIALIZE_STORAGE invocations
// below -- looks like the older and newer variants of this statement are both
// present; confirm which one is intended.
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
TIMEMORY_INITIALIZE_STORAGE(fork_gotcha, mpi_gotcha, comp::wall_clock,
comp::user_global_bundle)
// static members for the custom data source are only defined when it is enabled
#if defined(CUSTOM_DATA_SOURCE)
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
TIMEMORY_INITIALIZE_STORAGE(mpi_gotcha)
@@ -0,0 +1,35 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/perfetto.hpp"
// Static storage for perfetto track events (macro provided by perfetto).
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
// static members for the custom data source are only defined when it is enabled
#if defined(CUSTOM_DATA_SOURCE)
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
@@ -0,0 +1,80 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/ptl.hpp"
// Accessors for the mutexes, thread pools, and task groups used for roctracer and
// critical-trace work. Every accessor returns a reference to a function-local
// static, so each object is created on first use and shared by all callers.
namespace tasking
{
// Mutex paired with the roctracer task group.
std::mutex&
get_roctracer_mutex()
{
    static std::mutex _mtx;
    return _mtx;
}

// Thread pool for roctracer tasks, constructed with an argument of 1.
PTL::ThreadPool&
get_roctracer_thread_pool()
{
    static PTL::ThreadPool _pool{ 1 };
    return _pool;
}

// Task group bound to the roctracer thread pool.
PTL::TaskGroup<void>&
get_roctracer_task_group()
{
    static PTL::TaskGroup<void> _group{ &get_roctracer_thread_pool() };
    return _group;
}

// Mutex paired with the critical-trace task group.
std::mutex&
get_critical_trace_mutex()
{
    static std::mutex _mtx;
    return _mtx;
}

// Thread pool for critical-trace tasks, constructed with an argument of 1.
PTL::ThreadPool&
get_critical_trace_thread_pool()
{
    static PTL::ThreadPool _pool{ 1 };
    return _pool;
}

// Task group bound to the critical-trace thread pool.
PTL::TaskGroup<void>&
get_critical_trace_task_group()
{
    static PTL::TaskGroup<void> _group{ &get_critical_trace_thread_pool() };
    return _group;
}

namespace
{
// forces both thread pools to be created during static initialization of this
// translation unit (roctracer pool first, then the critical-trace pool)
bool _ptl_initialized = []() {
    get_roctracer_thread_pool();
    get_critical_trace_thread_pool();
    return true;
}();
}  // namespace
}  // namespace tasking
@@ -0,0 +1,283 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/roctracer.hpp"
#include "library/config.hpp"
#include "library/defines.hpp"
#include "library/roctracer_callbacks.hpp"
#include "library/thread_data.hpp"
namespace tim
{
namespace component
{
void
roctracer::preinit()
{
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
roctracer_data::label() = "roctracer";
roctracer_data::description() = "ROCm tracer (activity API)";
}
bool
roctracer::is_setup()
{
return roctracer_is_setup();
}
void
roctracer::add_setup(const std::string& _lbl, std::function<void()>&& _func)
{
roctracer_setup_routines().emplace_back(_lbl, std::move(_func));
}
void
roctracer::add_tear_down(const std::string& _lbl, std::function<void()>&& _func)
{
roctracer_tear_down_routines().emplace_back(_lbl, std::move(_func));
}
void
roctracer::remove_setup(const std::string& _lbl)
{
auto& _data = roctracer_setup_routines();
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
{
if(itr->first == _lbl)
{
_data.erase(itr);
break;
}
}
}
void
roctracer::remove_tear_down(const std::string& _lbl)
{
auto& _data = roctracer_setup_routines();
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
{
if(itr->first == _lbl)
{
_data.erase(itr);
break;
}
}
}
void
roctracer::setup()
{
if(!get_use_timemory() && !get_use_perfetto()) return;
auto_lock_t _lk{ type_mutex<roctracer>() };
if(roctracer_is_setup()) return;
roctracer_is_setup() = true;
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
tim::set_env("HSA_TOOLS_LIB", "libhosttrace.so", 0);
auto _kfdwrapper = dynamic_library{ "HOSTTRACE_ROCTRACER_LIBKFDWRAPPER",
HOSTTRACE_ROCTRACER_LIBKFDWRAPPER };
ROCTRACER_CALL(roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, nullptr));
// if(roctracer_default_pool() == nullptr)
{
// Allocating tracing pool
roctracer_properties_t properties{};
memset(&properties, 0, sizeof(roctracer_properties_t));
properties.mode = 0x1000;
properties.buffer_size = 0x1000;
properties.buffer_callback_fun = hip_activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
}
// Enable API callbacks, all domains
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, nullptr));
// Enable activity tracing, all domains
ROCTRACER_CALL(roctracer_enable_activity());
// callback for HSA
for(auto& itr : roctracer_setup_routines())
itr.second();
}
void
roctracer::tear_down()
{
auto_lock_t _lk{ type_mutex<roctracer>() };
if(!roctracer_is_setup()) return;
roctracer_is_setup() = false;
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
// flush all the activity
if(roctracer_default_pool() != nullptr)
{
ROCTRACER_CALL(roctracer_flush_activity());
}
// flush all buffers
roctracer_flush_buf();
// make sure all async operations are executed
for(size_t i = 0; i < max_supported_threads; ++i)
hip_exec_activity_callbacks(i);
// callback for hsa
for(auto& itr : roctracer_tear_down_routines())
itr.second();
// Disable tracing and closing the pool
ROCTRACER_CALL(roctracer_disable_callback());
ROCTRACER_CALL(roctracer_disable_activity());
ROCTRACER_CALL(roctracer_close_pool());
}
void
roctracer::start()
{
if(tracker_type::start() == 0) setup();
}
void
roctracer::stop()
{
if(tracker_type::stop() == 0) tear_down();
}
} // namespace component
} // namespace tim
// Extern-template instantiations of the two roctracer components.
// NOTE(review): the second and third macro arguments presumably are the
// "uses storage" flag and the stored value type -- confirm against timemory.
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer, false, void)
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer_data, true, double)
// HSA-runtime tool on-load method
// Entry points exported with C linkage and default visibility.
// OnLoad() builds the HSA setup/tear-down routines and registers them with
// comp::roctracer under the "hsa" label; OnUnload() only logs.
extern "C"
{
// declarations carry the visibility attribute; definitions follow
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) TIMEMORY_VISIBILITY("default");
void OnUnload() TIMEMORY_VISIBILITY("default");
// Registers HSA tracing with the roctracer component. Always returns true.
// `runtime_version`, `failed_tool_count`, and `failed_tool_names` are only passed
// to tim::consume_parameters(); `table` is additionally used by the setup routine.
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names)
{
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
tim::consume_parameters(table, runtime_version, failed_tool_count,
failed_tool_names);
// ONLOAD_TRACE_BEG();
// on_exit(exit_handler, nullptr);
// Setup routine: creates the HSA timer and enables HSA API callbacks and/or HSA
// activity tracing according to the configuration.
// NOTE(review): `table` is captured by value (pointer copy) and dereferenced each
// time the routine runs -- assumes the API table outlives this call; confirm.
auto _setup = [=]() {
get_hsa_timer() =
std::make_unique<hsa_timer_t>(table->core_->hsa_system_get_info_fn);
// const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
const char* output_prefix = nullptr;
// App begin timestamp begin_ts_file.txt
// begin_ts_file_handle = open_output_file(output_prefix,
// "begin_ts_file.txt"); const timestamp_t app_start_time =
// timer->timestamp_fn_ns(); fprintf(begin_ts_file_handle, "%lu\n",
// app_start_time);
bool trace_hsa_api = get_trace_hsa_api();
// list of HSA API names to trace; an empty list enables the whole domain
std::vector<std::string> hsa_api_vec =
tim::delimit(get_trace_hsa_api_types());
// Enable HSA API callbacks/activity
if(trace_hsa_api)
{
// hsa_api_file_handle = open_output_file(output_prefix,
// "hsa_api_trace.txt");
// initialize HSA tracing
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*) table);
HOSTTRACE_DEBUG(" HSA-trace(");
if(!hsa_api_vec.empty())
{
// enable a callback per named API: translate the name to an op code first
for(const auto& itr : hsa_api_vec)
{
uint32_t cid = HSA_API_ID_NUMBER;
const char* api = itr.c_str();
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api,
&cid, nullptr));
ROCTRACER_CALL(roctracer_enable_op_callback(
ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr));
HOSTTRACE_DEBUG(" %s", api);
}
}
else
{
ROCTRACER_CALL(roctracer_enable_domain_callback(
ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr));
}
HOSTTRACE_DEBUG("\n");
}
bool trace_hsa_activity = get_trace_hsa_activity();
// Enable HSA GPU activity
if(trace_hsa_activity)
{
// initialize HSA tracing
// NOTE(review): ops_properties is a local whose address is handed to
// roctracer_set_properties -- presumably copied by the callee; confirm.
::roctracer::hsa_ops_properties_t ops_properties{
table,
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
nullptr, output_prefix
};
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
HOSTTRACE_DEBUG(" HSA-activity-trace()\n");
// only the copy operation is enabled for activity tracing
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS,
HSA_OP_ID_COPY));
}
};
// Tear-down routine: disables the HSA API domain callback and the copy-op
// activity unconditionally (i.e. regardless of what the setup routine enabled).
auto _tear_down = []() {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
ROCTRACER_CALL(
roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
};
// if roctracer is already set up, run the setup routine right away; it is also
// registered so that a later roctracer::setup() runs it
if(comp::roctracer::is_setup()) _setup();
comp::roctracer::add_setup("hsa", std::move(_setup));
comp::roctracer::add_tear_down("hsa", std::move(_tear_down));
return true;
}
// HSA-runtime on-unload method
// Only emits a debug message.
void OnUnload()
{
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
// ONLOAD_TRACE("");
}
}
@@ -0,0 +1,599 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/roctracer_callbacks.hpp"
#include "library.hpp"
#include "library/config.hpp"
#include "library/critical_trace.hpp"
#include "library/thread_data.hpp"
#include "timemory/backends/threading.hpp"
#include <cstdint>
// Declares the `roctracer` API tag; it is referenced below as api::roctracer to
// distinguish this file's thread-data and mutex instances.
TIMEMORY_DEFINE_API(roctracer)
// shorthand for the namespace containing the tag
namespace api = tim::api;
// Returns a reference to a process-wide set of 64-bit values (initially empty,
// created on first use).
std::unordered_set<uint64_t>&
get_roctracer_kernels()
{
    static std::unordered_set<uint64_t> _kernels;
    return _kernels;
}
// Returns the slot at index `_tid` (default: the calling thread's id) from the
// per-thread storage of uint64_t -> roctracer_bundle_t maps tagged with
// api::roctracer. The storage reference is obtained once with the
// construct_on_init tag and cached in a function-local static.
auto&
get_roctracer_hip_data(int64_t _tid = threading::get_id())
{
    using bundle_map_t  = std::unordered_map<uint64_t, roctracer_bundle_t>;
    using thread_data_t = hosttrace_thread_data<bundle_map_t, api::roctracer>;

    static auto& _instances =
        thread_data_t::instances(thread_data_t::construct_on_init{});
    return _instances.at(_tid);
}
// Returns a reference to a process-wide map from 64-bit ids to C-string pointers
// (initially empty, created on first use).
std::unordered_map<uint64_t, const char*>&
get_roctracer_key_data()
{
    static std::unordered_map<uint64_t, const char*> _keys;
    return _keys;
}
// Returns a reference to a process-wide map from 64-bit ids to int64_t values
// (initially empty, created on first use).
std::unordered_map<uint64_t, int64_t>&
get_roctracer_tid_data()
{
    static std::unordered_map<uint64_t, int64_t> _tids;
    return _tids;
}
// value type stored per id: (uint64_t, uint64_t, uint16_t)
using cid_tuple_t = std::tuple<uint64_t, uint64_t, uint16_t>;

// Returns a reference to a process-wide map from 64-bit ids to cid_tuple_t values
// (initially empty, created on first use).
std::unordered_map<uint64_t, cid_tuple_t>&
get_roctracer_cid_data()
{
    static std::unordered_map<uint64_t, cid_tuple_t> _cids;
    return _cids;
}
// Returns the slot at index `_tid` (default: the calling thread's id) from the
// per-thread storage of std::function<void()> vectors tagged with api::roctracer.
// The storage reference is obtained once with the construct_on_init tag and cached
// in a function-local static.
auto&
get_hip_activity_callbacks(int64_t _tid = threading::get_id())
{
    using callback_list_t = std::vector<std::function<void()>>;
    using thread_data_t   = hosttrace_thread_data<callback_list_t, api::roctracer>;

    static auto& _instances =
        thread_data_t::instances(thread_data_t::construct_on_init{});
    return _instances.at(_tid);
}
// Returns a reference to the process-wide owning pointer for the HSA timer. The
// pointer is null until a caller assigns a timer through the returned reference.
std::unique_ptr<hsa_timer_t>&
get_hsa_timer()
{
    static std::unique_ptr<hsa_timer_t> _timer;
    return _timer;
}
// Type tags derived from the (decayed) return types of the data accessors above.
// Despite the "_mutex_t" suffix these are not mutex types; hip_activity_mutex_t is
// used below as the key type for tim::type_mutex.
using hip_activity_mutex_t = std::decay_t<decltype(get_hip_activity_callbacks())>;
using key_data_mutex_t = std::decay_t<decltype(get_roctracer_key_data())>;
using hip_data_mutex_t = std::decay_t<decltype(get_roctracer_hip_data())>;
using cid_data_mutex_t = std::decay_t<decltype(get_roctracer_cid_data())>;
// Returns the mutex selected by `_tid` (default: the calling thread's id) from the
// tim::type_mutex family keyed on hip_activity_mutex_t / api::roctracer with
// max_supported_threads entries.
auto&
get_hip_activity_mutex(int64_t _tid = threading::get_id())
{
    auto& _mtx =
        tim::type_mutex<hip_activity_mutex_t, api::roctracer, max_supported_threads>(
            _tid);
    return _mtx;
}
// HSA API callback function
//
// Invoked for HSA API calls on entry and exit (data->phase). For every API id that
// is not in the ignore list below, the entry timestamp is stored per thread and, on
// exit, the [begin, end] interval is reported to perfetto and/or timemory.
//
// Nothing is recorded unless the state is State::Active, the roctracer component
// is runtime-enabled, and the HSA timer has been created.
//
// domain, cid    : identify the API call (also used to look up its name)
// callback_data  : reinterpreted as hsa_api_data_t
// arg            : unused
void
hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
{
    if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
        return;

    (void) arg;
    const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
    HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n",
                    roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
                    (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");

    // entry timestamp of the most recent non-ignored call on this thread
    static thread_local timestamp_t begin_timestamp = 0;
    static auto&                    timer           = get_hsa_timer();
    // scope configuration is evaluated once, on the first call
    static auto _scope = []() {
        auto _v = scope::config{};
        if(get_roctracer_timeline_profile()) _v += scope::timeline{};
        if(get_roctracer_flat_profile()) _v += scope::flat{};
        return _v;
    }();

    if(!timer) return;

    switch(cid)
    {
        // these API calls are not recorded
        case HSA_API_ID_hsa_init:
        case HSA_API_ID_hsa_shut_down:
        case HSA_API_ID_hsa_agent_get_exception_policies:
        case HSA_API_ID_hsa_agent_get_info:
        case HSA_API_ID_hsa_amd_agent_iterate_memory_pools:
        case HSA_API_ID_hsa_amd_agent_memory_pool_get_info:
        case HSA_API_ID_hsa_amd_coherency_get_type:
        case HSA_API_ID_hsa_amd_memory_pool_get_info:
        case HSA_API_ID_hsa_amd_pointer_info:
        case HSA_API_ID_hsa_amd_pointer_info_set_userdata:
        case HSA_API_ID_hsa_amd_profiling_async_copy_enable:
        case HSA_API_ID_hsa_amd_profiling_get_async_copy_time:
        case HSA_API_ID_hsa_amd_profiling_get_dispatch_time:
        case HSA_API_ID_hsa_amd_profiling_set_profiler_enabled:
        case HSA_API_ID_hsa_cache_get_info:
        case HSA_API_ID_hsa_code_object_get_info:
        case HSA_API_ID_hsa_code_object_get_symbol:
        case HSA_API_ID_hsa_code_object_get_symbol_from_name:
        case HSA_API_ID_hsa_code_object_reader_create_from_memory:
        case HSA_API_ID_hsa_code_symbol_get_info:
        case HSA_API_ID_hsa_executable_create_alt:
        case HSA_API_ID_hsa_executable_freeze:
        case HSA_API_ID_hsa_executable_get_info:
        case HSA_API_ID_hsa_executable_get_symbol:
        case HSA_API_ID_hsa_executable_get_symbol_by_name:
        case HSA_API_ID_hsa_executable_symbol_get_info:
        case HSA_API_ID_hsa_extension_get_name:
        case HSA_API_ID_hsa_ext_image_data_get_info:
        case HSA_API_ID_hsa_ext_image_data_get_info_with_layout:
        case HSA_API_ID_hsa_ext_image_get_capability:
        case HSA_API_ID_hsa_ext_image_get_capability_with_layout:
        case HSA_API_ID_hsa_isa_get_exception_policies:
        case HSA_API_ID_hsa_isa_get_info:
        case HSA_API_ID_hsa_isa_get_info_alt:
        case HSA_API_ID_hsa_isa_get_round_method:
        case HSA_API_ID_hsa_region_get_info:
        case HSA_API_ID_hsa_system_extension_supported:
        case HSA_API_ID_hsa_system_get_extension_table:
        case HSA_API_ID_hsa_system_get_info:
        case HSA_API_ID_hsa_system_get_major_extension_table:
        case HSA_API_ID_hsa_wavefront_get_info: break;
        default:
        {
            if(data->phase == ACTIVITY_API_PHASE_ENTER)
            {
                begin_timestamp = timer->timestamp_fn_ns();
            }
            else
            {
                const auto* _name = roctracer_op_string(domain, cid, 0);
                // hsa_shut_down has its own case label above and never reaches
                // this branch, so the end timestamp is always read from the timer
                const timestamp_t end_timestamp = timer->timestamp_fn_ns();

                // discard inverted intervals
                if(begin_timestamp > end_timestamp) return;

                if(get_use_perfetto())
                {
                    TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name },
                                      begin_timestamp);
                    TRACE_EVENT_END("device", end_timestamp);
                }

                if(get_use_timemory())
                {
                    // the timemory bundle is not updated in this thread; the update
                    // is handed to the roctracer task group while holding its mutex
                    // (the original note reports segmentation faults when data is
                    // collected directly in this callback)
                    std::unique_lock<std::mutex> _lk{ tasking::get_roctracer_mutex() };
                    auto                         _begin_ns = begin_timestamp;
                    auto                         _end_ns   = end_timestamp;
                    tasking::get_roctracer_task_group().exec(
                        [_name, _begin_ns, _end_ns]() {
                            roctracer_hsa_bundle_t _bundle{ _name, _scope };
                            _bundle.start()
                                .store(std::plus<double>{},
                                       static_cast<double>(_end_ns - _begin_ns))
                                .stop();
                        });
                }
            }
        }
    }
}
// HSA activity callback: reports the [begin_ns, end_ns] interval of a dispatch,
// copy, or barrier record to perfetto and/or timemory. Records for any other
// operation id are ignored.
//
// The reporting itself is not executed here; it is wrapped in a task and handed
// to the roctracer task group while holding the roctracer mutex (the original note
// reports segmentation faults when data is collected directly in this callback).
//
// op      : HSA operation id
// record  : activity record providing begin_ns / end_ns
// arg     : unused
void
hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
{
    // map the operation id onto its label
    const char* _label = nullptr;
    switch(op)
    {
        case HSA_OP_ID_DISPATCH: _label = "hsa_dispatch"; break;
        case HSA_OP_ID_COPY: _label = "hsa_async_copy"; break;
        case HSA_OP_ID_BARRIER: _label = "hsa_barrier"; break;
        default: return;
    }

    const auto _begin_ns = record->begin_ns;
    const auto _end_ns   = record->end_ns;

    // scope configuration is evaluated once, on the first recorded operation
    static auto _scope = []() {
        auto _cfg = scope::config{};
        if(get_roctracer_timeline_profile()) _cfg += scope::timeline{};
        if(get_roctracer_flat_profile()) _cfg += scope::flat{};
        return _cfg;
    }();

    auto _report = [_begin_ns, _end_ns, _label]() {
        if(get_use_perfetto())
        {
            TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _label }, _begin_ns);
            TRACE_EVENT_END("device", _end_ns);
        }
        if(get_use_timemory())
        {
            roctracer_hsa_bundle_t _bundle{ _label, _scope };
            _bundle.start()
                .store(std::plus<double>{}, static_cast<double>(_end_ns - _begin_ns))
                .stop();
        }
    };

    std::unique_lock<std::mutex> _lock{ tasking::get_roctracer_mutex() };
    tasking::get_roctracer_task_group().exec(_report);

    tim::consume_parameters(arg);
}
// Executes every deferred HIP activity callback queued for thread `_tid` and then
// empties the queue. The per-thread activity mutex is held for the whole drain.
// NOTE: roctracer_flush_activity() is not invoked here (the call is disabled).
void
hip_exec_activity_callbacks(int64_t _tid)
{
    tim::auto_lock_t _drain_lk{ get_hip_activity_mutex(_tid) };
    auto&            _pending = get_hip_activity_callbacks(_tid);
    for(auto& _callback : *_pending)
    {
        _callback();
    }
    _pending->clear();
}
// File-local (internal linkage) state.
namespace
{
// Per-thread map from size_t keys to size_t values; starts out empty.
// NOTE(review): the name suggests it relates GPU correlation ids -- no use of
// this variable is visible in this section, confirm before relying on it.
thread_local std::unordered_map<size_t, size_t> gpu_cids = {};
}
// HIP API callback function
void
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
{
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
return;
using Device = critical_trace::Device;
using Phase = critical_trace::Phase;
const char* op_name = roctracer_op_string(domain, cid, 0);
if(op_name == nullptr) op_name = hip_api_name(cid);
if(op_name == nullptr) return;
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n", op_name, cid,
data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
switch(cid)
{
case HIP_API_ID___hipPushCallConfiguration:
case HIP_API_ID___hipPopCallConfiguration:
case HIP_API_ID_hipDeviceEnablePeerAccess:
case HIP_API_ID_hipImportExternalMemory:
case HIP_API_ID_hipDestroyExternalMemory: return;
default: break;
}
if(data->phase == ACTIVITY_API_PHASE_ENTER)
{
switch(cid)
{
case HIP_API_ID_hipLaunchKernel:
case HIP_API_ID_hipLaunchCooperativeKernel:
{
const char* _name =
hipKernelNameRefByPtr(data->args.hipLaunchKernel.function_address,
data->args.hipLaunchKernel.stream);
if(_name != nullptr)
{
if(get_use_perfetto() || get_use_timemory())
{
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
get_roctracer_key_data().emplace(data->correlation_id, _name);
get_roctracer_tid_data().emplace(data->correlation_id,
threading::get_id());
}
}
break;
}
case HIP_API_ID_hipModuleLaunchKernel:
{
const char* _name = hipKernelNameRef(data->args.hipModuleLaunchKernel.f);
if(_name != nullptr)
{
if(get_use_perfetto() || get_use_timemory())
{
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
get_roctracer_key_data().emplace(data->correlation_id, _name);
get_roctracer_tid_data().emplace(data->correlation_id,
threading::get_id());
}
}
break;
}
default:
{
if(get_use_perfetto() || get_use_timemory())
{
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
get_roctracer_key_data().emplace(data->correlation_id, op_name);
get_roctracer_tid_data().emplace(data->correlation_id,
threading::get_id());
}
}
}
if(get_use_perfetto())
{
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ op_name });
}
if(get_use_timemory())
{
get_roctracer_hip_data()->emplace(
data->correlation_id,
roctracer_bundle_t{ op_name, quirk::config<quirk::auto_start>{} });
}
if(get_use_critical_trace())
{
auto _cid = get_cpu_cid()++;
uint16_t _depth = (get_cpu_cid_stack()->empty())
? get_cpu_cid_stack(0)->size()
: get_cpu_cid_stack()->size() - 1;
auto _parent_cid = (get_cpu_cid_stack()->empty())
? get_cpu_cid_stack(0)->back()
: get_cpu_cid_stack()->back();
int64_t _ts = comp::wall_clock::record();
add_critical_trace<Device::GPU, Phase::BEGIN>(
threading::get_id(), _cid, data->correlation_id, _parent_cid, _ts, 0,
critical_trace::add_hash_id(op_name), _depth);
tim::auto_lock_t _lk{ tim::type_mutex<cid_data_mutex_t>() };
get_roctracer_cid_data().emplace(data->correlation_id,
cid_tuple_t{ _cid, _parent_cid, _depth });
}
hip_exec_activity_callbacks(threading::get_id());
}
else if(data->phase == ACTIVITY_API_PHASE_EXIT)
{
hip_exec_activity_callbacks(threading::get_id());
if(get_use_perfetto())
{
TRACE_EVENT_END("device");
}
if(get_use_timemory())
{
auto _stop = [data](int64_t _tid) {
auto& _data = get_roctracer_hip_data(_tid);
auto itr = _data->find(data->correlation_id);
if(itr != get_roctracer_hip_data()->end())
{
itr->second.stop().pop();
_data->erase(itr);
return true;
}
return false;
};
if(!_stop(threading::get_id()))
{
for(size_t i = 0; i < max_supported_threads; ++i)
{
if(_stop(i)) break;
}
}
}
if(get_use_critical_trace())
{
uint16_t _depth = 0;
uint64_t _cid = 0;
uint64_t _parent_cid = 0;
{
tim::auto_lock_t _lk{ tim::type_mutex<cid_data_mutex_t>() };
std::tie(_cid, _parent_cid, _depth) =
get_roctracer_cid_data().at(data->correlation_id);
}
int64_t _ts = comp::wall_clock::record();
add_critical_trace<Device::GPU, Phase::END>(
threading::get_id(), _cid, data->correlation_id, _parent_cid, _ts, _ts,
critical_trace::add_hash_id(op_name), _depth);
}
}
tim::consume_parameters(arg);
}
// Activity tracing callback
void
hip_activity_callback(const char* begin, const char* end, void*)
{
using Device = critical_trace::Device;
using Phase = critical_trace::Phase;
if(!trait::runtime_enabled<comp::roctracer>::get()) return;
static auto _kernel_names = std::unordered_map<const char*, std::string>{};
static auto _indexes = std::unordered_map<uint64_t, int>{};
const roctracer_record_t* record = reinterpret_cast<const roctracer_record_t*>(begin);
const roctracer_record_t* end_record =
reinterpret_cast<const roctracer_record_t*>(end);
HOSTTRACE_DEBUG("Activity records:\n");
while(record < end_record)
{
const char* op_name =
roctracer_op_string(record->domain, record->correlation_id, 0);
if(op_name == nullptr) op_name = hip_api_name(record->correlation_id);
if(op_name != nullptr)
{
HOSTTRACE_DEBUG("\t%-30s\tcorrelation_id(%6lu) time_ns(%12lu:%12lu) "
"delta_ns(%12lu) device_id(%d) "
"stream_id(%lu)\n",
op_name, record->correlation_id, record->begin_ns,
record->end_ns, (record->end_ns - record->begin_ns),
record->device_id, record->queue_id);
}
auto _begin_ns = record->begin_ns;
auto _end_ns = record->end_ns;
auto _corr_id = record->correlation_id;
static auto _scope = []() {
auto _v = scope::config{};
if(get_roctracer_timeline_profile()) _v += scope::timeline{};
if(get_roctracer_flat_profile()) _v += scope::flat{};
return _v;
}();
auto& _keys = get_roctracer_key_data();
auto& _cids = get_roctracer_cid_data();
auto& _tids = get_roctracer_tid_data();
int16_t _depth = 0; // depth of kernel launch
int64_t _tid = 0; // thread id
uint64_t _cid = 0; // correlation id
uint64_t _pcid = 0; // parent corr_id
auto _laps = _indexes[_corr_id]++; // see note #1
const char* _name = nullptr;
bool _found = false;
bool _critical_trace = get_use_critical_trace();
{
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
if(_tids.find(_corr_id) != _tids.end())
{
_found = true;
_tid = _tids.at(_corr_id);
auto itr = _keys.find(_corr_id);
if(itr != _keys.end()) _name = itr->second;
}
}
if(_critical_trace)
{
tim::auto_lock_t _lk{ tim::type_mutex<cid_data_mutex_t>() };
if(_cids.find(_corr_id) != _cids.end())
std::tie(_cid, _pcid, _depth) = _cids.at(_corr_id);
else
_critical_trace = false;
}
auto _func = [_critical_trace, _depth, _tid, _cid, _laps, _begin_ns, _end_ns,
_corr_id, _name]() {
// NOTE #1: we get two measurements for 1 kernel so we need to
// tweak the number of laps for the wall-clock component
if(_name != nullptr)
{
if(get_use_perfetto())
{
if(_kernel_names.find(_name) == _kernel_names.end())
_kernel_names.emplace(_name, tim::demangle(_name));
TRACE_EVENT_BEGIN(
"device",
perfetto::StaticString{ _kernel_names.at(_name).c_str() },
_begin_ns);
TRACE_EVENT_END("device", _end_ns);
}
if(get_use_timemory())
{
roctracer_bundle_t _bundle{ _name, _scope };
_bundle.start()
.store(std::plus<double>{},
static_cast<double>(_end_ns - _begin_ns))
.stop()
.get<comp::wall_clock>([&](comp::wall_clock* wc) {
wc->set_value(_end_ns - _begin_ns);
wc->set_accum(_end_ns - _begin_ns);
if(_laps % 2 == 1)
{
// below is a hack bc we get two measurements for 1 kernel
wc->set_laps(0);
auto itr = wc->get_iterator();
if(itr && itr->data().get_laps() == 0)
{
wc->set_is_invalid(true);
itr->data().set_is_invalid(true);
}
}
return wc;
});
_bundle.pop();
}
if(_critical_trace)
{
auto _hash = critical_trace::add_hash_id(_name);
uint16_t _prio = _laps + 1; // priority
add_critical_trace<Device::GPU, Phase::DELTA, false>(
_tid, _cid, _corr_id, _cid, _begin_ns, _end_ns, _hash, _depth + 1,
_prio);
}
}
};
if(_found)
{
auto& _async_ops = get_hip_activity_callbacks(_tid);
tim::auto_lock_t _lk{ get_hip_activity_mutex(_tid) };
_async_ops->emplace_back(std::move(_func));
}
ROCTRACER_CALL(roctracer_next_record(record, &record));
}
}
// Accessor for the flag recording whether roctracer has been set up.
// Returns a mutable reference to a function-local static which starts out false.
bool&
roctracer_is_setup()
{
    static bool _is_setup{ false };
    return _is_setup;
}
// A list of (label, routine) pairs.
using roctracer_functions_t = std::vector<std::pair<std::string, std::function<void()>>>;

// Accessor for the list of labelled setup routines.
// Returns a mutable reference to a function-local static which starts out empty.
roctracer_functions_t&
roctracer_setup_routines()
{
    static roctracer_functions_t _routines{};
    return _routines;
}
// Accessor for the list of labelled tear-down routines.
// Returns a mutable reference to a function-local static which starts out empty.
roctracer_functions_t&
roctracer_tear_down_routines()
{
    static roctracer_functions_t _routines{};
    return _routines;
}
@@ -0,0 +1,36 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/thread_data.hpp"
// Accessor for the array of bundle instances.
// Returns a mutable reference to a function-local static which is
// value-initialized on first use.
instrumentation_bundles::instance_array_t&
instrumentation_bundles::instances()
{
    static instance_array_t _instances{};
    return _instances;
}
@@ -0,0 +1,31 @@
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "library/timemory.hpp"
// Storage initialization for the wall_clock and user_global_bundle components.
// NOTE(review): presumably this makes sure the storage of these components exists
// in this translation unit before first use -- confirm against the timemory docs.
TIMEMORY_INITIALIZE_STORAGE(comp::wall_clock, comp::user_global_bundle)
@@ -1,645 +0,0 @@
#include "roctracer.hpp"
#include "library.hpp"
#include <roctracer.h>
#include <roctracer_ext.h>
#include <roctracer_hcc.h>
#include <roctracer_hip.h>
#include <roctracer_kfd.h>
#define AMD_INTERNAL_BUILD 1
#include <ext/hsa_rt_utils.hpp>
#include <roctracer_hsa.h>
#include <atomic>
// Evaluates a ROC-tracer call exactly once and checks its status: when the call
// yields a non-zero value, roctracer's error string and the stringified call are
// written to std::cerr (flushed, no trailing newline). Execution continues
// either way.
#define ROCTRACER_CALL(call)                                                             \
    do                                                                                   \
    {                                                                                    \
        if((call) != 0)                                                                  \
        {                                                                                \
            std::cerr << roctracer_error_string() << " in: " << #call << std::flush;     \
        }                                                                                \
    } while(0)
using roctracer_bundle_t = tim::component_tuple<comp::roctracer_data>;
namespace units = tim::units;
namespace
{
// Accessor for the set of correlation ids which belong to kernel launches.
// Returns a mutable reference to a function-local static which starts out empty.
auto&
get_roctracer_kernels()
{
    static std::unordered_set<uint64_t> _kernel_ids{};
    return _kernel_ids;
}
// Accessor for the map from correlation id to the timemory bundle measuring it.
// Returns a mutable reference to a function-local static which starts out empty.
auto&
get_roctracer_hip_data()
{
    static std::unordered_map<uint64_t, roctracer_bundle_t> _bundles{};
    return _bundles;
}
// Accessor for the map from correlation id to the name recorded for it.
// Returns a mutable reference to a function-local static which starts out empty.
auto&
get_roctracer_key_data()
{
    static std::unordered_map<uint64_t, const char*> _names{};
    return _names;
}
using data_type_mutex_t = std::decay_t<decltype(get_roctracer_hip_data())>;
using hsa_timer_t = hsa_rt_utils::Timer;
using timestamp_t = hsa_timer_t::timestamp_t;
// Accessor for the owning pointer to the HSA timer.
// Returns a mutable reference to a function-local static unique_ptr which is
// null until a timer is assigned to it.
auto&
get_hsa_timer()
{
    static std::unique_ptr<hsa_timer_t> _timer{};
    return _timer;
}
// HSA API callback function
//
// Invoked for the enter and exit phase of a traced HSA API call. Does nothing
// unless the library state is Active, the roctracer component is enabled at
// runtime and the HSA timer has been created.
//
// The API ids listed in the switch below are ignored. For every other id the
// enter phase stores a timestamp in a thread-local variable and the exit phase
// emits a perfetto "device" slice spanning [enter timestamp, exit timestamp].
//
// domain / cid   : roctracer domain and HSA API id of the call
// callback_data  : reinterpreted as `const hsa_api_data_t*`
// arg            : unused
void
hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
{
    if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
        return;

    (void) arg;

    const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
    HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n",
                    roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
                    (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");

    // timestamp taken in the enter phase of the most recent call on this thread
    static thread_local timestamp_t hsa_begin_timestamp = 0;
    static auto&                    timer               = get_hsa_timer();

    if(!timer) return;

    switch(cid)
    {
        // ignored API calls
        case HSA_API_ID_hsa_init:
        case HSA_API_ID_hsa_shut_down:
        case HSA_API_ID_hsa_agent_get_exception_policies:
        case HSA_API_ID_hsa_agent_get_info:
        case HSA_API_ID_hsa_amd_agent_iterate_memory_pools:
        case HSA_API_ID_hsa_amd_agent_memory_pool_get_info:
        case HSA_API_ID_hsa_amd_coherency_get_type:
        case HSA_API_ID_hsa_amd_memory_pool_get_info:
        case HSA_API_ID_hsa_amd_pointer_info:
        case HSA_API_ID_hsa_amd_pointer_info_set_userdata:
        case HSA_API_ID_hsa_amd_profiling_async_copy_enable:
        case HSA_API_ID_hsa_amd_profiling_get_async_copy_time:
        case HSA_API_ID_hsa_amd_profiling_get_dispatch_time:
        case HSA_API_ID_hsa_amd_profiling_set_profiler_enabled:
        case HSA_API_ID_hsa_cache_get_info:
        case HSA_API_ID_hsa_code_object_get_info:
        case HSA_API_ID_hsa_code_object_get_symbol:
        case HSA_API_ID_hsa_code_object_get_symbol_from_name:
        case HSA_API_ID_hsa_code_object_reader_create_from_memory:
        case HSA_API_ID_hsa_code_symbol_get_info:
        case HSA_API_ID_hsa_executable_create_alt:
        case HSA_API_ID_hsa_executable_freeze:
        case HSA_API_ID_hsa_executable_get_info:
        case HSA_API_ID_hsa_executable_get_symbol:
        case HSA_API_ID_hsa_executable_get_symbol_by_name:
        case HSA_API_ID_hsa_executable_symbol_get_info:
        case HSA_API_ID_hsa_extension_get_name:
        case HSA_API_ID_hsa_ext_image_data_get_info:
        case HSA_API_ID_hsa_ext_image_data_get_info_with_layout:
        case HSA_API_ID_hsa_ext_image_get_capability:
        case HSA_API_ID_hsa_ext_image_get_capability_with_layout:
        case HSA_API_ID_hsa_isa_get_exception_policies:
        case HSA_API_ID_hsa_isa_get_info:
        case HSA_API_ID_hsa_isa_get_info_alt:
        case HSA_API_ID_hsa_isa_get_round_method:
        case HSA_API_ID_hsa_region_get_info:
        case HSA_API_ID_hsa_system_extension_supported:
        case HSA_API_ID_hsa_system_get_extension_table:
        case HSA_API_ID_hsa_system_get_info:
        case HSA_API_ID_hsa_system_get_major_extension_table:
        case HSA_API_ID_hsa_wavefront_get_info: break;
        default:
        {
            if(data->phase == ACTIVITY_API_PHASE_ENTER)
            {
                hsa_begin_timestamp = timer->timestamp_fn_ns();
            }
            else
            {
                auto _name = roctracer_op_string(domain, cid, 0);
                // hsa_shut_down is filtered out by the case list above, so the
                // timer can always be queried here
                const timestamp_t end_timestamp = timer->timestamp_fn_ns();
                if(get_use_perfetto())
                {
                    TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name },
                                      hsa_begin_timestamp);
                    TRACE_EVENT_END("device", end_timestamp);
                }
                // timemory is disabled in this callback because collecting data in
                // this thread causes strange segmentation faults
            }
        }
    }
}
// Callback for asynchronous HSA operation records (dispatch, copy, barrier).
// Records with any other operation id are ignored. When perfetto is enabled, a
// "device" slice spanning the record's begin/end timestamps is emitted.
void
hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
{
    // translate the operation id into a label (nullptr: not an operation of interest)
    const char* _label = nullptr;
    if(op == HSA_OP_ID_DISPATCH)
        _label = "hsa_dispatch";
    else if(op == HSA_OP_ID_COPY)
        _label = "hsa_async_copy";
    else if(op == HSA_OP_ID_BARRIER)
        _label = "hsa_barrier";

    if(_label == nullptr) return;

    if(get_use_perfetto())
    {
        TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _label }, record->begin_ns);
        TRACE_EVENT_END("device", record->end_ns);
    }

    // timemory is not used in this callback: collecting data in this thread was
    // observed to cause segmentation faults
    tim::consume_parameters(arg);
}
// HIP API callback function
void
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
{
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
return;
static auto _scope = scope::flat() + scope::timeline();
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n",
roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
if(data->phase == ACTIVITY_API_PHASE_ENTER)
{
switch(cid)
{
case HIP_API_ID___hipPushCallConfiguration:
case HIP_API_ID___hipPopCallConfiguration: break;
case HIP_API_ID_hipLaunchKernel:
{
const char* _name =
hipKernelNameRefByPtr(data->args.hipLaunchKernel.function_address,
data->args.hipLaunchKernel.stream);
tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
get_roctracer_kernels().emplace(data->correlation_id);
if(get_use_perfetto())
{
get_roctracer_key_data().emplace(data->correlation_id, _name);
}
if(get_use_timemory())
{
get_roctracer_hip_data().emplace(data->correlation_id,
roctracer_bundle_t{ _name, _scope });
}
break;
}
case HIP_API_ID_hipModuleLaunchKernel:
{
const char* _name = hipKernelNameRef(data->args.hipModuleLaunchKernel.f);
tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
get_roctracer_kernels().emplace(data->correlation_id);
if(get_use_perfetto())
{
get_roctracer_key_data().emplace(data->correlation_id, _name);
}
if(get_use_timemory())
{
get_roctracer_hip_data().emplace(data->correlation_id,
roctracer_bundle_t{ _name, _scope });
}
break;
}
default:
{
tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
const char* _name = roctracer_op_string(domain, cid, 0);
if(get_use_perfetto())
{
get_roctracer_key_data().emplace(data->correlation_id, _name);
}
if(get_use_timemory())
{
get_roctracer_hip_data().emplace(data->correlation_id,
roctracer_bundle_t{ _name, _scope });
}
break;
}
}
}
else if(data->phase == ACTIVITY_API_PHASE_EXIT)
{}
tim::consume_parameters(domain, arg);
}
// Activity tracing callback
//
// Walks the activity records in [begin, end) while holding the data lock. For
// each record whose correlation id was registered by hip_api_callback, a perfetto
// "device" slice and/or a timemory measurement of (end_ns - begin_ns) is emitted.
// Records of known kernels which carry a non-zero device id and queue id are
// skipped. The correlation ids that were consumed are erased from the registered
// data afterwards.
//
// begin / end : byte range holding the activity records
// (third arg) : unused
void
hip_activity_callback(const char* begin, const char* end, void*)
{
    if(!trait::runtime_enabled<comp::roctracer>::get()) return;

    // cache of demangled kernel names, keyed by the mangled-name pointer
    static auto _kernel_names = std::unordered_map<const char*, std::string>{};

    const roctracer_record_t* record = reinterpret_cast<const roctracer_record_t*>(begin);
    const roctracer_record_t* end_record =
        reinterpret_cast<const roctracer_record_t*>(end);

    // correlation ids consumed during this invocation
    std::unordered_set<uint64_t> _indexes{};

    tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
    auto&            _data    = get_roctracer_hip_data();
    auto&            _keys    = get_roctracer_key_data();
    auto&            _kernels = get_roctracer_kernels();

    HOSTTRACE_DEBUG("Activity records:\n");

    while(record < end_record)
    {
        // the operation is identified by the record's op/kind, not by its
        // correlation id
        HOSTTRACE_DEBUG("\t%-30s\tcorrelation_id(%lu) time_ns(%lu:%lu) device_id(%d) "
                        "stream_id(%lu)\n",
                        roctracer_op_string(record->domain, record->op, record->kind),
                        record->correlation_id, record->begin_ns, record->end_ns,
                        record->device_id, record->queue_id);

        auto _is_kernel = _kernels.find(record->correlation_id) != _kernels.end();
        if(_is_kernel && record->device_id != 0 && record->queue_id != 0)
        {
            // these are overheads associated with the kernel launch, not kernel runtime
            ROCTRACER_CALL(roctracer_next_record(record, &record));
            continue;
        }

        // perfetto: slice labelled with the demangled name
        auto kitr =
            (get_use_perfetto()) ? _keys.find(record->correlation_id) : _keys.end();
        if(kitr != _keys.end())
        {
            if(_kernel_names.find(kitr->second) == _kernel_names.end())
                _kernel_names.emplace(kitr->second, tim::demangle(kitr->second));
            TRACE_EVENT_BEGIN(
                "device",
                perfetto::StaticString{ _kernel_names.at(kitr->second).c_str() },
                record->begin_ns);
            TRACE_EVENT_END("device", record->end_ns);
            _indexes.emplace(kitr->first);
        }

        // timemory: store the duration in the bundle created by the API callback
        auto itr =
            (get_use_timemory()) ? _data.find(record->correlation_id) : _data.end();
        if(itr != _data.end())
        {
            itr->second.start()
                .store(std::plus<double>{},
                       static_cast<double>(record->end_ns - record->begin_ns))
                .stop();
            _indexes.emplace(itr->first);
        }

        ROCTRACER_CALL(roctracer_next_record(record, &record));
    }

    // drop the entries which were consumed above
    if(get_use_perfetto())
    {
        for(auto& itr : _indexes)
            _keys.erase(itr);
    }
    if(get_use_timemory())
    {
        for(auto& itr : _indexes)
            _data.erase(itr);
    }

    HOSTTRACE_DEBUG("[%s] recorded %lu phases\n", __FUNCTION__,
                    (unsigned long) _indexes.size());
}
// Accessor for the flag recording whether roctracer has been set up.
// Returns a mutable reference to a function-local static which starts out false.
bool&
roctracer_is_setup()
{
    static bool _is_setup{ false };
    return _is_setup;
}
// A list of (label, routine) pairs.
using roctracer_functions_t = std::vector<std::pair<std::string, std::function<void()>>>;

// Accessor for the list of labelled setup routines.
// Returns a mutable reference to a function-local static which starts out empty.
auto&
roctracer_setup_routines()
{
    static roctracer_functions_t _routines{};
    return _routines;
}
auto&
roctracer_tear_down_routines()
{
static auto _v = roctracer_functions_t{};
return _v;
}
} // namespace
// Default path of the KFD wrapper library. May be overridden at compile time by
// defining HOSTTRACE_ROCTRACER_LIBKFDWRAPPER before this point.
#if !defined(HOSTTRACE_ROCTRACER_LIBKFDWRAPPER)
# define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "/opt/rocm/roctracer/lib/libkfdwrapper64.so"
#endif
// Owning wrapper around a dlopen() handle.
//
// The library path is read from the environment variable `_env`, falling back to
// `_fname` (via tim::get_env). The library is opened in the constructor and
// closed in the destructor. The type is move-only: moving transfers ownership of
// the handle and leaves the source without a handle, so the handle is closed
// exactly once.
struct dynamic_library
{
    dynamic_library()                       = delete;
    dynamic_library(const dynamic_library&) = delete;
    dynamic_library& operator=(const dynamic_library&) = delete;

    // Takes over the handle of `rhs`; `rhs` no longer owns a handle afterwards.
    dynamic_library(dynamic_library&& rhs) noexcept
    : envname{ std::move(rhs.envname) }
    , filename{ std::move(rhs.filename) }
    , flags{ rhs.flags }
    , handle{ rhs.handle }
    {
        rhs.handle = nullptr;
    }

    // Closes the currently owned handle (if any) and takes over the one of `rhs`.
    dynamic_library& operator=(dynamic_library&& rhs) noexcept
    {
        if(this != &rhs)
        {
            if(handle) dlclose(handle);
            envname    = std::move(rhs.envname);
            filename   = std::move(rhs.filename);
            flags      = rhs.flags;
            handle     = rhs.handle;
            rhs.handle = nullptr;
        }
        return *this;
    }

    // _env   : name of the environment variable holding the library path
    // _fname : path used when the environment variable is not set
    // _flags : flags passed to dlopen()
    // _store : forwarded to tim::get_env
    //
    // A failure to open the library is reported on stderr; `handle` is then null.
    dynamic_library(const char* _env, const char* _fname,
                    int _flags = (RTLD_NOW | RTLD_GLOBAL), bool _store = false)
    : envname{ _env }
    , filename{ tim::get_env<std::string>(_env, _fname, _store) }
    , flags{ _flags }
    {
        handle = dlopen(filename.c_str(), flags);
        if(!handle) fprintf(stderr, "%s\n", dlerror());
        dlerror();  // Clear any existing error
    }

    ~dynamic_library()
    {
        if(handle) dlclose(handle);
    }

    std::string envname  = {};       // environment variable consulted for the path
    std::string filename = {};       // path passed to dlopen()
    int         flags    = 0;        // flags passed to dlopen()
    void*       handle   = nullptr;  // dlopen() handle, null if not open / moved-from
};
namespace tim
{
namespace component
{
void
roctracer::preinit()
{
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
roctracer_data::label() = "roctracer";
roctracer_data::description() = "ROCm tracer (activity API)";
}
bool
roctracer::is_setup()
{
return roctracer_is_setup();
}
void
roctracer::add_setup(const std::string& _lbl, std::function<void()>&& _func)
{
roctracer_setup_routines().emplace_back(_lbl, std::move(_func));
}
void
roctracer::add_tear_down(const std::string& _lbl, std::function<void()>&& _func)
{
roctracer_tear_down_routines().emplace_back(_lbl, std::move(_func));
}
void
roctracer::remove_setup(const std::string& _lbl)
{
auto& _data = roctracer_setup_routines();
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
{
if(itr->first == _lbl)
{
_data.erase(itr);
break;
}
}
}
void
roctracer::remove_tear_down(const std::string& _lbl)
{
auto& _data = roctracer_setup_routines();
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
{
if(itr->first == _lbl)
{
_data.erase(itr);
break;
}
}
}
void
roctracer::setup()
{
if(!get_use_timemory() && !get_use_perfetto()) return;
auto_lock_t _lk{ type_mutex<roctracer>() };
if(roctracer_is_setup()) return;
roctracer_is_setup() = true;
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
tim::set_env("HSA_TOOLS_LIB", "libhosttrace.so", 0);
auto _kfdwrapper = dynamic_library{ "HOSTTRACE_ROCTRACER_LIBKFDWRAPPER",
HOSTTRACE_ROCTRACER_LIBKFDWRAPPER };
ROCTRACER_CALL(roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, nullptr));
if(roctracer_default_pool() == nullptr)
{
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 0x1000;
properties.buffer_callback_fun = hip_activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
}
// Enable API callbacks, all domains
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, nullptr));
// Enable activity tracing, all domains
ROCTRACER_CALL(roctracer_enable_activity());
// callback for HSA
for(auto& itr : roctracer_setup_routines())
itr.second();
}
void
roctracer::tear_down()
{
auto_lock_t _lk{ type_mutex<roctracer>() };
if(!roctracer_is_setup()) return;
roctracer_is_setup() = false;
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
// flush all the activity
if(roctracer_default_pool() != nullptr)
{
ROCTRACER_CALL(roctracer_flush_activity());
}
// flush all buffers
roctracer_flush_buf();
// callback for hsa
for(auto& itr : roctracer_tear_down_routines())
itr.second();
// Disable tracing and closing the pool
ROCTRACER_CALL(roctracer_disable_callback());
ROCTRACER_CALL(roctracer_disable_activity());
ROCTRACER_CALL(roctracer_close_pool());
}
void
roctracer::start()
{
if(tracker_type::start() == 0) setup();
}
void
roctracer::stop()
{
if(tracker_type::stop() == 0) tear_down();
}
} // namespace component
} // namespace tim
// Extern instantiations of the two components implemented in this file.
// NOTE(review): the second/third macro arguments presumably state whether the
// component has storage and its value type -- confirm against the timemory docs.
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer, false, void)
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer_data, true, double)
// HSA-runtime tool on-load / on-unload methods
extern "C"
{
    bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
                const char* const* failed_tool_names) TIMEMORY_VISIBILITY("default");
    void OnUnload() TIMEMORY_VISIBILITY("default");

    // Registers the HSA setup and tear-down routines with the roctracer
    // component. If the component is already set up, the setup routine is also
    // executed immediately. Always returns true.
    //
    // The setup routine creates the HSA timer and, depending on the environment:
    //   HOSTTRACE_ROCTRACER_HSA_API       (default: true) enables HSA API callbacks,
    //   HOSTTRACE_ROCTRACER_HSA_API_TYPES (default: "")   restricts them to the
    //                                                     listed API names,
    //   HOSTTRACE_ROCTRACER_HSA_ACTIVITY  (default: true) enables tracing of HSA
    //                                                     copy operations.
    //
    // table : HSA API table; the pointer is captured by the setup routine
    // runtime_version, failed_tool_count, failed_tool_names : unused
    bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
                const char* const* failed_tool_names)
    {
        HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
        tim::consume_parameters(table, runtime_version, failed_tool_count,
                                failed_tool_names);

        auto _setup = [=]() {
            get_hsa_timer() =
                std::make_unique<hsa_timer_t>(table->core_->hsa_system_get_info_fn);

            // no output directory is handed to roctracer
            const char* output_prefix = nullptr;

            bool trace_hsa_api = tim::get_env("HOSTTRACE_ROCTRACER_HSA_API", true);

            std::vector<std::string> hsa_api_vec = tim::delimit(
                tim::get_env<std::string>("HOSTTRACE_ROCTRACER_HSA_API_TYPES", ""));

            // Enable HSA API callbacks/activity
            if(trace_hsa_api)
            {
                // initialize HSA tracing
                ROCTRACER_CALL(
                    roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*) table));

                HOSTTRACE_DEBUG("    HSA-trace(");
                if(!hsa_api_vec.empty())
                {
                    // only the explicitly requested API calls
                    for(const auto& itr : hsa_api_vec)
                    {
                        uint32_t    cid = HSA_API_ID_NUMBER;
                        const char* api = itr.c_str();
                        ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api,
                                                         &cid, nullptr));
                        ROCTRACER_CALL(roctracer_enable_op_callback(
                            ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr));

                        HOSTTRACE_DEBUG(" %s", api);
                    }
                }
                else
                {
                    // the whole HSA API domain
                    ROCTRACER_CALL(roctracer_enable_domain_callback(
                        ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr));
                }
                HOSTTRACE_DEBUG("\n");
            }

            bool trace_hsa_activity =
                tim::get_env("HOSTTRACE_ROCTRACER_HSA_ACTIVITY", true);

            // Enable HSA GPU activity
            if(trace_hsa_activity)
            {
                // initialize HSA tracing
                roctracer::hsa_ops_properties_t ops_properties{
                    table,
                    reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
                    nullptr, output_prefix
                };
                ROCTRACER_CALL(
                    roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties));

                HOSTTRACE_DEBUG("    HSA-activity-trace()\n");
                ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS,
                                                            HSA_OP_ID_COPY));
            }
        };

        auto _tear_down = []() {
            ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
            ROCTRACER_CALL(
                roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
        };

        // the component may have been set up before the HSA runtime loaded this tool
        if(comp::roctracer::is_setup()) _setup();

        comp::roctracer::add_setup("hsa", std::move(_setup));
        comp::roctracer::add_tear_down("hsa", std::move(_tear_down));

        return true;
    }

    // HSA-runtime on-unload method (only logs)
    void OnUnload()
    {
        HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
    }
}
@@ -3,6 +3,10 @@ if(NOT HOSTTRACE_DYNINST_API_RT_DIR AND HOSTTRACE_DYNINST_API_RT)
DIRECTORY)
endif()
if(NOT DEFINED NUM_PROCS)
set(NUM_PROCS 2)
endif()
if(HOSTTRACE_BUILD_DYNINST)
set(HOSTTRACE_DYNINST_API_RT_DIR
"${PROJECT_BINARY_DIR}/external/dyninst/dyninstAPI_RT:${PROJECT_BINARY_DIR}/external/dyninst/dyninstAPI"
@@ -16,8 +20,8 @@ set(_test_environment
)
if(TARGET transpose)
if(TRANSPOSE_USE_MPI)
set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2)
if(TRANSPOSE_USE_MPI AND NUM_PROCS GREATER 0)
set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NUM_PROCS})
endif()
add_test(