Reorganization and critical trace support (#17)
* Roctracer wall clock integration (#16)
* Integrates roctracer values into wall-clock
* Fixed scoping + timemory roctracer
* Fixed data race in roctracer
* Synchronized HIP API on main thread
- Cache hip activity callbacks and execute on main thread
- Minor updates to transpose
* Debugging + MPI + transpose updates
* PTL + HSA and timemory + kernel timing
- PTL usage fixed HSA + timemory issues because we can now control the thread destruction
- Fixed laps counting in roctracer callbacks
* Ignore select HIP API types
- The following API types are ignored because there appears to be a bug
which causes the "end" callback to be labeled as "begin"
- hipDeviceEnablePeerAccess
- hipImportExternalMemory
- hipDestroyExternalMemory
* Tweaks to PTL config
* Timemory update + pid-prefix w/ mpi headers
- %pid%- prefix with mpi headers
- timemory submodule update
* CMake + critical trace + reorganize library source
- clang-tidy tweaks
- cmake function updates to use hosttrace_ prefix
- update gitignore
- cmake HOSTTRACE_MAX_THREADS option
- Formatting.cmake
- cleaned up MacroUtilities.cmake
- PTL submodule + usage
- tweak to Findroctracer.cmake
- MT transpose
- Updated PTL submodule
- Updated timemory submodule
- fix to hosttrace return value type if type not found
- reorganized library source code
- support for critical trace
* Remove bits/stdint-uintn.h headers
* Rename + config + depth + critical path
- rename hosttrace_timemory_data to instrumentation_bundles
- rename hosttrace_bundle_t to main_bundle_t
- rename bundle_t to instrumentation_bundle_t
- rework of configuration setup
- critical_trace write directly to file option
- tweaked depth calculation
- updated timemory submodule
- improved parallel support in roctracer callbacks
- working critical_trace
- perfetto device-critical-trace and host-critical-trace categories
- made transpose example parallel
- made parallel-overhead example a bit uneven
- relocated LTO activation
* Fixed duplicates in perfetto critical-trace
* reworked critical trace support
- substantial perf improvement (30-45 min -> 30 sec)
- changes to configuration (new and removed options)
* Removed "%pid%-" output prefix in mpi_gotcha
* Update timemory submodule
[ROCm/rocprofiler-systems commit: 752424efc2]
Dieser Commit ist enthalten in:
committet von
GitHub
Ursprung
cdd2707058
Commit
efb6d766af
@@ -18,6 +18,7 @@ modernize-*,\
|
||||
-modernize-use-using,\
|
||||
-modernize-use-auto,\
|
||||
-modernize-concat-nested-namespaces,\
|
||||
-modernize-use-nodiscard,\
|
||||
performance-*,\
|
||||
readability-*,\
|
||||
-readability-function-size,\
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
parse:
|
||||
additional_commands:
|
||||
checkout_git_submodule:
|
||||
hosttrace_checkout_git_submodule:
|
||||
flags:
|
||||
- RECURSIVE
|
||||
kwargs:
|
||||
|
||||
@@ -33,3 +33,6 @@
|
||||
|
||||
/build*
|
||||
/.vscode
|
||||
/.cache
|
||||
/.clangd
|
||||
/compile_commands.json
|
||||
|
||||
@@ -10,3 +10,6 @@
|
||||
[submodule "external/dyninst"]
|
||||
path = external/dyninst
|
||||
url = https://github.com/jrmadsen/dyninst.git
|
||||
[submodule "external/PTL"]
|
||||
path = external/PTL
|
||||
url = https://github.com/jrmadsen/PTL.git
|
||||
|
||||
@@ -50,49 +50,124 @@ include(BuildSettings) # compiler flags
|
||||
set(CMAKE_CXX_STANDARD
|
||||
17
|
||||
CACHE STRING "CXX language standard")
|
||||
add_option(CMAKE_CXX_STANDARD_REQUIRED "Require C++ language standard" ON)
|
||||
add_option(CMAKE_CXX_EXTENSIONS "Compiler specific language extensions" OFF)
|
||||
add_option(CMAKE_INSTALL_RPATH_USE_LINK_PATH "Enable rpath to linked libraries" ON)
|
||||
add_option(HOSTTRACE_USE_CLANG_TIDY "Enable clang-tidy" OFF)
|
||||
add_option(HOSTTRACE_USE_MPI "Enable MPI support" OFF)
|
||||
add_option(HOSTTRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF)
|
||||
add_option(HOSTTRACE_USE_ROCTRACER "Enable roctracer support" ON)
|
||||
add_option(HOSTTRACE_BUILD_DYNINST "Build dyninst from submodule" OFF)
|
||||
add_option(HOSTTRACE_USE_MPI_HEADERS
|
||||
"Enable wrapping MPI functions w/o enabling MPI dependency" OFF)
|
||||
hosttrace_add_feature(CMAKE_CXX_STANDARD "CXX language standard")
|
||||
hosttrace_add_option(CMAKE_CXX_STANDARD_REQUIRED "Require C++ language standard" ON)
|
||||
hosttrace_add_option(CMAKE_CXX_EXTENSIONS "Compiler specific language extensions" OFF)
|
||||
hosttrace_add_option(CMAKE_INSTALL_RPATH_USE_LINK_PATH "Enable rpath to linked libraries"
|
||||
ON)
|
||||
hosttrace_add_option(HOSTTRACE_USE_CLANG_TIDY "Enable clang-tidy" OFF)
|
||||
hosttrace_add_option(HOSTTRACE_USE_MPI "Enable MPI support" OFF)
|
||||
hosttrace_add_option(HOSTTRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF)
|
||||
hosttrace_add_option(HOSTTRACE_USE_ROCTRACER "Enable roctracer support" ON)
|
||||
hosttrace_add_option(HOSTTRACE_BUILD_DYNINST "Build dyninst from submodule" OFF)
|
||||
hosttrace_add_option(HOSTTRACE_USE_MPI_HEADERS
|
||||
"Enable wrapping MPI functions w/o enabling MPI dependency" OFF)
|
||||
|
||||
include(ProcessorCount)
|
||||
processorcount(HOSTTRACE_PROCESSOR_COUNT)
|
||||
math(EXPR HOSTTRACE_THREAD_COUNT "8 * ${HOSTTRACE_PROCESSOR_COUNT}")
|
||||
set(HOSTTRACE_MAX_THREADS
|
||||
"${HOSTTRACE_THREAD_COUNT}"
|
||||
CACHE
|
||||
STRING
|
||||
"Maximum number of threads in the host application. Likely only needs to be increased if host app does not use thread-pool but creates many threads"
|
||||
)
|
||||
hosttrace_add_feature(
|
||||
HOSTTRACE_MAX_THREADS
|
||||
"Maximum number of total threads supported in the host application (default: 8 * nproc)"
|
||||
)
|
||||
|
||||
# ensure synced
|
||||
set(TIMEMORY_USE_MPI
|
||||
${HOSTTRACE_USE_MPI}
|
||||
CACHE BOOL "Enable MPI support" FORCE)
|
||||
|
||||
# default visibility settings
|
||||
set(CMAKE_C_VISIBILITY_PRESET "default")
|
||||
set(CMAKE_CXX_VISIBILITY_PRESET "default")
|
||||
set(CMAKE_VISIBILITY_INLINES_HIDDEN OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
include(Formatting) # format target
|
||||
include(Packages) # finds third-party libraries
|
||||
|
||||
if(HOSTTRACE_USE_ROCTRACER)
|
||||
find_package(HIP QUIET)
|
||||
if(HIP_VERSION_MAJOR GREATER_EQUAL 4 AND HIP_VERSION_MINOR GREATER 3)
|
||||
set(roctracer_kfdwrapper_LIBRARY)
|
||||
endif()
|
||||
else()
|
||||
set(HIP_VERSION "0.0.0")
|
||||
set(HIP_VERSION_MAJOR 0)
|
||||
set(HIP_VERSION_MINOR 0)
|
||||
set(HIP_VERSION_PATCH 0)
|
||||
endif()
|
||||
|
||||
configure_file(${PROJECT_SOURCE_DIR}/include/library/defines.hpp.in
|
||||
${PROJECT_BINARY_DIR}/include/library/defines.hpp @ONLY)
|
||||
|
||||
hosttrace_activate_clang_tidy()
|
||||
|
||||
# custom visibility settings
|
||||
set(CMAKE_C_VISIBILITY_PRESET "hidden")
|
||||
set(CMAKE_CXX_VISIBILITY_PRESET "hidden")
|
||||
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
|
||||
|
||||
if(HOSTTRACE_BUILD_LTO)
|
||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
endif()
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
#
|
||||
# hosttrace-library target
|
||||
#
|
||||
# ------------------------------------------------------------------------------#
|
||||
|
||||
add_library(
|
||||
hosttrace-library SHARED
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library.cpp ${CMAKE_CURRENT_LIST_DIR}/src/libmisc.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library.hpp ${perfetto_DIR}/sdk/perfetto.cc)
|
||||
set(library_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/config.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/critical_trace.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/fork_gotcha.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/hosttrace_component.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/mpi_gotcha.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/perfetto.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/ptl.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/thread_data.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/timemory.cpp
|
||||
${perfetto_DIR}/sdk/perfetto.cc)
|
||||
|
||||
set(library_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/api.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/config.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/common.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/critical_trace.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/debug.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/fork_gotcha.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/hosttrace_component.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/mpi_gotcha.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/perfetto.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/ptl.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/state.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/thread_data.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/timemory.hpp
|
||||
${perfetto_DIR}/sdk/perfetto.h)
|
||||
|
||||
if(NOT TIMEMORY_USE_PERFETTO)
|
||||
|
||||
endif()
|
||||
|
||||
add_library(hosttrace-library SHARED ${library_sources} ${library_headers})
|
||||
|
||||
if(HOSTTRACE_USE_ROCTRACER)
|
||||
target_sources(
|
||||
hosttrace-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include/roctracer.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/roctracer.cpp)
|
||||
hosttrace-library
|
||||
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include/library/roctracer.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/roctracer.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library/roctracer_callbacks.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library/roctracer_callbacks.cpp)
|
||||
endif()
|
||||
|
||||
target_include_directories(hosttrace-library PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
|
||||
|
||||
target_include_directories(hosttrace-library SYSTEM PRIVATE ${perfetto_DIR}/sdk)
|
||||
|
||||
target_compile_definitions(
|
||||
@@ -101,10 +176,12 @@ target_compile_definitions(
|
||||
|
||||
target_link_libraries(
|
||||
hosttrace-library
|
||||
PRIVATE hosttrace::hosttrace-threading
|
||||
PRIVATE hosttrace::hosttrace-headers
|
||||
hosttrace::hosttrace-threading
|
||||
hosttrace::hosttrace-compile-options
|
||||
hosttrace::hosttrace-roctracer
|
||||
hosttrace::hosttrace-mpi
|
||||
hosttrace::hosttrace-ptl
|
||||
$<BUILD_INTERFACE:timemory::timemory-headers>
|
||||
$<BUILD_INTERFACE:timemory::timemory-gotcha>
|
||||
$<BUILD_INTERFACE:timemory::timemory-cxx-shared>
|
||||
@@ -134,13 +211,13 @@ add_executable(
|
||||
hosttrace-exe
|
||||
${_EXCLUDE} ${CMAKE_CURRENT_LIST_DIR}/src/hosttrace.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/hosttrace.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/hosttrace-details.cpp)
|
||||
|
||||
target_include_directories(hosttrace-exe PRIVATE ${CMAKE_CURRENT_LIST_DIR}/include)
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/hosttrace/details.cpp)
|
||||
|
||||
target_link_libraries(
|
||||
hosttrace-exe
|
||||
PRIVATE hosttrace::hosttrace-dyninst hosttrace::hosttrace-compile-options
|
||||
PRIVATE hosttrace::hosttrace-headers
|
||||
hosttrace::hosttrace-dyninst
|
||||
hosttrace::hosttrace-compile-options
|
||||
$<BUILD_INTERFACE:timemory::timemory-headers>
|
||||
$<IF:$<BOOL:${HOSTTRACE_USE_SANITIZER}>,hosttrace::hosttrace-sanitizer,>)
|
||||
|
||||
@@ -211,3 +288,11 @@ add_subdirectory(tests)
|
||||
# ------------------------------------------------------------------------------#
|
||||
|
||||
include(ConfigCPack)
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
#
|
||||
# config info
|
||||
#
|
||||
# ------------------------------------------------------------------------------#
|
||||
|
||||
hosttrace_print_features()
|
||||
|
||||
@@ -68,7 +68,7 @@ endif()
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# extra flags for debug information in debug or optimized binaries
|
||||
#
|
||||
add_interface_library(
|
||||
hosttrace_add_interface_library(
|
||||
hosttrace-compile-debuginfo
|
||||
"Attempts to set best flags for more expressive profiling information in debug or optimized binaries"
|
||||
)
|
||||
@@ -108,7 +108,7 @@ endif()
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# non-debug optimizations
|
||||
#
|
||||
add_interface_library(hosttrace-compile-extra "Extra optimization flags")
|
||||
hosttrace_add_interface_library(hosttrace-compile-extra "Extra optimization flags")
|
||||
if(NOT HOSTTRACE_USE_COVERAGE)
|
||||
add_target_flag_if_avail(
|
||||
hosttrace-compile-extra "-finline-functions" "-funroll-loops" "-ftree-vectorize"
|
||||
@@ -130,21 +130,16 @@ endif()
|
||||
#
|
||||
add_cxx_flag_if_avail("-faligned-new")
|
||||
|
||||
if(HOSTTRACE_BUILD_LTO)
|
||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
endif()
|
||||
|
||||
hosttrace_save_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
|
||||
set(CMAKE_CXX_FLAGS "-flto=thin ${CMAKE_CXX_FLAGS}")
|
||||
|
||||
add_interface_library(hosttrace-lto "Adds link-time-optimization flags")
|
||||
hosttrace_add_interface_library(hosttrace-lto "Adds link-time-optimization flags")
|
||||
add_target_flag_if_avail(hosttrace-lto "-flto=thin")
|
||||
if(NOT cxx_hosttrace_lto_flto_thin)
|
||||
set(CMAKE_CXX_FLAGS "-flto ${CMAKE_CXX_FLAGS}")
|
||||
add_target_flag_if_avail(hosttrace-lto "-flto")
|
||||
if(NOT cxx_hosttrace_lto_flto)
|
||||
add_disabled_interface(hosttrace-lto)
|
||||
set(hosttrace_BUILD_LTO OFF)
|
||||
set(HOSTTRACE_BUILD_LTO OFF)
|
||||
else()
|
||||
target_link_options(hosttrace-lto INTERFACE -flto)
|
||||
endif()
|
||||
@@ -161,8 +156,9 @@ hosttrace_restore_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# print compilation timing reports (Clang compiler)
|
||||
#
|
||||
add_interface_library(hosttrace-compile-timing
|
||||
"Adds compiler flags which report compilation timing metrics")
|
||||
hosttrace_add_interface_library(
|
||||
hosttrace-compile-timing
|
||||
"Adds compiler flags which report compilation timing metrics")
|
||||
if(CMAKE_CXX_COMPILER_IS_CLANG)
|
||||
add_target_flag_if_avail(hosttrace-compile-timing "-ftime-trace")
|
||||
if(NOT cxx_hosttrace_compile_timing_ftime_trace)
|
||||
@@ -176,15 +172,10 @@ if(HOSTTRACE_USE_COMPILE_TIMING)
|
||||
target_link_libraries(hosttrace-compile-options INTERFACE hosttrace-compile-timing)
|
||||
endif()
|
||||
|
||||
if(NOT cxx_hosttrace_compile_timing_ftime_report
|
||||
AND NOT cxx_hosttrace_compile_timing_ftime_trace)
|
||||
add_disabled_interface(hosttrace-compile-timing)
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# developer build flags
|
||||
#
|
||||
add_interface_library(hosttrace-develop-options "Adds developer compiler flags")
|
||||
hosttrace_add_interface_library(hosttrace-develop-options "Adds developer compiler flags")
|
||||
if(HOSTTRACE_BUILD_DEVELOPER)
|
||||
add_target_flag_if_avail(
|
||||
hosttrace-develop-options
|
||||
@@ -195,21 +186,15 @@ endif()
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# visibility build flags
|
||||
#
|
||||
add_interface_library(hosttrace-default-visibility
|
||||
"Adds -fvisibility=default compiler flag")
|
||||
add_interface_library(hosttrace-hidden-visibility
|
||||
"Adds -fvisibility=hidden compiler flag")
|
||||
hosttrace_add_interface_library(hosttrace-default-visibility
|
||||
"Adds -fvisibility=default compiler flag")
|
||||
hosttrace_add_interface_library(hosttrace-hidden-visibility
|
||||
"Adds -fvisibility=hidden compiler flag")
|
||||
|
||||
add_target_flag_if_avail(hosttrace-default-visibility "-fvisibility=default")
|
||||
add_target_flag_if_avail(hosttrace-hidden-visibility "-fvisibility=hidden"
|
||||
"-fvisibility-inlines-hidden")
|
||||
|
||||
foreach(_TYPE default hidden)
|
||||
if(NOT cxx_hosttrace_${_TYPE}_visibility_fvisibility_${_TYPE})
|
||||
add_disabled_interface(hosttrace-${_TYPE}-visibility)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# developer build flags
|
||||
#
|
||||
@@ -235,9 +220,9 @@ set(HOSTTRACE_SANITIZER_TYPES
|
||||
alignment)
|
||||
set_property(CACHE HOSTTRACE_SANITIZER_TYPE PROPERTY STRINGS
|
||||
"${HOSTTRACE_SANITIZER_TYPES}")
|
||||
add_interface_library(hosttrace-sanitizer-compile-options
|
||||
"Adds compiler flags for sanitizers")
|
||||
add_interface_library(
|
||||
hosttrace_add_interface_library(hosttrace-sanitizer-compile-options
|
||||
"Adds compiler flags for sanitizers")
|
||||
hosttrace_add_interface_library(
|
||||
hosttrace-sanitizer
|
||||
"Adds compiler flags to enable ${HOSTTRACE_SANITIZER_TYPE} sanitizer (-fsanitizer=${HOSTTRACE_SANITIZER_TYPE})"
|
||||
)
|
||||
@@ -248,8 +233,9 @@ add_target_flag(hosttrace-sanitizer-compile-options ${COMMON_SANITIZER_FLAGS})
|
||||
|
||||
foreach(_TYPE ${HOSTTRACE_SANITIZER_TYPES})
|
||||
set(_FLAG "-fsanitize=${_TYPE}")
|
||||
add_interface_library(hosttrace-${_TYPE}-sanitizer
|
||||
"Adds compiler flags to enable ${_TYPE} sanitizer (${_FLAG})")
|
||||
hosttrace_add_interface_library(
|
||||
hosttrace-${_TYPE}-sanitizer
|
||||
"Adds compiler flags to enable ${_TYPE} sanitizer (${_FLAG})")
|
||||
add_target_flag(hosttrace-${_TYPE}-sanitizer ${_FLAG})
|
||||
target_link_libraries(hosttrace-${_TYPE}-sanitizer
|
||||
INTERFACE hosttrace-sanitizer-compile-options)
|
||||
@@ -273,7 +259,6 @@ if(HOSTTRACE_USE_SANITIZER)
|
||||
endforeach()
|
||||
else()
|
||||
set(HOSTTRACE_USE_SANITIZER OFF)
|
||||
inform_empty_interface(hosttrace-sanitizer "${HOSTTRACE_SANITIZER_TYPE} sanitizer")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
|
||||
@@ -31,8 +31,8 @@ if("${LIBNAME}" STREQUAL "")
|
||||
string(TOLOWER "${PROJECT_NAME}" LIBNAME)
|
||||
endif()
|
||||
|
||||
add_interface_library(${LIBNAME}-compile-options
|
||||
"Adds the standard set of compiler flags used by timemory")
|
||||
hosttrace_add_interface_library(
|
||||
${LIBNAME}-compile-options "Adds the standard set of compiler flags used by timemory")
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# macro converting string to list
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
include_guard(DIRECTORY)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# Clang Tidy
|
||||
#
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# clang-tidy
|
||||
macro(HOSTTRACE_ACTIVATE_CLANG_TIDY)
|
||||
if(HOSTTRACE_USE_CLANG_TIDY)
|
||||
find_program(CLANG_TIDY_COMMAND NAMES clang-tidy)
|
||||
hosttrace_add_feature(CLANG_TIDY_COMMAND "Path to clang-tidy command")
|
||||
if(NOT CLANG_TIDY_COMMAND)
|
||||
timemory_message(
|
||||
WARNING "HOSTTRACE_USE_CLANG_TIDY is ON but clang-tidy is not found!")
|
||||
set(HOSTTRACE_USE_CLANG_TIDY OFF)
|
||||
else()
|
||||
set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_COMMAND})
|
||||
|
||||
# Create a preprocessor definition that depends on .clang-tidy content so the
|
||||
# compile command will change when .clang-tidy changes. This ensures that a
|
||||
# subsequent build re-runs clang-tidy on all sources even if they do not
|
||||
# otherwise need to be recompiled. Nothing actually uses this definition. We
|
||||
# add it to targets on which we run clang-tidy just to get the build
|
||||
# dependency on the .clang-tidy file.
|
||||
file(SHA1 ${CMAKE_CURRENT_LIST_DIR}/.clang-tidy clang_tidy_sha1)
|
||||
set(CLANG_TIDY_DEFINITIONS "CLANG_TIDY_SHA1=${clang_tidy_sha1}")
|
||||
unset(clang_tidy_sha1)
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
#
|
||||
# clang-format target
|
||||
#
|
||||
# ------------------------------------------------------------------------------#
|
||||
|
||||
find_program(HOSTTRACE_CLANG_FORMAT_EXE NAMES clang-format-11 clang-format-mp-11
|
||||
clang-format)
|
||||
|
||||
if(HOSTTRACE_CLANG_FORMAT_EXE)
|
||||
file(GLOB_RECURSE sources ${PROJECT_SOURCE_DIR}/src/*.cpp)
|
||||
file(GLOB_RECURSE headers ${PROJECT_SOURCE_DIR}/include/*.hpp)
|
||||
file(GLOB_RECURSE examples ${PROJECT_SOURCE_DIR}/examples/*.cpp
|
||||
${PROJECT_SOURCE_DIR}/examples/*.hpp)
|
||||
add_custom_target(
|
||||
format-hosttrace
|
||||
${HOSTTRACE_CLANG_FORMAT_EXE} -i ${sources} ${headers} ${examples}
|
||||
COMMENT "Running C++ formatter ${HOSTTRACE_CLANG_FORMAT_EXE}...")
|
||||
if(NOT TARGET format)
|
||||
add_custom_target(format)
|
||||
endif()
|
||||
add_dependencies(format format-hosttrace)
|
||||
else()
|
||||
message(
|
||||
AUTHOR_WARNING
|
||||
"clang-format could not be found. format build target not available.")
|
||||
endif()
|
||||
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -100,10 +100,6 @@ if(roctracer_FOUND)
|
||||
|
||||
if(roctracer_kfdwrapper_LIBRARY)
|
||||
list(APPEND roctracer_LIBRARIES ${roctracer_kfdwrapper_LIBRARY})
|
||||
target_compile_definitions(
|
||||
roctracer::roctracer
|
||||
INTERFACE
|
||||
HOSTTRACE_ROCTRACER_LIBKFDWRAPPER=\"${roctracer_kfdwrapper_LIBRARY}\")
|
||||
target_link_libraries(roctracer::roctracer
|
||||
INTERFACE ${roctracer_kfdwrapper_LIBRARY})
|
||||
target_link_libraries(roctracer::roctx INTERFACE ${roctracer_kfdwrapper_LIBRARY})
|
||||
|
||||
@@ -7,14 +7,19 @@ include_guard(DIRECTORY)
|
||||
#
|
||||
# ########################################################################################
|
||||
|
||||
add_interface_library(hosttrace-headers
|
||||
"Provides minimal set of include flags to compile with hosttrace")
|
||||
add_interface_library(hosttrace-threading "Enables multithreading support")
|
||||
add_interface_library(
|
||||
hosttrace_add_interface_library(
|
||||
hosttrace-headers "Provides minimal set of include flags to compile with hosttrace")
|
||||
hosttrace_add_interface_library(hosttrace-threading "Enables multithreading support")
|
||||
hosttrace_add_interface_library(
|
||||
hosttrace-dyninst
|
||||
"Provides flags and libraries for Dyninst (dynamic instrumentation)")
|
||||
add_interface_library(hosttrace-roctracer "Provides flags and libraries for roctracer")
|
||||
add_interface_library(hosttrace-mpi "Provides MPI or MPI headers")
|
||||
hosttrace_add_interface_library(hosttrace-roctracer
|
||||
"Provides flags and libraries for roctracer")
|
||||
hosttrace_add_interface_library(hosttrace-mpi "Provides MPI or MPI headers")
|
||||
hosttrace_add_interface_library(hosttrace-ptl "Enables PTL support (tasking)")
|
||||
|
||||
target_include_directories(hosttrace-headers INTERFACE ${PROJECT_SOURCE_DIR}/include
|
||||
${PROJECT_BINARY_DIR}/include)
|
||||
|
||||
# include threading because of rooflines
|
||||
target_link_libraries(hosttrace-headers INTERFACE hosttrace-threading)
|
||||
@@ -80,7 +85,7 @@ endif()
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
if(HOSTTRACE_BUILD_DYNINST)
|
||||
checkout_git_submodule(
|
||||
hosttrace_checkout_git_submodule(
|
||||
RELATIVE_PATH external/dyninst
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
REPO_URL https://github.com/jrmadsen/dyninst.git
|
||||
@@ -145,7 +150,7 @@ else()
|
||||
hosttrace-dyninst INTERFACE DYNINST_API_RT="${HOSTTRACE_DYNINST_API_RT}")
|
||||
endif()
|
||||
|
||||
add_rpath(${Dyninst_LIBRARIES})
|
||||
hosttrace_add_rpath(${Dyninst_LIBRARIES})
|
||||
target_link_libraries(hosttrace-dyninst INTERFACE Dyninst::Dyninst)
|
||||
else() # updated Dyninst CMake system was not found
|
||||
set(_BOOST_COMPONENTS atomic system thread date_time)
|
||||
@@ -204,7 +209,7 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_rpath(${DYNINST_LIBRARIES} ${Boost_LIBRARIES})
|
||||
hosttrace_add_rpath(${DYNINST_LIBRARIES} ${Boost_LIBRARIES})
|
||||
target_link_libraries(hosttrace-dyninst INTERFACE ${DYNINST_LIBRARIES}
|
||||
${Boost_LIBRARIES})
|
||||
foreach(
|
||||
@@ -242,7 +247,7 @@ endif()
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
set(perfetto_DIR ${PROJECT_SOURCE_DIR}/external/perfetto)
|
||||
checkout_git_submodule(
|
||||
hosttrace_checkout_git_submodule(
|
||||
RELATIVE_PATH external/perfetto
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
REPO_URL https://android.googlesource.com/platform/external/perfetto
|
||||
@@ -256,7 +261,7 @@ checkout_git_submodule(
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
if(HOSTTRACE_BUILD_DEVICETRACE)
|
||||
checkout_git_submodule(
|
||||
hosttrace_checkout_git_submodule(
|
||||
RELATIVE_PATH external/elfio
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
REPO_URL https://github.com/jrmadsen/ELFIO.git
|
||||
@@ -267,63 +272,10 @@ endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# Clang Tidy
|
||||
# timemory submodule
|
||||
#
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# clang-tidy
|
||||
macro(HOSTTRACE_ACTIVATE_CLANG_TIDY)
|
||||
if(HOSTTRACE_USE_CLANG_TIDY)
|
||||
find_program(CLANG_TIDY_COMMAND NAMES clang-tidy)
|
||||
add_feature(CLANG_TIDY_COMMAND "Path to clang-tidy command")
|
||||
if(NOT CLANG_TIDY_COMMAND)
|
||||
timemory_message(
|
||||
WARNING "HOSTTRACE_USE_CLANG_TIDY is ON but clang-tidy is not found!")
|
||||
set(HOSTTRACE_USE_CLANG_TIDY OFF)
|
||||
else()
|
||||
set(CMAKE_CXX_CLANG_TIDY ${CLANG_TIDY_COMMAND})
|
||||
|
||||
# Create a preprocessor definition that depends on .clang-tidy content so the
|
||||
# compile command will change when .clang-tidy changes. This ensures that a
|
||||
# subsequent build re-runs clang-tidy on all sources even if they do not
|
||||
# otherwise need to be recompiled. Nothing actually uses this definition. We
|
||||
# add it to targets on which we run clang-tidy just to get the build
|
||||
# dependency on the .clang-tidy file.
|
||||
file(SHA1 ${CMAKE_CURRENT_LIST_DIR}/.clang-tidy clang_tidy_sha1)
|
||||
set(CLANG_TIDY_DEFINITIONS "CLANG_TIDY_SHA1=${clang_tidy_sha1}")
|
||||
unset(clang_tidy_sha1)
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
#
|
||||
# clang-format target
|
||||
#
|
||||
# ------------------------------------------------------------------------------#
|
||||
|
||||
find_program(HOSTTRACE_CLANG_FORMAT_EXE NAMES clang-format-11 clang-format-mp-11
|
||||
clang-format)
|
||||
|
||||
if(HOSTTRACE_CLANG_FORMAT_EXE)
|
||||
file(GLOB sources ${PROJECT_SOURCE_DIR}/src/*.cpp)
|
||||
file(GLOB headers ${PROJECT_SOURCE_DIR}/include/*.hpp)
|
||||
file(GLOB_RECURSE examples ${PROJECT_SOURCE_DIR}/examples/*.cpp
|
||||
${PROJECT_SOURCE_DIR}/examples/*.hpp)
|
||||
add_custom_target(
|
||||
format
|
||||
${HOSTTRACE_CLANG_FORMAT_EXE} -i ${sources} ${headers} ${examples}
|
||||
COMMENT "Running ${HOSTTRACE_CLANG_FORMAT_EXE}...")
|
||||
else()
|
||||
message(
|
||||
AUTHOR_WARNING
|
||||
"clang-format could not be found. format build target not available.")
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
# configure submodule
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
set(TIMEMORY_INSTALL_HEADERS
|
||||
OFF
|
||||
CACHE BOOL "Disable timemory header install")
|
||||
@@ -365,7 +317,7 @@ set(TIMEMORY_TLS_MODEL
|
||||
"global-dynamic"
|
||||
CACHE STRING "Thread-local static model" FORCE)
|
||||
|
||||
checkout_git_submodule(
|
||||
hosttrace_checkout_git_submodule(
|
||||
RELATIVE_PATH external/timemory
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
REPO_URL https://github.com/NERSC/timemory.git
|
||||
@@ -384,3 +336,41 @@ add_subdirectory(external/timemory)
|
||||
|
||||
hosttrace_restore_variables(BUILD_CONFIG VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS
|
||||
CMAKE_POSITION_INDEPENDENT_CODE)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# PTL (Parallel Tasking Library) submodule
|
||||
#
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
# timemory might provide PTL::ptl-shared
|
||||
if(NOT TARGET PTL::ptl-shared)
|
||||
hosttrace_checkout_git_submodule(
|
||||
RELATIVE_PATH external/PTL
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
REPO_URL https://github.com/jrmadsen/PTL.git
|
||||
REPO_BRANCH master)
|
||||
|
||||
set(PTL_BUILD_EXAMPLES OFF)
|
||||
set(PTL_USE_TBB OFF)
|
||||
set(PTL_USE_GPU OFF)
|
||||
set(PTL_DEVELOPER_INSTALL OFF)
|
||||
|
||||
hosttrace_save_variables(
|
||||
BUILD_CONFIG
|
||||
VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS CMAKE_POSITION_INDEPENDENT_CODE
|
||||
CMAKE_CXX_VISIBILITY_PRESET CMAKE_VISIBILITY_INLINES_HIDDEN)
|
||||
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
set(BUILD_STATIC_LIBS OFF)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
|
||||
|
||||
add_subdirectory(external/PTL)
|
||||
hosttrace_restore_variables(
|
||||
BUILD_CONFIG
|
||||
VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS CMAKE_POSITION_INDEPENDENT_CODE
|
||||
CMAKE_CXX_VISIBILITY_PRESET CMAKE_VISIBILITY_INLINES_HIDDEN)
|
||||
endif()
|
||||
|
||||
target_link_libraries(hosttrace-ptl INTERFACE PTL::ptl-shared)
|
||||
|
||||
@@ -38,7 +38,10 @@ main(int argc, char** argv)
|
||||
|
||||
std::vector<std::thread> threads{};
|
||||
for(size_t i = 0; i < nthread; ++i)
|
||||
threads.emplace_back(&run, nitr, nfib);
|
||||
{
|
||||
size_t _nitr = ((i % 2) == 1) ? (nitr - (0.1 * nitr)) : (nitr + (0.1 * nitr));
|
||||
threads.emplace_back(&run, _nitr, nfib);
|
||||
}
|
||||
|
||||
for(auto& itr : threads)
|
||||
itr.join();
|
||||
|
||||
@@ -29,6 +29,7 @@ THE SOFTWARE.
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#define HIP_API_CALL(CALL) \
|
||||
@@ -88,8 +89,8 @@ run(int rank, int argc, char** argv)
|
||||
{
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
unsigned int M = 4960;
|
||||
unsigned int N = 4960;
|
||||
unsigned int M = 4960 * 2;
|
||||
unsigned int N = 4960 * 2;
|
||||
|
||||
std::cout << "[" << rank << "] M: " << M << " N: " << N << std::endl;
|
||||
size_t size = sizeof(int) * M * N;
|
||||
@@ -102,29 +103,30 @@ run(int rank, int argc, char** argv)
|
||||
|
||||
HIP_API_CALL(hipMalloc(&in, size));
|
||||
HIP_API_CALL(hipMalloc(&out, size));
|
||||
check_hip_error();
|
||||
HIP_API_CALL(hipMemset(in, 0, size));
|
||||
HIP_API_CALL(hipMemset(out, 0, size));
|
||||
HIP_API_CALL(hipMemcpy(in, matrix, size, hipMemcpyHostToDevice));
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
check_hip_error();
|
||||
hipDeviceProp_t props;
|
||||
HIP_API_CALL(hipGetDeviceProperties(&props, 0));
|
||||
|
||||
dim3 grid(M / 32, N / 32, 1);
|
||||
dim3 block(32, 32, 1); // transpose_a
|
||||
|
||||
// warmup
|
||||
hipLaunchKernelGGL(transpose_a, grid, block, 0, 0, in, out, M, N);
|
||||
check_hip_error();
|
||||
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
const unsigned times = 10000;
|
||||
for(size_t i = 0; i < times; i++)
|
||||
{
|
||||
hipLaunchKernelGGL(transpose_a, grid, block, 0, 0, in, out, M, N);
|
||||
}
|
||||
check_hip_error();
|
||||
auto _func = [&](hipStream_t stream) {
|
||||
for(size_t i = 0; i < times / 2; i++)
|
||||
{
|
||||
transpose_a<<<grid, block, 0, stream>>>(in, out, M, N);
|
||||
check_hip_error();
|
||||
}
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
};
|
||||
hipStream_t _stream{};
|
||||
HIP_API_CALL(hipStreamCreate(&_stream));
|
||||
std::thread _t{ _func, _stream };
|
||||
_t.join();
|
||||
_func(0);
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
double time =
|
||||
@@ -136,14 +138,12 @@ run(int rank, int argc, char** argv)
|
||||
|
||||
int* out_matrix = (int*) malloc(size);
|
||||
HIP_API_CALL(hipMemcpy(out_matrix, out, size, hipMemcpyDeviceToHost));
|
||||
check_hip_error();
|
||||
|
||||
// cpu_transpose(matrix, out_matrix, M, N);
|
||||
verify(matrix, out_matrix, M, N);
|
||||
|
||||
HIP_API_CALL(hipFree(in));
|
||||
HIP_API_CALL(hipFree(out));
|
||||
check_hip_error();
|
||||
|
||||
free(matrix);
|
||||
free(out_matrix);
|
||||
@@ -171,12 +171,32 @@ do_a2a(int rank)
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
int rank = 0;
|
||||
int rank = 0;
|
||||
int nthreads = 2;
|
||||
if(argc > 1) nthreads = atoi(argv[1]);
|
||||
|
||||
#if defined(USE_MPI)
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
#endif
|
||||
if(rank == 0) run(rank, argc, argv);
|
||||
// this is a temporary workaround in hosttrace when HIP + MPI is enabled
|
||||
int ndevice = 0;
|
||||
int devid = rank;
|
||||
HIP_API_CALL(hipGetDeviceCount(&ndevice));
|
||||
if(ndevice > 0)
|
||||
{
|
||||
devid = rank % ndevice;
|
||||
HIP_API_CALL(hipSetDevice(devid));
|
||||
}
|
||||
if(rank == devid && rank < ndevice)
|
||||
{
|
||||
std::vector<std::thread> _threads{};
|
||||
for(int i = 1; i < nthreads; ++i)
|
||||
_threads.emplace_back(run, rank, argc, argv);
|
||||
run(rank, argc, argv);
|
||||
for(auto& itr : _threads)
|
||||
itr.join();
|
||||
}
|
||||
#if defined(USE_MPI)
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
do_a2a(rank);
|
||||
|
||||
+1
Submodul projects/rocprofiler-systems/external/PTL hinzugefügt bei dd1b67829c
+1
-1
Submodul projects/rocprofiler-systems/external/timemory aktualisiert: 11183bbdd7...c040fe7022
@@ -1,27 +1,30 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2020, The Regents of the University of California,
|
||||
// through Lawrence Berkeley National Laboratory (subject to receipt of any
|
||||
// required approvals from the U.S. Dept. of Energy). All rights reserved.
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -274,7 +277,7 @@ struct function_signature
|
||||
bool m_info_end = false;
|
||||
location_t m_row = { 0, 0 };
|
||||
location_t m_col = { 0, 0 };
|
||||
string_t m_return = "void";
|
||||
string_t m_return = {};
|
||||
string_t m_name = {};
|
||||
string_t m_params = "()";
|
||||
string_t m_file = {};
|
||||
@@ -318,7 +321,7 @@ struct function_signature
|
||||
string_t get() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
if(use_return_info) ss << m_return << " ";
|
||||
if(use_return_info && !m_return.empty()) ss << m_return << " ";
|
||||
ss << m_name;
|
||||
if(use_args_info) ss << m_params;
|
||||
if(m_loop && m_info_beg)
|
||||
|
||||
@@ -1,275 +1,112 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(TIMEMORY_USE_PERFETTO)
|
||||
# include <perfetto.h>
|
||||
# define PERFETTO_CATEGORIES \
|
||||
perfetto::Category("host").SetDescription("Host-side function tracing"), \
|
||||
perfetto::Category("device").SetDescription("Device-side function tracing")
|
||||
#else
|
||||
# define PERFETTO_CATEGORIES \
|
||||
perfetto::Category("host").SetDescription("Host-side function tracing"), \
|
||||
perfetto::Category("device").SetDescription("Device-side function tracing")
|
||||
perfetto::Category("timemory")
|
||||
.SetDescription("Events from the timemory API")
|
||||
# define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
|
||||
#endif
|
||||
// this always needs to be included first
|
||||
// clang-format off
|
||||
#include "library/perfetto.hpp"
|
||||
// clang-format on
|
||||
|
||||
#include "library/timemory.hpp"
|
||||
#include "library/roctracer.hpp"
|
||||
#include "library/api.hpp"
|
||||
#include "library/fork_gotcha.hpp"
|
||||
#include "library/mpi_gotcha.hpp"
|
||||
#include "library/api.hpp"
|
||||
#include "library/common.hpp"
|
||||
#include "library/state.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/critical_trace.hpp"
|
||||
#include "timemory/macros/language.hpp"
|
||||
#include "timemory/utility/utility.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <sys/types.h>
|
||||
#include <thread>
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "timemory/api.hpp"
|
||||
#include "timemory/backends/mpi.hpp"
|
||||
#include "timemory/backends/process.hpp"
|
||||
#include "timemory/backends/threading.hpp"
|
||||
#include "timemory/components.hpp"
|
||||
#include "timemory/components/gotcha/mpip.hpp"
|
||||
#include "timemory/components/papi/papi_tuple.hpp"
|
||||
#include "timemory/config.hpp"
|
||||
#include "timemory/environment.hpp"
|
||||
#include "timemory/manager.hpp"
|
||||
#include "timemory/mpl/apply.hpp"
|
||||
#include "timemory/operations.hpp"
|
||||
#include "timemory/runtime.hpp"
|
||||
#include "timemory/settings.hpp"
|
||||
#include "timemory/storage.hpp"
|
||||
#include "timemory/variadic.hpp"
|
||||
|
||||
#include "roctracer.hpp"
|
||||
|
||||
// forward decl of the API
|
||||
extern "C"
|
||||
template <critical_trace::Device DevID, critical_trace::Phase PhaseID,
|
||||
bool UpdateStack = true>
|
||||
inline void
|
||||
add_critical_trace(int64_t _tid, size_t _cpu_cid, size_t _gpu_cid, size_t _parent_cid,
|
||||
int64_t _ts_beg, int64_t _ts_val, size_t _hash, uint16_t _depth,
|
||||
uint16_t _prio = 0)
|
||||
{
|
||||
void hosttrace_push_trace(const char* name) TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_pop_trace(const char* name) TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_init(const char*, bool, const char*)
|
||||
TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_finalize(void) TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
|
||||
TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_set_mpi(bool use, bool attached) TIMEMORY_VISIBILITY("default");
|
||||
}
|
||||
if(!get_use_critical_trace()) return;
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
// clang-format off
|
||||
// these are used to create unique type mutexes
|
||||
struct critical_insert {};
|
||||
struct cpu_cid_stack {};
|
||||
// clang-format on
|
||||
|
||||
// same sort of functionality as python's " ".join([...])
|
||||
#if !defined(JOIN)
|
||||
# define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
|
||||
#endif
|
||||
using tim::type_mutex;
|
||||
using auto_lock_t = tim::auto_lock_t;
|
||||
static constexpr auto num_mutexes = max_supported_threads;
|
||||
static auto _update_freq = critical_trace::get_update_frequency();
|
||||
|
||||
#define HOSTTRACE_DEBUG(...) \
|
||||
if(get_debug()) \
|
||||
{ \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
fflush(stderr); \
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
namespace audit = tim::audit;
|
||||
namespace comp = tim::component;
|
||||
namespace quirk = tim::quirk;
|
||||
namespace threading = tim::threading;
|
||||
namespace scope = tim::scope;
|
||||
namespace dmp = tim::dmp;
|
||||
namespace process = tim::process;
|
||||
namespace units = tim::units;
|
||||
namespace trait = tim::trait;
|
||||
|
||||
// this is used to wrap fork()
|
||||
struct fork_gotcha : comp::base<fork_gotcha, void>
|
||||
{
|
||||
using gotcha_data_t = comp::gotcha_data;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
|
||||
|
||||
// this will get called right before fork
|
||||
void audit(const gotcha_data_t& _data, audit::incoming);
|
||||
|
||||
// this will get called right after fork with the return value
|
||||
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
|
||||
};
|
||||
|
||||
// this is used to wrap MPI_Init and MPI_Init_thread
|
||||
struct mpi_gotcha : comp::base<mpi_gotcha, void>
|
||||
{
|
||||
using gotcha_data_t = comp::gotcha_data;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(mpi_gotcha)
|
||||
|
||||
// this will get called right before MPI_Init with that function's arguments
|
||||
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***);
|
||||
|
||||
// this will get called right before MPI_Init_thread with that function's arguments
|
||||
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*);
|
||||
|
||||
// this will get called right after MPI_Init and MPI_Init_thread with the return value
|
||||
void audit(const gotcha_data_t& _data, audit::outgoing, int _retval);
|
||||
|
||||
// this will get called right before MPI_Finalize
|
||||
void audit(const gotcha_data_t& _data, audit::incoming);
|
||||
};
|
||||
|
||||
// timemory api struct
|
||||
struct hosttrace : tim::concepts::api
|
||||
{};
|
||||
|
||||
// timemory component which calls hosttrace functions
|
||||
// (used in gotcha wrappers)
|
||||
struct hosttrace_component : tim::component::base<hosttrace_component, void>
|
||||
{
|
||||
void start();
|
||||
void stop();
|
||||
void set_prefix(const char*);
|
||||
|
||||
private:
|
||||
const char* m_prefix = nullptr;
|
||||
};
|
||||
|
||||
using papi_tot_ins = comp::papi_tuple<PAPI_TOT_INS>;
|
||||
using fork_gotcha_t = comp::gotcha<4, tim::component_tuple<fork_gotcha>, hosttrace>;
|
||||
using mpi_gotcha_t = comp::gotcha<4, tim::component_tuple<mpi_gotcha>, hosttrace>;
|
||||
using hosttrace_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
|
||||
comp::cpu_util, comp::roctracer, papi_tot_ins,
|
||||
comp::user_global_bundle, fork_gotcha_t, mpi_gotcha_t>;
|
||||
using hosttrace_thread_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
|
||||
comp::thread_cpu_util, papi_tot_ins>;
|
||||
using bundle_t =
|
||||
tim::component_bundle<hosttrace, comp::wall_clock*, comp::user_global_bundle*>;
|
||||
using bundle_allocator_t = tim::data::ring_buffer_allocator<bundle_t>;
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
#if !defined(TIMEMORY_USE_PERFETTO)
|
||||
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
|
||||
#endif
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
|
||||
{
|
||||
public:
|
||||
void OnSetup(const SetupArgs&) override
|
||||
if constexpr(PhaseID != critical_trace::Phase::NONE)
|
||||
{
|
||||
// Use this callback to apply any custom configuration to your data source
|
||||
// based on the TraceConfig in SetupArgs.
|
||||
PRINT_HERE("%s", "setup");
|
||||
// unique lock per thread
|
||||
auto& _mtx = type_mutex<critical_insert, hosttrace, num_mutexes>(_tid);
|
||||
auto_lock_t _lk{ _mtx };
|
||||
|
||||
auto& _critical_trace = critical_trace::get(_tid);
|
||||
_critical_trace->emplace_back(
|
||||
critical_trace::entry{ _prio, DevID, PhaseID, _depth, _tid, _cpu_cid,
|
||||
_gpu_cid, _parent_cid, _ts_beg, _ts_val, _hash });
|
||||
}
|
||||
|
||||
void OnStart(const StartArgs&) override
|
||||
if constexpr(UpdateStack)
|
||||
{
|
||||
// This notification can be used to initialize the GPU driver, enable
|
||||
// counters, etc. StartArgs will contain the DataSourceDescriptor,
|
||||
// which can be extended.
|
||||
PRINT_HERE("%s", "start");
|
||||
// unique lock per thread
|
||||
auto& _mtx = type_mutex<cpu_cid_stack, hosttrace, num_mutexes>(_tid);
|
||||
|
||||
if constexpr(PhaseID == critical_trace::Phase::NONE)
|
||||
{
|
||||
auto_lock_t _lk{ _mtx };
|
||||
get_cpu_cid_stack(_tid)->emplace_back(_cpu_cid);
|
||||
}
|
||||
else if constexpr(PhaseID == critical_trace::Phase::BEGIN)
|
||||
{
|
||||
auto_lock_t _lk{ _mtx };
|
||||
get_cpu_cid_stack(_tid)->emplace_back(_cpu_cid);
|
||||
}
|
||||
else if constexpr(PhaseID == critical_trace::Phase::END)
|
||||
{
|
||||
auto_lock_t _lk{ _mtx };
|
||||
get_cpu_cid_stack(_tid)->pop_back();
|
||||
if(_gpu_cid == 0 && _cpu_cid % _update_freq == (_update_freq - 1))
|
||||
critical_trace::update(_tid);
|
||||
}
|
||||
}
|
||||
|
||||
void OnStop(const StopArgs&) override
|
||||
{
|
||||
// Undo any initialization done in OnStart.
|
||||
PRINT_HERE("%s", "stop");
|
||||
}
|
||||
|
||||
// Data sources can also have per-instance state.
|
||||
int my_custom_state = 0;
|
||||
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
// used for specifying the state of hosttrace
|
||||
enum class State : unsigned short
|
||||
{
|
||||
DelayedInit = 0,
|
||||
PreInit,
|
||||
Active,
|
||||
Finalized
|
||||
};
|
||||
|
||||
bool
|
||||
get_debug();
|
||||
|
||||
State&
|
||||
get_state();
|
||||
|
||||
std::unique_ptr<hosttrace_bundle_t>&
|
||||
get_main_bundle();
|
||||
|
||||
bool
|
||||
get_use_perfetto();
|
||||
|
||||
bool
|
||||
get_use_timemory();
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
template <typename Tp, size_t MaxThreads = 1024>
|
||||
struct hosttrace_thread_data
|
||||
{
|
||||
static constexpr size_t max_supported_threads = MaxThreads;
|
||||
using instance_array_t = std::array<std::unique_ptr<Tp>, max_supported_threads>;
|
||||
|
||||
template <typename... Args>
|
||||
static void construct(Args&&...);
|
||||
static std::unique_ptr<Tp>& instance();
|
||||
static instance_array_t& instances();
|
||||
};
|
||||
|
||||
template <typename Tp, size_t MaxThreads>
|
||||
template <typename... Args>
|
||||
void
|
||||
hosttrace_thread_data<Tp, MaxThreads>::construct(Args&&... _args)
|
||||
{
|
||||
static thread_local bool _v = [&_args...]() {
|
||||
instances().at(threading::get_id()) =
|
||||
std::make_unique<Tp>(std::forward<Args>(_args)...);
|
||||
return true;
|
||||
}();
|
||||
(void) _v;
|
||||
tim::consume_parameters(_tid, _cpu_cid, _gpu_cid, _parent_cid, _ts_beg, _ts_val,
|
||||
_hash, _depth, _prio);
|
||||
}
|
||||
|
||||
template <typename Tp, size_t MaxThreads>
|
||||
std::unique_ptr<Tp>&
|
||||
hosttrace_thread_data<Tp, MaxThreads>::instance()
|
||||
{
|
||||
return instances().at(threading::get_id());
|
||||
}
|
||||
|
||||
template <typename Tp, size_t MaxThreads>
|
||||
typename hosttrace_thread_data<Tp, MaxThreads>::instance_array_t&
|
||||
hosttrace_thread_data<Tp, MaxThreads>::instances()
|
||||
{
|
||||
static auto _v = instance_array_t{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
// there are currently some strange things that happen with vector<bundle_t> so using
|
||||
// vector<bundle_t*> and timemory's ring_buffer_allocator to create contiguous memory-page
|
||||
// aligned instances of the bundle
|
||||
struct hosttrace_timemory_data
|
||||
{
|
||||
static constexpr size_t max_supported_threads = 1024;
|
||||
using instance_array_t = std::array<hosttrace_timemory_data, max_supported_threads>;
|
||||
|
||||
bundle_allocator_t allocator{};
|
||||
std::vector<bundle_t*> bundles{};
|
||||
|
||||
static instance_array_t& instances();
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <timemory/compat/macros.h>
|
||||
|
||||
// forward decl of the API
|
||||
// C-linkage forward declarations of the hosttrace tracing entry points.
// Every declaration carries TIMEMORY_VISIBILITY("default") (macro provided by
// <timemory/compat/macros.h>, included above).
// NOTE(review): presumably this keeps the symbols exported when the library is
// built with hidden default visibility -- confirm against the macro definition.
extern "C"
|
||||
{
|
||||
// push/pop take the name of the traced region as a C string
void hosttrace_push_trace(const char* name) TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_pop_trace(const char* name) TIMEMORY_VISIBILITY("default");
|
||||
// NOTE(review): parameters are unnamed here; their meaning must be taken from
// the definition, which is not visible in this header.
void hosttrace_trace_init(const char*, bool, const char*)
|
||||
TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_finalize(void) TIMEMORY_VISIBILITY("default");
|
||||
// takes an environment variable name and the value for it
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
|
||||
TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_set_mpi(bool use, bool attached) TIMEMORY_VISIBILITY("default");
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <timemory/api.hpp>
|
||||
#include <timemory/backends/dmp.hpp>
|
||||
#include <timemory/backends/process.hpp>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <sys/types.h>
|
||||
#include <thread>
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// timemory api struct
|
||||
// Empty tag type deriving from tim::concepts::api. It carries no data or
// behavior of its own; it is only passed as a template argument (e.g. to
// tim::component_bundle and the gotcha aliases) to identify this project's API.
struct hosttrace : tim::concepts::api
|
||||
{};
|
||||
@@ -0,0 +1,163 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/api.hpp"
|
||||
#include "library/common.hpp"
|
||||
#include "library/fork_gotcha.hpp"
|
||||
#include "library/mpi_gotcha.hpp"
|
||||
#include "library/roctracer.hpp"
|
||||
#include "library/state.hpp"
|
||||
#include "library/timemory.hpp"
|
||||
|
||||
#include <timemory/backends/threading.hpp>
|
||||
|
||||
#include <string_view>
|
||||
|
||||
// bundle of components around hosttrace_init and hosttrace_finalize
|
||||
using main_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
|
||||
comp::cpu_util, comp::roctracer, papi_tot_ins,
|
||||
comp::user_global_bundle, fork_gotcha_t, mpi_gotcha_t>;
|
||||
|
||||
// bundle of components used in instrumentation
|
||||
using instrumentation_bundle_t =
|
||||
tim::component_bundle<hosttrace, comp::wall_clock*, comp::user_global_bundle*>;
|
||||
|
||||
// allocator for instrumentation_bundle_t
|
||||
using bundle_allocator_t = tim::data::ring_buffer_allocator<instrumentation_bundle_t>;
|
||||
|
||||
// bundle of components around each thread
|
||||
using hosttrace_thread_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
|
||||
comp::thread_cpu_util,
|
||||
#if defined(TIMEMORY_RUSAGE_THREAD) && TIMEMORY_RUSAGE_THREAD > 0
|
||||
comp::peak_rss,
|
||||
#endif
|
||||
papi_tot_ins>;
|
||||
|
||||
//
|
||||
// Initialization routines
|
||||
//
|
||||
void
|
||||
configure_settings();
|
||||
|
||||
void
|
||||
print_config_settings(std::ostream& _os,
|
||||
std::function<bool(const std::string_view&)>&& _filter);
|
||||
|
||||
std::string&
|
||||
get_exe_name();
|
||||
|
||||
//
|
||||
// User-configurable settings
|
||||
//
|
||||
std::string
|
||||
get_config_file();
|
||||
|
||||
bool
|
||||
get_debug();
|
||||
|
||||
bool
|
||||
get_use_perfetto();
|
||||
|
||||
bool
|
||||
get_use_timemory();
|
||||
|
||||
bool&
|
||||
get_use_pid();
|
||||
|
||||
bool
|
||||
get_use_mpip();
|
||||
|
||||
bool
|
||||
get_use_critical_trace();
|
||||
|
||||
bool
|
||||
get_roctracer_timeline_profile();
|
||||
|
||||
bool
|
||||
get_roctracer_flat_profile();
|
||||
|
||||
bool
|
||||
get_trace_hsa_api();
|
||||
|
||||
bool
|
||||
get_trace_hsa_activity();
|
||||
|
||||
bool
|
||||
get_critical_trace_debug();
|
||||
|
||||
bool
|
||||
get_critical_trace_serialize_names();
|
||||
|
||||
size_t
|
||||
get_perfetto_shmem_size_hint();
|
||||
|
||||
size_t
|
||||
get_perfetto_buffer_size();
|
||||
|
||||
uint64_t
|
||||
get_critical_trace_update_freq();
|
||||
|
||||
uint64_t
|
||||
get_critical_trace_num_threads();
|
||||
|
||||
std::string
|
||||
get_trace_hsa_api_types();
|
||||
|
||||
std::string&
|
||||
get_backend();
|
||||
|
||||
std::string
|
||||
get_perfetto_output_filename();
|
||||
|
||||
int64_t
|
||||
get_critical_trace_count();
|
||||
|
||||
size_t&
|
||||
get_sample_rate();
|
||||
|
||||
int64_t
|
||||
get_critical_trace_per_row();
|
||||
|
||||
//
|
||||
// Runtime configuration data
|
||||
//
|
||||
State&
|
||||
get_state();
|
||||
|
||||
std::unique_ptr<main_bundle_t>&
|
||||
get_main_bundle();
|
||||
|
||||
std::atomic<uint64_t>&
|
||||
get_cpu_cid();
|
||||
|
||||
std::unique_ptr<std::vector<uint64_t>>&
|
||||
get_cpu_cid_stack(int64_t _tid = threading::get_id());
|
||||
@@ -0,0 +1,209 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/config.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "timemory/tpls/cereal/cereal/cereal.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace critical_trace
|
||||
{
|
||||
// Identifies which device an entry executed on. Stored as a `short`; the numeric
// values are written out explicitly so they are visible without counting.
enum class Device : short
{
    NONE = 0,
    CPU  = 1,
    GPU  = 2,
};
|
||||
|
||||
// Marks what part of a measurement an entry represents. Stored as a `short`; the
// numeric values are written out explicitly so they are visible without counting.
enum class Phase : short
{
    NONE  = 0,
    BEGIN = 1,
    END   = 2,
    DELTA = 3,
};
|
||||
|
||||
struct entry
|
||||
{
|
||||
entry() = default;
|
||||
~entry() = default;
|
||||
entry(const entry&) = default;
|
||||
entry(entry&&) noexcept = default;
|
||||
entry& operator=(const entry&) = default;
|
||||
entry& operator=(entry&&) noexcept = default;
|
||||
|
||||
uint16_t priority = 0; // priority value (for sorting)
|
||||
Device device = Device::CPU; // which device it executed on
|
||||
Phase phase = Phase::NONE; // start / stop / unspecified
|
||||
uint16_t depth = 0; // call-stack depth
|
||||
int64_t tid = 0; // thread id it was registered on
|
||||
uint64_t cpu_cid = 0; // CPU correlation id
|
||||
uint64_t gpu_cid = 0; // GPU correlation id
|
||||
uint64_t parent_cid = 0; // parent CPU correlation id
|
||||
int64_t begin_ns = 0; // timestamp of start
|
||||
int64_t end_ns = 0; // timestamp of end
|
||||
size_t hash = 0; // hash for name
|
||||
|
||||
bool operator==(const entry& rhs) const;
|
||||
bool operator!=(const entry& rhs) const { return !(*this == rhs); }
|
||||
bool operator<(const entry& rhs) const;
|
||||
bool operator>(const entry& rhs) const;
|
||||
bool operator<=(const entry& rhs) const { return !(*this > rhs); }
|
||||
bool operator>=(const entry& rhs) const { return !(*this < rhs); }
|
||||
|
||||
entry& operator+=(const entry& rhs);
|
||||
|
||||
size_t get_hash() const;
|
||||
int64_t get_timestamp() const;
|
||||
|
||||
int64_t get_cost() const;
|
||||
|
||||
bool is_bounded(const entry& rhs) const;
|
||||
int64_t get_overlap(const entry& rhs) const;
|
||||
int64_t get_independent(const entry& rhs) const;
|
||||
|
||||
int64_t get_overlap(const entry& rhs, int64_t _tid) const;
|
||||
int64_t get_independent(const entry& rhs, int64_t _tid) const;
|
||||
bool is_bounded(const entry& rhs, int64_t _tid) const;
|
||||
|
||||
void write(std::ostream& _os) const;
|
||||
|
||||
static bool is_delta(const entry&, const std::string_view&);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& _os, const entry& _v)
|
||||
{
|
||||
_v.write(_os);
|
||||
return _os;
|
||||
}
|
||||
template <typename Archive>
|
||||
void serialize(Archive& ar, unsigned int);
|
||||
};
|
||||
|
||||
template <typename Archive>
|
||||
void
|
||||
entry::serialize(Archive& ar, unsigned int)
|
||||
{
|
||||
namespace cereal = tim::cereal;
|
||||
ar(cereal::make_nvp("priority", priority), cereal::make_nvp("device", device),
|
||||
cereal::make_nvp("phase", phase), cereal::make_nvp("depth", depth),
|
||||
cereal::make_nvp("tid", tid), cereal::make_nvp("cpu_cid", cpu_cid),
|
||||
cereal::make_nvp("gpu_cid", gpu_cid), cereal::make_nvp("parent_cid", parent_cid),
|
||||
cereal::make_nvp("begin_ns", begin_ns), cereal::make_nvp("end_ns", end_ns),
|
||||
cereal::make_nvp("hash", hash));
|
||||
|
||||
if(get_critical_trace_serialize_names())
|
||||
{
|
||||
std::string _name{};
|
||||
if(hash > 0) _name = tim::demangle(tim::get_hash_identifier(hash));
|
||||
ar(cereal::make_nvp("name", _name));
|
||||
}
|
||||
}
|
||||
|
||||
struct call_chain : private std::vector<entry>
|
||||
{
|
||||
using base_type = std::vector<entry>;
|
||||
|
||||
using base_type::at;
|
||||
using base_type::back;
|
||||
using base_type::begin;
|
||||
using base_type::cbegin;
|
||||
using base_type::cend;
|
||||
using base_type::clear;
|
||||
using base_type::emplace_back;
|
||||
using base_type::empty;
|
||||
using base_type::end;
|
||||
using base_type::erase;
|
||||
using base_type::front;
|
||||
using base_type::pop_back;
|
||||
using base_type::push_back;
|
||||
using base_type::rbegin;
|
||||
using base_type::rend;
|
||||
using base_type::reserve;
|
||||
using base_type::size;
|
||||
|
||||
size_t get_hash() const;
|
||||
int64_t get_cost(int64_t _tid = -1) const;
|
||||
int64_t get_overlap(int64_t _tid = -1) const;
|
||||
int64_t get_independent(int64_t _tid = -1) const;
|
||||
static std::vector<call_chain>& get_top_chains();
|
||||
|
||||
bool operator==(const call_chain& rhs) const;
|
||||
bool operator!=(const call_chain& rhs) const { return !(*this == rhs); }
|
||||
friend std::ostream& operator<<(std::ostream& _os, const call_chain& _v)
|
||||
{
|
||||
size_t _n = 0;
|
||||
for(const auto& itr : _v)
|
||||
_os << " [" << _n++ << "] " << itr << "\n";
|
||||
return _os;
|
||||
}
|
||||
|
||||
template <typename Archive>
|
||||
void serialize(Archive& ar, unsigned int)
|
||||
{
|
||||
namespace cereal = tim::cereal;
|
||||
ar(cereal::make_nvp("call_chain", static_cast<base_type&>(*this)));
|
||||
}
|
||||
|
||||
template <Device DevT>
|
||||
void generate_perfetto(std::set<entry>& _used) const;
|
||||
|
||||
template <bool BoolV = true, typename FuncT>
|
||||
bool query(FuncT&&) const;
|
||||
};
|
||||
|
||||
using hash_ids = std::unordered_set<std::string>;
|
||||
|
||||
uint64_t
|
||||
get_update_frequency();
|
||||
|
||||
std::unique_ptr<call_chain>&
|
||||
get(int64_t _tid = threading::get_id());
|
||||
|
||||
size_t
|
||||
add_hash_id(const std::string& _label);
|
||||
|
||||
void
|
||||
add_hash_id(const hash_ids&);
|
||||
|
||||
void
|
||||
update(int64_t _tid = threading::get_id());
|
||||
|
||||
void
|
||||
compute(int64_t _tid = threading::get_id());
|
||||
|
||||
struct id
|
||||
{};
|
||||
|
||||
} // namespace critical_trace
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <timemory/api.hpp>
|
||||
#include <timemory/backends/dmp.hpp>
|
||||
#include <timemory/backends/process.hpp>
|
||||
#include <timemory/utility/utility.hpp>
|
||||
|
||||
// condition evaluated by HOSTTRACE_DEBUG
bool get_debug();

// condition evaluated by HOSTTRACE_CT_DEBUG
bool get_critical_trace_debug();
|
||||
|
||||
// Integer printed in the second "[...]" field of every message: the value of
// tim::dmp::rank() when TIMEMORY_USE_MPI is defined, tim::process::get_id() otherwise.
#if defined(TIMEMORY_USE_MPI)
#    define HOSTTRACE_PROCESS_IDENTIFIER static_cast<int>(tim::dmp::rank())
#else
#    define HOSTTRACE_PROCESS_IDENTIFIER static_cast<int>(tim::process::get_id())
#endif

// When COND is true: flushes stderr, takes the lock associated with std::cerr's type,
// prints "[hosttrace][<process identifier>][<thread id>] " followed by the
// printf-style arguments, and flushes stderr again.
//
// Expands to a bare `if(COND) { ... }` statement (not wrapped in do/while), so take
// care when using it directly in front of an `else`.
#define HOSTTRACE_CONDITIONAL_PRINT(COND, ...)                                           \
    if(COND)                                                                             \
    {                                                                                    \
        fflush(stderr);                                                                  \
        tim::auto_lock_t _lk{ tim::type_mutex<decltype(std::cerr)>() };                  \
        fprintf(stderr, "[hosttrace][%i][%li] ", HOSTTRACE_PROCESS_IDENTIFIER,           \
                tim::threading::get_id());                                               \
        fprintf(stderr, __VA_ARGS__);                                                    \
        fflush(stderr);                                                                  \
    }
|
||||
|
||||
// Reduced form of the conditional print: the prefix is only "[hosttrace] ", without
// the process and thread fields. When COND is true it flushes stderr, takes the lock
// associated with std::cerr's type, prints the prefix and the printf-style arguments,
// then flushes stderr again.
#define HOSTTRACE_CONDITIONAL_BASIC_PRINT(COND, ...)                                     \
    if(COND)                                                                             \
    {                                                                                    \
        fflush(stderr);                                                                  \
        tim::auto_lock_t _lk{ tim::type_mutex<decltype(std::cerr)>() };                  \
        fprintf(stderr, "[hosttrace] ");                                                 \
        fprintf(stderr, __VA_ARGS__);                                                    \
        fflush(stderr);                                                                  \
    }
|
||||
|
||||
// Shorthands over HOSTTRACE_CONDITIONAL_PRINT that differ only in the condition:
//   HOSTTRACE_DEBUG    -> prints when get_debug() is true
//   HOSTTRACE_PRINT    -> always prints
//   HOSTTRACE_CT_DEBUG -> prints when get_critical_trace_debug() is true
#define HOSTTRACE_DEBUG(...)                                                             \
    HOSTTRACE_CONDITIONAL_PRINT(get_debug(), __VA_ARGS__)

#define HOSTTRACE_PRINT(...)                                                             \
    HOSTTRACE_CONDITIONAL_PRINT(true, __VA_ARGS__)

#define HOSTTRACE_CT_DEBUG(...)                                                          \
    HOSTTRACE_CONDITIONAL_PRINT(get_critical_trace_debug(), __VA_ARGS__)
|
||||
@@ -0,0 +1,42 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// The @NAME@ tokens below are placeholders that are replaced when this template is
// configured. Formatting is disabled so the placeholders are not re-flowed.
// NOTE(review): if a numeric placeholder is substituted with an empty value, the
// corresponding macro expands to nothing -- confirm the values are always set.
// clang-format off
#define HOSTTRACE_HIP_VERSION_STRING "@HIP_VERSION@"
#define HOSTTRACE_HIP_VERSION_MAJOR  @HIP_VERSION_MAJOR@
#define HOSTTRACE_HIP_VERSION_MINOR  @HIP_VERSION_MINOR@
#define HOSTTRACE_HIP_VERSION_PATCH  @HIP_VERSION_PATCH@
// clang-format on

// Path of the kfdwrapper library: the configured location when
// HOSTTRACE_USE_ROCTRACER is defined, a fixed /opt/rocm location otherwise.
#if !defined(HOSTTRACE_USE_ROCTRACER)
#    define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "/opt/rocm/roctracer/lib/libkfdwrapper64.so"
#else
#    define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "@roctracer_kfdwrapper_LIBRARY@"
#endif
|
||||
@@ -0,0 +1,71 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/debug.hpp"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <string>
|
||||
#include <timemory/environment.hpp>
|
||||
|
||||
struct dynamic_library
|
||||
{
|
||||
dynamic_library() = delete;
|
||||
dynamic_library(const dynamic_library&) = delete;
|
||||
dynamic_library(dynamic_library&&) noexcept = default;
|
||||
dynamic_library& operator=(const dynamic_library&) = delete;
|
||||
dynamic_library& operator=(dynamic_library&&) noexcept = default;
|
||||
|
||||
dynamic_library(const char* _env, const char* _fname,
|
||||
int _flags = (RTLD_NOW | RTLD_GLOBAL), bool _store = false)
|
||||
: envname{ _env }
|
||||
, filename{ tim::get_env<std::string>(_env, _fname, _store) }
|
||||
, flags{ _flags }
|
||||
{
|
||||
if(!filename.empty())
|
||||
{
|
||||
handle = dlopen(filename.c_str(), flags);
|
||||
if(!handle)
|
||||
{
|
||||
HOSTTRACE_DEBUG("%s\n", dlerror());
|
||||
}
|
||||
dlerror(); // Clear any existing error
|
||||
}
|
||||
}
|
||||
|
||||
~dynamic_library()
|
||||
{
|
||||
if(handle) dlclose(handle);
|
||||
}
|
||||
|
||||
std::string envname = {};
|
||||
std::string filename = {};
|
||||
int flags = 0;
|
||||
void* handle = nullptr;
|
||||
};
|
||||
@@ -0,0 +1,48 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/common.hpp"
|
||||
#include "library/timemory.hpp"
|
||||
|
||||
// this is used to wrap fork()
|
||||
struct fork_gotcha : comp::base<fork_gotcha, void>
|
||||
{
|
||||
using gotcha_data_t = comp::gotcha_data;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
|
||||
|
||||
// this will get called right before fork
|
||||
void audit(const gotcha_data_t& _data, audit::incoming);
|
||||
|
||||
// this will get called right after fork with the return value
|
||||
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
|
||||
};
|
||||
|
||||
using fork_gotcha_t = comp::gotcha<4, tim::component_tuple<fork_gotcha>, hosttrace>;
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/timemory.hpp"
|
||||
|
||||
// timemory component which calls hosttrace functions
|
||||
// (used in gotcha wrappers)
|
||||
struct hosttrace_component : comp::base<hosttrace_component, void>
|
||||
{
|
||||
void start();
|
||||
void stop();
|
||||
void set_prefix(const char*);
|
||||
|
||||
private:
|
||||
const char* m_prefix = nullptr;
|
||||
};
|
||||
@@ -0,0 +1,54 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/common.hpp"
|
||||
#include "library/timemory.hpp"
|
||||
|
||||
// this is used to wrap MPI_Init and MPI_Init_thread
|
||||
struct mpi_gotcha : comp::base<mpi_gotcha, void>
|
||||
{
|
||||
using gotcha_data_t = comp::gotcha_data;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(mpi_gotcha)
|
||||
|
||||
// this will get called right before MPI_Init with that functions arguments
|
||||
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***);
|
||||
|
||||
// this will get called right before MPI_Init_thread with that functions arguments
|
||||
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*);
|
||||
|
||||
// this will get called right after MPI_Init and MPI_Init_thread with the return value
|
||||
void audit(const gotcha_data_t& _data, audit::outgoing, int _retval);
|
||||
|
||||
// this will get called right before MPI_Finalize
|
||||
void audit(const gotcha_data_t& _data, audit::incoming);
|
||||
};
|
||||
|
||||
using mpi_gotcha_t = comp::gotcha<4, tim::component_tuple<mpi_gotcha>, hosttrace>;
|
||||
@@ -0,0 +1,91 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// This header must be the first to define PERFETTO_CATEGORIES. If the macro already
// exists, another header got there first and the category list below would be
// ignored, so stop the build.
//
// The header name is written out literally: the text after #error is emitted as-is
// and is not macro-expanded, so a __FILE__ token would appear verbatim in the
// diagnostic instead of the path.
#if defined(PERFETTO_CATEGORIES)
#    error "PERFETTO_CATEGORIES is already defined. Please include \"library/perfetto.hpp\" before including any timemory files"
#endif
|
||||
|
||||
// Categories defined by hosttrace itself; both configurations below start from this
// list.
#define HOSTTRACE_PERFETTO_CATEGORIES                                                    \
    perfetto::Category("host").SetDescription("Host-side function tracing"),             \
        perfetto::Category("device").SetDescription("Device-side function tracing"),     \
        perfetto::Category("host-critical-trace")                                        \
            .SetDescription("Host-side critical traces"),                                \
        perfetto::Category("device-critical-trace")                                      \
            .SetDescription("Device-side critical traces")

#if defined(TIMEMORY_USE_PERFETTO)
// TIMEMORY_USE_PERFETTO is defined: append the "timemory" category and publish the
// full list as TIMEMORY_PERFETTO_CATEGORIES. This header neither includes perfetto.h
// nor invokes PERFETTO_DEFINE_CATEGORIES in this configuration.
#    define PERFETTO_CATEGORIES                                                          \
        HOSTTRACE_PERFETTO_CATEGORIES,                                                   \
            perfetto::Category("timemory")                                               \
                .SetDescription("Events from the timemory API")
#    define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
#else
// TIMEMORY_USE_PERFETTO is not defined: include perfetto directly and define the
// hosttrace categories here.
#    include <perfetto.h>
#    define PERFETTO_CATEGORIES HOSTTRACE_PERFETTO_CATEGORIES
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
#endif
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
// Perfetto data source compiled only when CUSTOM_DATA_SOURCE is defined. Each
// lifecycle callback currently does nothing but log its own name via PRINT_HERE.
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
{
public:
    // Hook for applying custom configuration derived from the TraceConfig carried
    // in SetupArgs.
    void OnSetup(const SetupArgs&) override { PRINT_HERE("%s", "setup"); }

    // Hook for start-up work such as initializing the GPU driver or enabling
    // counters. StartArgs contains the DataSourceDescriptor, which can be extended.
    void OnStart(const StartArgs&) override { PRINT_HERE("%s", "start"); }

    // Hook for undoing whatever OnStart set up.
    void OnStop(const StopArgs&) override { PRINT_HERE("%s", "stop"); }

    // Example of per-instance state held by a data source.
    int my_custom_state = 0;
};

PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
|
||||
@@ -0,0 +1,55 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "PTL/PTL.hh"
|
||||
#include "timemory/macros/attributes.hpp"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace tasking
|
||||
{
|
||||
std::mutex&
|
||||
get_roctracer_mutex();
|
||||
|
||||
PTL::ThreadPool&
|
||||
get_roctracer_thread_pool();
|
||||
|
||||
PTL::TaskGroup<void>&
|
||||
get_roctracer_task_group();
|
||||
|
||||
std::mutex&
|
||||
get_critical_trace_mutex();
|
||||
|
||||
PTL::ThreadPool&
|
||||
get_critical_trace_thread_pool();
|
||||
|
||||
PTL::TaskGroup<void>&
|
||||
get_critical_trace_task_group();
|
||||
} // namespace tasking
|
||||
+27
@@ -1,3 +1,30 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/dynamic_library.hpp"
|
||||
#include "library/perfetto.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
#include "library/roctracer.hpp"
|
||||
|
||||
#include <roctracer.h>
|
||||
#include <roctracer_ext.h>
|
||||
#include <roctracer_hcc.h>
|
||||
#include <roctracer_hip.h>
|
||||
|
||||
#define AMD_INTERNAL_BUILD 1
|
||||
#include <ext/hsa_rt_utils.hpp>
|
||||
#include <roctracer_hsa.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
// Evaluates a ROC-tracer API call exactly once and, when the returned status is
// non-zero, writes roctracer_error_string() followed by the stringified call to
// std::cerr. A failure is only reported: execution continues after the macro.
#define ROCTRACER_CALL(call)                                                             \
    do                                                                                   \
    {                                                                                    \
        /* distinctively named so it cannot shadow a variable used inside `call` */      \
        const int _roctracer_call_status = (call);                                       \
        if(_roctracer_call_status != 0)                                                  \
        {                                                                                \
            /* end the line so consecutive failures do not run together */               \
            std::cerr << roctracer_error_string() << " in: " << #call << '\n'            \
                      << std::flush;                                                     \
        }                                                                                \
    } while(0)
|
||||
|
||||
using hsa_timer_t = hsa_rt_utils::Timer;
|
||||
using timestamp_t = hsa_timer_t::timestamp_t;
|
||||
using roctracer_bundle_t = tim::component_bundle<hosttrace, comp::roctracer_data,
|
||||
comp::wall_clock, quirk::explicit_pop>;
|
||||
using roctracer_hsa_bundle_t = tim::component_bundle<hosttrace, comp::roctracer_data>;
|
||||
using roctracer_functions_t = std::vector<std::pair<std::string, std::function<void()>>>;
|
||||
|
||||
std::unique_ptr<hsa_timer_t>&
|
||||
get_hsa_timer();
|
||||
|
||||
// HSA API callback function
|
||||
void
|
||||
hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
|
||||
|
||||
void
|
||||
hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg);
|
||||
|
||||
void
|
||||
hip_exec_activity_callbacks(int64_t _tid);
|
||||
|
||||
// HIP API callback function
|
||||
void
|
||||
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
|
||||
|
||||
// Activity tracing callback
|
||||
void
|
||||
hip_activity_callback(const char* begin, const char* end, void*);
|
||||
|
||||
bool&
|
||||
roctracer_is_setup();
|
||||
|
||||
roctracer_functions_t&
|
||||
roctracer_setup_routines();
|
||||
|
||||
roctracer_functions_t&
|
||||
roctracer_tear_down_routines();
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// used for specifying the state of hosttrace
|
||||
// Lifecycle states of hosttrace, stored as an unsigned short. The numeric values are
// spelled out explicitly; they are the same values implicit numbering would produce.
enum class State : unsigned short
{
    DelayedInit = 0,  // NOTE(review): presumably "initialization deferred" -- confirm
    PreInit     = 1,  // not yet initialized
    Active      = 2,  // NOTE(review): presumably "tracing is running" -- confirm
    Finalized   = 3   // NOTE(review): presumably "finalization has run" -- confirm
};
|
||||
@@ -0,0 +1,126 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library/config.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
// Upper bound on the number of per-thread slots. HOSTTRACE_MAX_THREADS may be supplied
// by the build (e.g. as a compile definition); 1024 is used when it is not.
#ifndef HOSTTRACE_MAX_THREADS
#    define HOSTTRACE_MAX_THREADS 1024
#endif

// typed constant mirroring the macro; used as the default size of the per-thread arrays
static constexpr size_t max_supported_threads =
    static_cast<size_t>(HOSTTRACE_MAX_THREADS);
|
||||
|
||||
// Static storage of one std::unique_ptr<Tp> per thread slot (MaxThreads slots).
//
//  Tp          type of the per-thread object
//  Tag         distinguishes otherwise identical instantiations (separate storage)
//  MaxThreads  number of slots in the array
//
// All members are static; the struct itself is never instantiated by this interface.
template <typename Tp, typename Tag = void, size_t MaxThreads = max_supported_threads>
struct hosttrace_thread_data
{
    // tag type selecting the overloads which construct objects before returning
    using construct_on_init = std::true_type;
    using instance_array_t  = std::array<std::unique_ptr<Tp>, MaxThreads>;

    // plain accessors: return the existing slot(s) without constructing anything
    static instance_array_t&    instances();
    static std::unique_ptr<Tp>& instance();

    // constructs the calling thread's object from the given arguments
    template <typename... Args>
    static void construct(Args&&...);

    // construct(...) followed by access to the calling thread's slot
    template <typename... Args>
    static std::unique_ptr<Tp>& instance(construct_on_init, Args&&...);

    // returns an array in which every slot has been populated
    template <typename... Args>
    static instance_array_t& instances(construct_on_init, Args&&...);
};
|
||||
|
||||
template <typename Tp, typename Tag, size_t MaxThreads>
|
||||
template <typename... Args>
|
||||
void
|
||||
hosttrace_thread_data<Tp, Tag, MaxThreads>::construct(Args&&... _args)
|
||||
{
|
||||
static thread_local bool _v = [&_args...]() {
|
||||
instances().at(threading::get_id()) =
|
||||
std::make_unique<Tp>(std::forward<Args>(_args)...);
|
||||
return true;
|
||||
}();
|
||||
(void) _v;
|
||||
}
|
||||
|
||||
template <typename Tp, typename Tag, size_t MaxThreads>
|
||||
std::unique_ptr<Tp>&
|
||||
hosttrace_thread_data<Tp, Tag, MaxThreads>::instance()
|
||||
{
|
||||
return instances().at(threading::get_id());
|
||||
}
|
||||
|
||||
template <typename Tp, typename Tag, size_t MaxThreads>
|
||||
typename hosttrace_thread_data<Tp, Tag, MaxThreads>::instance_array_t&
|
||||
hosttrace_thread_data<Tp, Tag, MaxThreads>::instances()
|
||||
{
|
||||
static auto _v = instance_array_t{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
template <typename Tp, typename Tag, size_t MaxThreads>
|
||||
template <typename... Args>
|
||||
std::unique_ptr<Tp>&
|
||||
hosttrace_thread_data<Tp, Tag, MaxThreads>::instance(construct_on_init, Args&&... _args)
|
||||
{
|
||||
construct(std::forward<Args>(_args)...);
|
||||
return instances().at(threading::get_id());
|
||||
}
|
||||
|
||||
template <typename Tp, typename Tag, size_t MaxThreads>
|
||||
template <typename... Args>
|
||||
typename hosttrace_thread_data<Tp, Tag, MaxThreads>::instance_array_t&
|
||||
hosttrace_thread_data<Tp, Tag, MaxThreads>::instances(construct_on_init, Args&&... _args)
|
||||
{
|
||||
static auto _v = [&]() {
|
||||
auto _internal = instance_array_t{};
|
||||
for(size_t i = 0; i < MaxThreads; ++i)
|
||||
_internal.at(i) = std::make_unique<Tp>(std::forward<Args>(_args)...);
|
||||
return _internal;
|
||||
}();
|
||||
return _v;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
// there are currently some strange things that happen with
|
||||
// vector<instrumentation_bundle_t> so using vector<instrumentation_bundle_t*> and
|
||||
// timemory's ring_buffer_allocator to create contiguous memory-page aligned instances of
|
||||
// the bundle
|
||||
// Per-thread container for the instrumentation bundles: an allocator plus the list of
// bundle pointers obtained from it.
struct instrumentation_bundles
|
||||
{
|
||||
// one entry per supported thread; callers select their entry by thread id
using instance_array_t = std::array<instrumentation_bundles, max_supported_threads>;
|
||||
|
||||
// allocator from which the bundle objects are obtained; declared before `bundles`,
// so it is constructed first and destroyed last
bundle_allocator_t allocator{};
|
||||
// non-owning pointers to the bundles handed out by `allocator`
// NOTE(review): appears to be used as a stack (push on start, pop on stop) -- confirm
std::vector<instrumentation_bundle_t*> bundles{};
|
||||
|
||||
// access to the array holding the entries of all threads (defined elsewhere)
static instance_array_t& instances();
|
||||
};
|
||||
@@ -0,0 +1,63 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <timemory/api.hpp>
|
||||
#include <timemory/backends/mpi.hpp>
|
||||
#include <timemory/backends/process.hpp>
|
||||
#include <timemory/backends/threading.hpp>
|
||||
#include <timemory/components.hpp>
|
||||
#include <timemory/components/gotcha/mpip.hpp>
|
||||
#include <timemory/components/papi/papi_tuple.hpp>
|
||||
#include <timemory/config.hpp>
|
||||
#include <timemory/environment.hpp>
|
||||
#include <timemory/manager.hpp>
|
||||
#include <timemory/mpl/apply.hpp>
|
||||
#include <timemory/operations.hpp>
|
||||
#include <timemory/runtime.hpp>
|
||||
#include <timemory/settings.hpp>
|
||||
#include <timemory/storage.hpp>
|
||||
#include <timemory/variadic.hpp>
|
||||
|
||||
namespace audit = tim::audit;
|
||||
namespace comp = tim::component;
|
||||
namespace quirk = tim::quirk;
|
||||
namespace threading = tim::threading;
|
||||
namespace scope = tim::scope;
|
||||
namespace dmp = tim::dmp;
|
||||
namespace process = tim::process;
|
||||
namespace units = tim::units;
|
||||
namespace trait = tim::trait;
|
||||
|
||||
// JOIN(delim, args...) hands all of its arguments to
// tim::mpl::apply<std::string>::join, i.e. the same sort of functionality as python's
// " ".join([...]). An existing definition of JOIN is left untouched.
#ifndef JOIN
#    define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
#endif
|
||||
|
||||
using papi_tot_ins = comp::papi_tuple<PAPI_TOT_INS>;
|
||||
@@ -0,0 +1,29 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
@@ -1,27 +1,30 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2020, The Regents of the University of California,
|
||||
// through Lawrence Berkeley National Laboratory (subject to receipt of any
|
||||
// required approvals from the U.S. Dept. of Energy). All rights reserved.
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "hosttrace.hpp"
|
||||
|
||||
@@ -1183,7 +1186,7 @@ main(int argc, char** argv)
|
||||
auto mpie_fini_args = hosttrace_call_expr("HOSTTRACE_MPI_FINALIZE", "OFF");
|
||||
auto trace_call_args =
|
||||
hosttrace_call_expr("HOSTTRACE_COMPONENTS", default_components);
|
||||
auto use_mpi_call_args = hosttrace_call_expr("HOSTTRACE_USE_MPI", "ON");
|
||||
auto use_mpi_call_args = hosttrace_call_expr("HOSTTRACE_USE_PID", "ON");
|
||||
auto use_mpip_call_args = hosttrace_call_expr(
|
||||
"HOSTTRACE_USE_MPIP", (binary_rewrite && use_mpi && use_mpip) ? "ON" : "OFF");
|
||||
auto none_call_args = hosttrace_call_expr();
|
||||
@@ -1777,7 +1780,7 @@ main(int argc, char** argv)
|
||||
const auto& outf = outfile;
|
||||
if(outf.find('/') != string_t::npos)
|
||||
{
|
||||
auto outdir = outf.substr(0, outf.find_last_of('/') - 1);
|
||||
auto outdir = outf.substr(0, outf.find_last_of('/'));
|
||||
tim::makedir(outdir);
|
||||
}
|
||||
|
||||
|
||||
+20
-20
@@ -1,26 +1,30 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2020, The Regents of the University of California,
|
||||
// through Lawrence Berkeley National Laboratory (subject to receipt of any
|
||||
// required approvals from the U.S. Dept. of Energy). All rights reserved.
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "hosttrace.hpp"
|
||||
|
||||
@@ -47,7 +51,7 @@ get_loop_file_line_info(module_t* mutatee_module, procedure_t* f, flow_graph_t*
|
||||
|
||||
char fname[MUTNAMELEN];
|
||||
char mname[MUTNAMELEN];
|
||||
const char* typeName = nullptr;
|
||||
std::string typeName = {};
|
||||
|
||||
mutatee_module->getName(mname, MUTNAMELEN);
|
||||
|
||||
@@ -73,8 +77,6 @@ get_loop_file_line_info(module_t* mutatee_module, procedure_t* f, flow_graph_t*
|
||||
{
|
||||
typeName = returnType->getName();
|
||||
}
|
||||
else
|
||||
typeName = "void";
|
||||
|
||||
auto params = f->getParams();
|
||||
std::vector<string_t> _params;
|
||||
@@ -148,8 +150,8 @@ get_func_file_line_info(module_t* mutatee_module, procedure_t* f)
|
||||
char fname[MUTNAMELEN];
|
||||
char mname[MUTNAMELEN];
|
||||
int row1, col1, row2, col2;
|
||||
string_t filename;
|
||||
string_t typeName;
|
||||
string_t filename = {};
|
||||
string_t typeName = {};
|
||||
|
||||
mutatee_module->getName(mname, MUTNAMELEN);
|
||||
|
||||
@@ -164,8 +166,6 @@ get_func_file_line_info(module_t* mutatee_module, procedure_t* f)
|
||||
{
|
||||
typeName = returnType->getName();
|
||||
}
|
||||
else
|
||||
typeName = "void";
|
||||
|
||||
auto params = f->getParams();
|
||||
std::vector<string_t> _params;
|
||||
@@ -1,51 +1,39 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library.hpp"
|
||||
|
||||
bool
|
||||
get_debug()
|
||||
{
|
||||
static bool _v = tim::get_env("HOSTTRACE_DEBUG", false);
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Gives read/write access to the process-wide hosttrace state. The state starts out as
// State::PreInit; callers change it by assigning through the returned reference.
State&
get_state()
{
    static auto _current_state = State::PreInit;
    return _current_state;
}
|
||||
|
||||
bool
|
||||
get_use_perfetto()
|
||||
{
|
||||
// if using timemory, default to perfetto being off
|
||||
static auto _default_v = !tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", false, false);
|
||||
// explicit env control for using perfetto
|
||||
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_PERFETTO", _default_v);
|
||||
return _v;
|
||||
}
|
||||
|
||||
bool
|
||||
get_use_timemory()
|
||||
{
|
||||
// default to opposite of whether perfetto setting
|
||||
// to use both timemory and perfetto, both HOSTTRACE_USE_TIMEMORY and
|
||||
// HOSTTRACE_USE_PERFETTO must be true
|
||||
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", !get_use_perfetto());
|
||||
return _v;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
#include "library/config.hpp"
|
||||
#include "library/critical_trace.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include <string_view>
|
||||
|
||||
namespace
|
||||
{
|
||||
size_t&
|
||||
get_sample_rate()
|
||||
{
|
||||
static auto _v = tim::get_env<size_t>("HOSTTRACE_SAMPLE_RATE", 1);
|
||||
return _v;
|
||||
}
|
||||
|
||||
std::vector<bool>&
|
||||
get_sample_data()
|
||||
{
|
||||
@@ -53,17 +41,6 @@ get_sample_data()
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Read/write access to the "use MPI" flag. When TIMEMORY_USE_MPI is defined, the
// initial value comes from the HOSTTRACE_USE_MPI environment variable (default:
// false); otherwise the flag starts out as false without consulting the environment.
bool&
get_use_mpi()
{
    static bool _use_mpi =
#if defined(TIMEMORY_USE_MPI)
        tim::get_env("HOSTTRACE_USE_MPI", false);
#else
        false;
#endif
    return _use_mpi;
}
|
||||
|
||||
void
|
||||
setup_gotchas()
|
||||
{
|
||||
@@ -83,16 +60,17 @@ setup_gotchas()
|
||||
mpi_gotcha_t::template configure<0, int, int*, char***>("MPI_Init");
|
||||
mpi_gotcha_t::template configure<1, int, int*, char***, int, int*>(
|
||||
"MPI_Init_thread");
|
||||
#if defined(HOSTTRACE_USE_MPI_HEADERS)
|
||||
mpi_gotcha_t::template configure<3, int>("MPI_Finalize");
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
auto
|
||||
ensure_finalization()
|
||||
ensure_finalization(bool _static_init = false)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
if(!_static_init)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
}
|
||||
return scope::destructor{ []() { hosttrace_trace_finalize(); } };
|
||||
}
|
||||
|
||||
@@ -103,45 +81,6 @@ get_trace_session()
|
||||
return _session;
|
||||
}
|
||||
|
||||
auto
|
||||
get_perfetto_output_filename()
|
||||
{
|
||||
static auto _v = []() {
|
||||
// default name: perfetto-trace.<pid>.proto or perfetto-trace.<rank>.proto
|
||||
auto _default_fname = tim::settings::compose_output_filename(
|
||||
JOIN('.', "perfetto-trace", (get_use_mpi()) ? "%rank%" : "%pid%"), "proto");
|
||||
// have the default display the full path to the output file
|
||||
return tim::get_env<std::string>(
|
||||
"HOSTTRACE_OUTPUT_FILE",
|
||||
JOIN('/', tim::get_env<std::string>("PWD", ".", false), _default_fname));
|
||||
}();
|
||||
|
||||
auto _tmp = _v;
|
||||
auto _replace = [&_tmp](const std::string& _key, auto&& _val) {
|
||||
auto _pos = _tmp.find(_key);
|
||||
if(_pos != std::string::npos)
|
||||
_tmp.replace(_pos, _key.length(), std::to_string(_val()));
|
||||
};
|
||||
_replace("%pid%", []() { return process::get_id(); });
|
||||
_replace("%rank%", []() { return tim::mpi::rank(); });
|
||||
// backwards compatibility
|
||||
_replace("%p", []() { return process::get_id(); });
|
||||
return _tmp;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_backend()
|
||||
{
|
||||
// select inprocess, system, or both (i.e. all)
|
||||
static auto _v = tim::get_env_choice<std::string>(
|
||||
"HOSTTRACE_BACKEND",
|
||||
tim::get_env("HOSTTRACE_BACKEND_SYSTEM", false, false)
|
||||
? "system" // if HOSTTRACE_BACKEND_SYSTEM is true, default to system.
|
||||
: "inprocess", // Otherwise, default to inprocess
|
||||
{ "inprocess", "system", "all" });
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto
|
||||
is_system_backend()
|
||||
{
|
||||
@@ -150,10 +89,10 @@ is_system_backend()
|
||||
}
|
||||
|
||||
auto&
|
||||
get_timemory_data()
|
||||
get_instrumentation_bundles()
|
||||
{
|
||||
static thread_local auto& _v =
|
||||
hosttrace_timemory_data::instances().at(threading::get_id());
|
||||
instrumentation_bundles::instances().at(threading::get_id());
|
||||
return _v;
|
||||
}
|
||||
|
||||
@@ -166,6 +105,17 @@ get_functors()
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Read/write access to a map which exists once per thread (thread_local) and is empty
// until the caller inserts into it. Keys are 64-bit ids, values are
// (64-bit id, 16-bit number) tuples.
// NOTE(review): presumably maps a CPU correlation id to its parent id and depth --
// confirm against the critical-trace code.
auto&
get_cpu_cid_parents()
{
    using parent_info_t = std::tuple<uint64_t, uint16_t>;
    using parent_map_t  = std::unordered_map<uint64_t, parent_info_t>;

    static thread_local parent_map_t _parents{};
    return _parents;
}
|
||||
|
||||
using Device = critical_trace::Device;
|
||||
using Phase = critical_trace::Phase;
|
||||
|
||||
bool
|
||||
hosttrace_init_tooling()
|
||||
{
|
||||
@@ -184,26 +134,17 @@ hosttrace_init_tooling()
|
||||
return false;
|
||||
}
|
||||
|
||||
// always initialize timemory because gotcha wrappers are always used
|
||||
tim::settings::flamegraph_output() = false;
|
||||
tim::settings::cout_output() = false;
|
||||
tim::settings::file_output() = true;
|
||||
tim::settings::enable_signal_handler() = true;
|
||||
tim::settings::collapse_processes() = false;
|
||||
tim::settings::collapse_threads() = false;
|
||||
tim::settings::max_thread_bookmarks() = 1;
|
||||
tim::settings::global_components() = tim::get_env<std::string>(
|
||||
"HOSTTRACE_COMPONENTS", "wall_clock", get_use_timemory());
|
||||
int _threadpool_verbose = (get_debug()) ? 4 : -1;
|
||||
tasking::get_roctracer_thread_pool().set_verbose(_threadpool_verbose);
|
||||
tasking::get_critical_trace_thread_pool().set_verbose(_threadpool_verbose);
|
||||
|
||||
// enable timestamp directories when perfetto + mpi is activated
|
||||
if(get_use_perfetto() && get_use_mpi()) tim::settings::time_output() = true;
|
||||
// below will effectively do:
|
||||
// get_cpu_cid_stack(0)->emplace_back(-1);
|
||||
// plus query some env variables
|
||||
add_critical_trace<Device::CPU, Phase::NONE>(0, -1, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
auto _cmd = tim::read_command_line(process::get_id());
|
||||
auto _exe = (_cmd.empty()) ? "hosttrace" : _cmd.front();
|
||||
auto _pos = _exe.find_last_of('/');
|
||||
if(_pos < _exe.length() - 1) _exe = _exe.substr(_pos + 1);
|
||||
|
||||
tim::timemory_init({ _exe }, "hosttrace-");
|
||||
// configure the settings
|
||||
configure_settings();
|
||||
|
||||
if(get_sample_rate() < 1) get_sample_rate() = 1;
|
||||
get_sample_data().reserve(512);
|
||||
@@ -218,16 +159,18 @@ hosttrace_init_tooling()
|
||||
if(_comps.size() == 1 && _comps.find(TIMEMORY_WALL_CLOCK) != _comps.end())
|
||||
{
|
||||
// using wall_clock directly is lower overhead than using it via user_bundle
|
||||
bundle_t::get_initializer() = [](bundle_t& _bundle) {
|
||||
_bundle.initialize<comp::wall_clock>();
|
||||
};
|
||||
instrumentation_bundle_t::get_initializer() =
|
||||
[](instrumentation_bundle_t& _bundle) {
|
||||
_bundle.initialize<comp::wall_clock>();
|
||||
};
|
||||
}
|
||||
else if(!_comps.empty())
|
||||
{
|
||||
// use user_bundle for other than wall-clock
|
||||
bundle_t::get_initializer() = [](bundle_t& _bundle) {
|
||||
_bundle.initialize<comp::user_global_bundle>();
|
||||
};
|
||||
instrumentation_bundle_t::get_initializer() =
|
||||
[](instrumentation_bundle_t& _bundle) {
|
||||
_bundle.initialize<comp::user_global_bundle>();
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -252,9 +195,8 @@ hosttrace_init_tooling()
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
// environment settings
|
||||
auto shmem_size_hint =
|
||||
tim::get_env<size_t>("HOSTTRACE_SHMEM_SIZE_HINT_KB", 40960);
|
||||
auto buffer_size = tim::get_env<size_t>("HOSTTRACE_BUFFER_SIZE_KB", 1024000);
|
||||
auto shmem_size_hint = get_perfetto_shmem_size_hint();
|
||||
auto buffer_size = get_perfetto_buffer_size();
|
||||
|
||||
auto* buffer_config = cfg.add_buffers();
|
||||
buffer_config->set_size_kb(buffer_size);
|
||||
@@ -276,6 +218,7 @@ hosttrace_init_tooling()
|
||||
(void) get_perfetto_output_filename();
|
||||
}
|
||||
|
||||
auto _exe = get_exe_name();
|
||||
static auto _thread_init = [_exe]() {
|
||||
hosttrace_thread_data<hosttrace_thread_bundle_t>::construct(
|
||||
TIMEMORY_JOIN("", _exe, "/thread-", threading::get_id()),
|
||||
@@ -285,10 +228,11 @@ hosttrace_init_tooling()
|
||||
} };
|
||||
(void) _dtor;
|
||||
};
|
||||
|
||||
// functors for starting and stopping timemory
|
||||
static auto _push_timemory = [](const char* name) {
|
||||
_thread_init();
|
||||
auto& _data = get_timemory_data();
|
||||
auto& _data = get_instrumentation_bundles();
|
||||
// this generates a hash for the raw string array
|
||||
auto _hash = tim::add_hash_id(tim::string_view_t{ name });
|
||||
auto* _bundle = _data.allocator.allocate(1);
|
||||
@@ -311,7 +255,7 @@ hosttrace_init_tooling()
|
||||
};
|
||||
|
||||
static auto _pop_timemory = [](const char* name) {
|
||||
auto& _data = get_timemory_data();
|
||||
auto& _data = get_instrumentation_bundles();
|
||||
if(_data.bundles.empty())
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] skipped %s :: empty bundle stack\n",
|
||||
@@ -358,8 +302,22 @@ hosttrace_init_tooling()
|
||||
|
||||
if(dmp::rank() == 0)
|
||||
{
|
||||
tim::print_env(std::cerr,
|
||||
[](const std::string& _v) { return _v.find("HOSTTRACE_") == 0; });
|
||||
// generic filter for filtering relevant options
|
||||
auto _is_hosttrace_option = [](const auto& _v) {
|
||||
#if !defined(HOSTTRACE_USE_ROCTRACER)
|
||||
if(_v.find("HOSTTRACE_ROCTRACER_") == 0) return false;
|
||||
#endif
|
||||
if(!get_use_critical_trace() && _v.find("HOSTTRACE_CRITICAL_TRACE_") == 0)
|
||||
return false;
|
||||
return (_v.find("HOSTTRACE_") == 0) ||
|
||||
((_v.find("TIMEMORY_") != 0) && (_v.find("SIGNAL_") != 0));
|
||||
};
|
||||
|
||||
tim::print_env(std::cerr, [_is_hosttrace_option](const std::string& _v) {
|
||||
return _is_hosttrace_option(_v);
|
||||
});
|
||||
|
||||
print_config_settings(std::cerr, _is_hosttrace_option);
|
||||
}
|
||||
|
||||
if(get_use_perfetto() && !is_system_backend())
|
||||
@@ -421,6 +379,21 @@ extern "C"
|
||||
auto _enabled = (_sample_idx++ % _sample_rate == 0);
|
||||
get_sample_data().emplace_back(_enabled);
|
||||
if(_enabled) get_functors().first(name);
|
||||
if(get_use_critical_trace())
|
||||
{
|
||||
auto _ts = comp::wall_clock::record();
|
||||
auto _cid = get_cpu_cid()++;
|
||||
uint16_t _depth = (get_cpu_cid_stack()->empty())
|
||||
? get_cpu_cid_stack(0)->size()
|
||||
: get_cpu_cid_stack()->size() - 1;
|
||||
auto _parent_cid = (get_cpu_cid_stack()->empty())
|
||||
? get_cpu_cid_stack(0)->back()
|
||||
: get_cpu_cid_stack()->back();
|
||||
get_cpu_cid_parents().emplace(_cid, std::make_tuple(_parent_cid, _depth));
|
||||
add_critical_trace<Device::CPU, Phase::BEGIN>(
|
||||
threading::get_id(), _cid, 0, _parent_cid, _ts, 0,
|
||||
critical_trace::add_hash_id(name), _depth);
|
||||
}
|
||||
}
|
||||
|
||||
void hosttrace_pop_trace(const char* name)
|
||||
@@ -434,6 +407,20 @@ extern "C"
|
||||
if(_sample_data.back()) get_functors().second(name);
|
||||
_sample_data.pop_back();
|
||||
}
|
||||
if(get_use_critical_trace())
|
||||
{
|
||||
if(get_cpu_cid_stack() && !get_cpu_cid_stack()->empty())
|
||||
{
|
||||
auto _ts = comp::wall_clock::record();
|
||||
auto _cid = get_cpu_cid_stack()->back();
|
||||
uint64_t _parent_cid = 0;
|
||||
uint16_t _depth = 0;
|
||||
std::tie(_parent_cid, _depth) = get_cpu_cid_parents().at(_cid);
|
||||
add_critical_trace<Device::CPU, Phase::END>(
|
||||
threading::get_id(), _cid, 0, _parent_cid, _ts, _ts,
|
||||
critical_trace::add_hash_id(name), _depth);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -463,15 +450,19 @@ extern "C"
|
||||
comp::roctracer::tear_down();
|
||||
#endif
|
||||
|
||||
// join extra thread(s) used by roctracer
|
||||
HOSTTRACE_DEBUG("[%s] waiting for all roctracer tasks to complete...\n",
|
||||
__FUNCTION__);
|
||||
tasking::get_roctracer_task_group().join();
|
||||
|
||||
// stop the main bundle and report the high-level metrics
|
||||
if(get_main_bundle())
|
||||
{
|
||||
get_main_bundle()->stop();
|
||||
int64_t _id = (get_use_mpi()) ? dmp::rank() : process::get_id();
|
||||
std::stringstream _ss{};
|
||||
_ss << "[" << __FUNCTION__ << "][" << _id << "] " << *get_main_bundle()
|
||||
<< "\n";
|
||||
std::cerr << _ss.str();
|
||||
std::string _msg = JOIN("", *get_main_bundle());
|
||||
auto _pos = _msg.find(">>> ");
|
||||
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
|
||||
HOSTTRACE_PRINT("%s\n", _msg.c_str());
|
||||
get_main_bundle().reset();
|
||||
}
|
||||
|
||||
@@ -484,14 +475,15 @@ extern "C"
|
||||
if(itr && itr->get<comp::wall_clock>() &&
|
||||
!itr->get<comp::wall_clock>()->get_is_running())
|
||||
{
|
||||
std::stringstream _ss{};
|
||||
_ss << *itr << "\n";
|
||||
std::cerr << _ss.str();
|
||||
std::string _msg = JOIN("", *itr);
|
||||
auto _pos = _msg.find(">>> ");
|
||||
if(_pos != std::string::npos) _msg = _msg.substr(_pos + 5);
|
||||
HOSTTRACE_PRINT("%s\n", _msg.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// ensure that all the MT instances are flushed
|
||||
for(auto& itr : hosttrace_timemory_data::instances())
|
||||
for(auto& itr : instrumentation_bundles::instances())
|
||||
{
|
||||
while(!itr.bundles.empty())
|
||||
{
|
||||
@@ -503,6 +495,44 @@ extern "C"
|
||||
}
|
||||
}
|
||||
|
||||
if(get_use_critical_trace())
|
||||
{
|
||||
// increase the thread-pool size
|
||||
tasking::get_critical_trace_thread_pool().initialize_threadpool(
|
||||
get_critical_trace_num_threads());
|
||||
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
{
|
||||
using critical_trace_hash_data =
|
||||
hosttrace_thread_data<critical_trace::hash_ids, critical_trace::id>;
|
||||
|
||||
if(critical_trace_hash_data::instances().at(i))
|
||||
critical_trace::add_hash_id(
|
||||
*critical_trace_hash_data::instances().at(i));
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
{
|
||||
using critical_trace_chain_data =
|
||||
hosttrace_thread_data<critical_trace::call_chain>;
|
||||
|
||||
if(critical_trace_chain_data::instances().at(i))
|
||||
critical_trace::update(i); // launch update task
|
||||
}
|
||||
|
||||
// make sure outstanding hash tasks completed before compute
|
||||
HOSTTRACE_PRINT("[%s] waiting for all critical trace tasks to complete...\n",
|
||||
__FUNCTION__);
|
||||
tasking::get_critical_trace_task_group().join();
|
||||
|
||||
// launch compute task
|
||||
HOSTTRACE_PRINT("[%s] launching critical trace compute task...\n",
|
||||
__FUNCTION__);
|
||||
critical_trace::compute();
|
||||
}
|
||||
|
||||
tasking::get_critical_trace_task_group().join();
|
||||
|
||||
bool _perfetto_output_error = false;
|
||||
if(get_use_perfetto() && !is_system_backend())
|
||||
{
|
||||
@@ -530,20 +560,27 @@ extern "C"
|
||||
static_cast<double>(trace_data.size()) / units::KB,
|
||||
static_cast<double>(trace_data.size()) / units::MB,
|
||||
static_cast<double>(trace_data.size()) / units::GB);
|
||||
std::ofstream output{};
|
||||
output.open(get_perfetto_output_filename(), std::ios::out | std::ios::binary);
|
||||
if(!output)
|
||||
std::ofstream ofs{};
|
||||
if(!tim::filepath::open(ofs, get_perfetto_output_filename(),
|
||||
std::ios::out | std::ios::binary))
|
||||
{
|
||||
fprintf(stderr, "[%s]> Error opening '%s'...\n", __FUNCTION__,
|
||||
get_perfetto_output_filename().c_str());
|
||||
_perfetto_output_error = true;
|
||||
}
|
||||
else
|
||||
output.write(&trace_data[0], trace_data.size());
|
||||
output.close();
|
||||
ofs.write(&trace_data[0], trace_data.size());
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
// these should be destroyed before timemory is finalized, especially the
|
||||
// roctracer thread-pool
|
||||
tasking::get_roctracer_thread_pool().destroy_threadpool();
|
||||
tasking::get_critical_trace_thread_pool().destroy_threadpool();
|
||||
|
||||
HOSTTRACE_DEBUG("Finalizing timemory...\n");
|
||||
tim::timemory_finalize();
|
||||
HOSTTRACE_DEBUG("Finalizing timemory... Done\n");
|
||||
|
||||
if(_perfetto_output_error)
|
||||
throw std::runtime_error("Unable to create perfetto output file");
|
||||
@@ -564,20 +601,17 @@ extern "C"
|
||||
{
|
||||
auto& _main_bundle = get_main_bundle();
|
||||
_main_bundle->start();
|
||||
#if defined(TIMEMORY_USE_MPI)
|
||||
tim::set_env("HOSTTRACE_USE_MPI", "ON", 1);
|
||||
get_use_mpi() = true;
|
||||
#endif
|
||||
get_state() = State::DelayedInit;
|
||||
get_use_pid() = true;
|
||||
get_state() = State::DelayedInit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<hosttrace_bundle_t>&
|
||||
std::unique_ptr<main_bundle_t>&
|
||||
get_main_bundle()
|
||||
{
|
||||
static auto _v =
|
||||
(setup_gotchas(), std::make_unique<hosttrace_bundle_t>(
|
||||
(setup_gotchas(), std::make_unique<main_bundle_t>(
|
||||
"hosttrace", quirk::config<quirk::auto_start>{}));
|
||||
return _v;
|
||||
}
|
||||
@@ -587,5 +621,5 @@ namespace
|
||||
// if static objects are destroyed randomly (relatively uncommon behavior)
|
||||
// this might call finalization before perfetto ends the tracing session
|
||||
// but static variable in hosttrace_init_tooling is more likely
|
||||
auto _ensure_finalization = ensure_finalization();
|
||||
auto _ensure_finalization = ensure_finalization(true);
|
||||
} // namespace
|
||||
|
||||
@@ -0,0 +1,528 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "timemory/backends/dmp.hpp"
|
||||
#include "timemory/backends/process.hpp"
|
||||
#include "timemory/settings/types.hpp"
|
||||
#include "timemory/utility/argparse.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <numeric>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <timemory/environment.hpp>
|
||||
#include <timemory/settings.hpp>
|
||||
|
||||
using settings = tim::settings;
|
||||
|
||||
namespace
|
||||
{
|
||||
auto
|
||||
get_config()
|
||||
{
|
||||
static auto _once = (configure_settings(), true);
|
||||
return settings::shared_instance();
|
||||
(void) _once;
|
||||
}
|
||||
|
||||
#define HOSTTRACE_CONFIG_SETTING(TYPE, ENV_NAME, DESCRIPTION, INITIAL_VALUE) \
|
||||
_config->insert<TYPE, TYPE>(ENV_NAME, ENV_NAME, DESCRIPTION, INITIAL_VALUE, \
|
||||
std::vector<std::string>{})
|
||||
} // namespace
|
||||
|
||||
void
|
||||
configure_settings()
|
||||
{
|
||||
static bool _once = false;
|
||||
if(_once) return;
|
||||
_once = true;
|
||||
|
||||
static auto _config = settings::shared_instance();
|
||||
// auto* _config = settings::instance();
|
||||
|
||||
// if using timemory, default to perfetto being off
|
||||
auto _default_perfetto_v =
|
||||
!tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", false, false);
|
||||
|
||||
auto _default_config_file =
|
||||
JOIN("/", tim::get_env<std::string>("HOME", "."), "hosttrace.cfg");
|
||||
|
||||
auto _system_backend = tim::get_env("HOSTTRACE_BACKEND_SYSTEM", false, false);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_CONFIG_FILE",
|
||||
"Configuration file of hosttrace and timemory settings",
|
||||
_default_config_file);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_DEBUG", "Enable debugging output",
|
||||
_config->get_debug());
|
||||
|
||||
auto _hosttrace_debug = _config->get<bool>("HOSTTRACE_DEBUG");
|
||||
if(_hosttrace_debug) tim::set_env("TIMEMORY_DEBUG_SETTINGS", "1", 0);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_USE_PERFETTO", "Enable perfetto backend",
|
||||
_default_perfetto_v);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_USE_TIMEMORY", "Enable timemory backend",
|
||||
!_config->get<bool>("HOSTTRACE_USE_PERFETTO"));
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
bool, "HOSTTRACE_USE_PID",
|
||||
"Enable tagging filenames with process identifier (either MPI rank or pid)",
|
||||
true);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
size_t, "HOSTTRACE_SAMPLE_RATE",
|
||||
"Counts every function call (N), only record function if (N % <VALUE> == 0)", 1);
|
||||
|
||||
auto _backend = tim::get_env_choice<std::string>(
|
||||
"HOSTTRACE_BACKEND",
|
||||
(_system_backend)
|
||||
? "system" // if HOSTTRACE_BACKEND_SYSTEM is true, default to system.
|
||||
: "inprocess", // Otherwise, default to inprocess
|
||||
{ "inprocess", "system", "all" }, false);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_BACKEND",
|
||||
"Specify the perfetto backend to activate. Options are: "
|
||||
"'inprocess', 'system', or 'all'",
|
||||
_backend);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_CRITICAL_TRACE",
|
||||
"Enable generation of the critical trace", false);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
bool, "HOSTTRACE_ROCTRACER_TIMELINE_PROFILE",
|
||||
"Create unique entries for every kernel with timemory backend",
|
||||
_config->get_timeline_profile());
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
bool, "HOSTTRACE_ROCTRACER_FLAT_PROFILE",
|
||||
"Ignore hierarchy in all kernels entries with timemory backend",
|
||||
_config->get_flat_profile());
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_ROCTRACER_HSA_ACTIVITY",
|
||||
"Enable HSA activity tracing support", false);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_ROCTRACER_HSA_API",
|
||||
"Enable HSA API tracing support", false);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_ROCTRACER_HSA_API_TYPES",
|
||||
"HSA API type to collect", "");
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_CRITICAL_TRACE_DEBUG",
|
||||
"Enable debugging for critical trace", _hosttrace_debug);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
bool, "HOSTTRACE_CRITICAL_TRACE_SERIALIZE_NAMES",
|
||||
"Include names in serialization of critical trace (mainly for debugging)",
|
||||
_hosttrace_debug);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(size_t, "HOSTTRACE_SHMEM_SIZE_HINT_KB",
|
||||
"Hint for shared-memory buffer size in perfetto (in KB)",
|
||||
40960);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(size_t, "HOSTTRACE_BUFFER_SIZE_KB",
|
||||
"Size of perfetto buffer (in KB)", 1024000);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(int64_t, "HOSTTRACE_CRITICAL_TRACE_COUNT",
|
||||
"Number of critical trace to export (0 == all)", 0);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(uint64_t, "HOSTTRACE_CRITICAL_TRACE_BUFFER_COUNT",
|
||||
"Number of critical trace records to store in thread-local "
|
||||
"memory before submitting to shared buffer",
|
||||
2000);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
uint64_t, "HOSTTRACE_CRITICAL_TRACE_NUM_THREADS",
|
||||
"Number of threads to use when generating the critical trace",
|
||||
std::min<uint64_t>(8, std::thread::hardware_concurrency()));
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
int64_t, "HOSTTRACE_CRITICAL_TRACE_PER_ROW",
|
||||
"How many critical traces per row in perfetto (0 == all in one row)", 0);
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(
|
||||
std::string, "HOSTTRACE_COMPONENTS",
|
||||
"List of components to collect via timemory (see timemory-avail)", "wall_clock");
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(std::string, "HOSTTRACE_OUTPUT_FILE", "Perfetto filename",
|
||||
"");
|
||||
|
||||
HOSTTRACE_CONFIG_SETTING(bool, "HOSTTRACE_SETTINGS_DESC",
|
||||
"Provide descriptions when printing settings", false);
|
||||
|
||||
_config->get_flamegraph_output() = false;
|
||||
_config->get_cout_output() = false;
|
||||
_config->get_file_output() = true;
|
||||
_config->get_json_output() = true;
|
||||
_config->get_tree_output() = true;
|
||||
_config->get_enable_signal_handler() = true;
|
||||
_config->get_collapse_processes() = false;
|
||||
_config->get_collapse_threads() = false;
|
||||
_config->get_stack_clearing() = false;
|
||||
_config->get_time_output() = true;
|
||||
_config->get_timing_precision() = 6;
|
||||
|
||||
for(auto&& itr :
|
||||
tim::delimit(_config->get<std::string>("HOSTTRACE_CONFIG_FILE"), ";:"))
|
||||
{
|
||||
HOSTTRACE_CONDITIONAL_BASIC_PRINT(true, "Reading config file %s\n", itr.c_str());
|
||||
_config->read(itr);
|
||||
}
|
||||
|
||||
_config->get_global_components() = _config->get<std::string>("HOSTTRACE_COMPONENTS");
|
||||
|
||||
// always initialize timemory because gotcha wrappers are always used
|
||||
auto _cmd = tim::read_command_line(process::get_id());
|
||||
auto _exe = (_cmd.empty()) ? "exe" : _cmd.front();
|
||||
auto _pos = _exe.find_last_of('/');
|
||||
if(_pos < _exe.length() - 1) _exe = _exe.substr(_pos + 1);
|
||||
get_exe_name() = _exe;
|
||||
|
||||
scope::get_fields()[scope::flat::value] = tim::settings::flat_profile();
|
||||
scope::get_fields()[scope::timeline::value] = tim::settings::timeline_profile();
|
||||
|
||||
bool _found_sep = false;
|
||||
for(const auto& itr : _cmd)
|
||||
{
|
||||
if(itr == "--") _found_sep = true;
|
||||
}
|
||||
if(!_found_sep && _cmd.size() > 1) _cmd.insert(_cmd.begin() + 1, "--");
|
||||
|
||||
using argparser_t = tim::argparse::argument_parser;
|
||||
argparser_t _parser{ _exe };
|
||||
tim::timemory_init(_cmd, _parser, "hosttrace-");
|
||||
|
||||
settings::suppress_parsing() = true;
|
||||
settings::suppress_config() = true;
|
||||
settings::use_output_suffix() = _config->get<bool>("HOSTTRACE_USE_PID");
|
||||
}
|
||||
|
||||
void
|
||||
print_config_settings(std::ostream& _os,
|
||||
std::function<bool(const std::string_view&)>&& _filter)
|
||||
{
|
||||
auto _flags = _os.flags();
|
||||
|
||||
constexpr size_t nfields = 3;
|
||||
using str_array_t = std::array<std::string, nfields>;
|
||||
std::vector<str_array_t> _data{};
|
||||
std::array<size_t, nfields> _widths{};
|
||||
_widths.fill(0);
|
||||
for(const auto& itr : *get_config())
|
||||
{
|
||||
if(_filter(itr.first))
|
||||
{
|
||||
auto _disp = itr.second->get_display(std::ios::boolalpha);
|
||||
_data.emplace_back(str_array_t{ _disp.at("name"), _disp.at("value"),
|
||||
_disp.at("description") });
|
||||
for(size_t i = 0; i < nfields; ++i)
|
||||
_widths.at(i) =
|
||||
std::max<size_t>(_widths.at(i), _data.back().at(i).length());
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(_data.begin(), _data.end(), [](const auto& lhs, const auto& rhs) {
|
||||
auto _npos = std::string::npos;
|
||||
// HOSTTRACE_CONFIG_FILE always first
|
||||
if(lhs.at(0).find("HOSTTRACE_CONFIG") != _npos) return true;
|
||||
if(rhs.at(0).find("HOSTTRACE_CONFIG") != _npos) return false;
|
||||
// HOSTTRACE_USE_* prioritized
|
||||
auto _lhs_use = lhs.at(0).find("HOSTTRACE_USE_");
|
||||
auto _rhs_use = rhs.at(0).find("HOSTTRACE_USE_");
|
||||
if(_lhs_use != _rhs_use && _lhs_use < _rhs_use) return true;
|
||||
if(_lhs_use != _rhs_use && _lhs_use > _rhs_use) return false;
|
||||
// length sort followed by alphabetical sort
|
||||
return (lhs.at(0).length() == rhs.at(0).length())
|
||||
? (lhs.at(0) < rhs.at(0))
|
||||
: (lhs.at(0).length() < rhs.at(0).length());
|
||||
});
|
||||
|
||||
bool _print_desc = get_debug() || get_config()->get<bool>("HOSTTRACE_SETTINGS_DESC");
|
||||
|
||||
auto tot_width = std::accumulate(_widths.begin(), _widths.end(), 0);
|
||||
if(!_print_desc) tot_width -= _widths.back() + 4;
|
||||
|
||||
std::stringstream _spacer{};
|
||||
_spacer.fill('-');
|
||||
_spacer << "#" << std::setw(tot_width + 11) << ""
|
||||
<< "#";
|
||||
_os << _spacer.str() << "\n";
|
||||
// _os << "# Hosttrace settings:" << std::setw(tot_width - 8) << "#" << "\n";
|
||||
for(const auto& itr : _data)
|
||||
{
|
||||
_os << "# ";
|
||||
for(size_t i = 0; i < nfields; ++i)
|
||||
{
|
||||
switch(i)
|
||||
{
|
||||
case 0: _os << std::left; break;
|
||||
case 1: _os << std::left; break;
|
||||
case 2: _os << std::left; break;
|
||||
}
|
||||
_os << std::setw(_widths.at(i)) << itr.at(i) << " ";
|
||||
if(!_print_desc && i == 1) break;
|
||||
switch(i)
|
||||
{
|
||||
case 0: _os << "= "; break;
|
||||
case 1: _os << "[ "; break;
|
||||
case 2: _os << "]"; break;
|
||||
}
|
||||
}
|
||||
_os << " #\n";
|
||||
}
|
||||
_os << _spacer.str() << "\n";
|
||||
|
||||
_os.setf(_flags);
|
||||
}
|
||||
|
||||
// Mutable, process-wide storage for the executable name; starts out empty.
std::string&
get_exe_name()
{
    static std::string _name{};
    return _name;
}
|
||||
|
||||
std::string
|
||||
get_config_file()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_CONFIG_FILE");
|
||||
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_debug()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_DEBUG");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_use_perfetto()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_USE_PERFETTO");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_use_timemory()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_USE_TIMEMORY");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool&
|
||||
get_use_pid()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_USE_PID");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_use_mpip()
|
||||
{
|
||||
static bool _v = tim::get_env("HOSTTRACE_USE_MPIP", false, false);
|
||||
return _v;
|
||||
}
|
||||
|
||||
bool
|
||||
get_use_critical_trace()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_critical_trace_debug()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_DEBUG");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_critical_trace_serialize_names()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_SERIALIZE_NAMES");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_roctracer_timeline_profile()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_TIMELINE_PROFILE");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_roctracer_flat_profile()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_FLAT_PROFILE");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_trace_hsa_api()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_HSA_API");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_trace_hsa_activity()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_ROCTRACER_HSA_ACTIVITY");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_critical_trace_per_row()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_PER_ROW");
|
||||
return static_cast<tim::tsettings<int64_t>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
size_t
|
||||
get_perfetto_shmem_size_hint()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_SHMEM_SIZE_HINT_KB");
|
||||
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
size_t
|
||||
get_perfetto_buffer_size()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_BUFFER_SIZE_KB");
|
||||
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
get_critical_trace_update_freq()
|
||||
{
|
||||
static uint64_t _v =
|
||||
get_config()->get<uint64_t>("HOSTTRACE_CRITICAL_TRACE_BUFFER_COUNT");
|
||||
return _v;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
get_critical_trace_num_threads()
|
||||
{
|
||||
static uint64_t _v =
|
||||
get_config()->get<uint64_t>("HOSTTRACE_CRITICAL_TRACE_NUM_THREADS");
|
||||
return _v;
|
||||
}
|
||||
|
||||
std::string
|
||||
get_trace_hsa_api_types()
|
||||
{
|
||||
static std::string _v =
|
||||
get_config()->get<std::string>("HOSTTRACE_ROCTRACER_HSA_API_TYPES");
|
||||
return _v;
|
||||
}
|
||||
|
||||
std::string&
|
||||
get_backend()
|
||||
{
|
||||
// select inprocess, system, or both (i.e. all)
|
||||
static auto _v = get_config()->find("HOSTTRACE_BACKEND");
|
||||
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
std::string
|
||||
get_perfetto_output_filename()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_OUTPUT_FILE");
|
||||
static auto& _t = static_cast<tim::tsettings<std::string>&>(*_v->second);
|
||||
if(_t.get().empty())
|
||||
{
|
||||
// default name: perfetto-trace.<pid>.proto or perfetto-trace.<rank>.proto
|
||||
auto _default_fname = settings::compose_output_filename(
|
||||
"perfetto-trace", "proto", get_use_pid(),
|
||||
(tim::dmp::is_initialized()) ? tim::dmp::rank() : process::get_id());
|
||||
auto _pid_patch = std::string{ "/" } + std::to_string(tim::process::get_id()) +
|
||||
"-perfetto-trace";
|
||||
auto _dpos = _default_fname.find(_pid_patch);
|
||||
if(_dpos != std::string::npos)
|
||||
_default_fname =
|
||||
_default_fname.replace(_dpos, _pid_patch.length(), "/perfetto-trace");
|
||||
// have the default display the full path to the output file
|
||||
_t.set(tim::get_env<std::string>(
|
||||
"HOSTTRACE_OUTPUT_FILE",
|
||||
JOIN('/', tim::get_env<std::string>("PWD", ".", false), _default_fname),
|
||||
false));
|
||||
}
|
||||
return _t.get();
|
||||
}
|
||||
|
||||
size_t&
|
||||
get_sample_rate()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_SAMPLE_RATE");
|
||||
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
int64_t
|
||||
get_critical_trace_count()
|
||||
{
|
||||
static auto _v = get_config()->find("HOSTTRACE_CRITICAL_TRACE_COUNT");
|
||||
return static_cast<tim::tsettings<int64_t>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
// Mutable, process-wide State value; starts out as State::PreInit.
State&
get_state()
{
    static State _state = State::PreInit;
    return _state;
}
|
||||
|
||||
// Process-wide atomic counter, initialized to zero.
std::atomic<uint64_t>&
get_cpu_cid()
{
    static std::atomic<uint64_t> _counter{ 0 };
    return _counter;
}
|
||||
|
||||
std::unique_ptr<std::vector<uint64_t>>&
|
||||
get_cpu_cid_stack(int64_t _tid)
|
||||
{
|
||||
struct hosttrace_cpu_cid_stack
|
||||
{};
|
||||
using thread_data_t =
|
||||
hosttrace_thread_data<std::vector<uint64_t>, hosttrace_cpu_cid_stack>;
|
||||
static auto& _v = thread_data_t::instances();
|
||||
static thread_local auto _v_check = [_tid]() {
|
||||
thread_data_t::construct((_tid > 0) ? *thread_data_t::instances().at(0)
|
||||
: std::vector<uint64_t>{});
|
||||
return true;
|
||||
}();
|
||||
return _v.at(_tid);
|
||||
(void) _v_check;
|
||||
}
|
||||
Datei-Diff unterdrückt, da er zu groß ist
Diff laden
@@ -0,0 +1,46 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/fork_gotcha.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t&, audit::incoming)
|
||||
{
|
||||
HOSTTRACE_DEBUG(
|
||||
"Warning! Calling fork() within an OpenMPI application using libfabric "
|
||||
"may result is segmentation fault\n");
|
||||
TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
|
||||
}
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
|
||||
{
|
||||
HOSTTRACE_DEBUG("%s() return PID %i\n", _data.tool_id.c_str(), (int) _pid);
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/hosttrace_component.hpp"
|
||||
#include "library/api.hpp"
|
||||
|
||||
void
|
||||
hosttrace_component::start()
|
||||
{
|
||||
if(m_prefix) hosttrace_push_trace(m_prefix);
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::stop()
|
||||
{
|
||||
if(m_prefix) hosttrace_pop_trace(m_prefix);
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::set_prefix(const char* _prefix)
|
||||
{
|
||||
m_prefix = _prefix;
|
||||
}
|
||||
|
||||
TIMEMORY_INITIALIZE_STORAGE(hosttrace_component)
|
||||
+50
-59
@@ -1,14 +1,40 @@
|
||||
#include "library.hpp"
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
//
|
||||
// This file contains miscellaneous function definitions related to timemory
|
||||
// placed in separate file so that, during development, the long compile-times
|
||||
// arising from compiling timemory's gotcha wrappers are reduced
|
||||
//
|
||||
#include "library/mpi_gotcha.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/hosttrace_component.hpp"
|
||||
|
||||
namespace
|
||||
{
|
||||
uint64_t mpip_index = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t mpip_index = std::numeric_limits<uint64_t>::max();
|
||||
std::string mpi_init_string = {};
|
||||
|
||||
// this ensures hosttrace_trace_finalize is called before MPI_Finalize
|
||||
void
|
||||
@@ -19,10 +45,12 @@ hosttrace_mpi_set_attr()
|
||||
return MPI_SUCCESS;
|
||||
};
|
||||
static auto _mpi_fini = [](MPI_Comm, int, void*, void*) {
|
||||
HOSTTRACE_DEBUG("MPI Comm attribute finalize\n");
|
||||
if(mpip_index != std::numeric_limits<uint64_t>::max())
|
||||
comp::deactivate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>(
|
||||
mpip_index);
|
||||
hosttrace_pop_trace("MPI_Finalize()");
|
||||
if(!mpi_init_string.empty()) hosttrace_pop_trace(mpi_init_string.c_str());
|
||||
mpi_init_string = {};
|
||||
hosttrace_trace_finalize();
|
||||
return MPI_SUCCESS;
|
||||
};
|
||||
@@ -37,26 +65,15 @@ hosttrace_mpi_set_attr()
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t&, audit::incoming)
|
||||
{
|
||||
HOSTTRACE_DEBUG(
|
||||
"Warning! Calling fork() within an OpenMPI application using libfabric "
|
||||
"may result is segmentation fault\n");
|
||||
TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
|
||||
}
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
|
||||
{
|
||||
HOSTTRACE_DEBUG("%s() return PID %i\n", _data.tool_id.c_str(), (int) _pid);
|
||||
}
|
||||
|
||||
void
|
||||
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s(int*, char***)\n", __FUNCTION__, _data.tool_id.c_str());
|
||||
if(get_state() == ::State::DelayedInit) get_state() = ::State::PreInit;
|
||||
if(get_state() == ::State::DelayedInit)
|
||||
{
|
||||
get_state() = ::State::PreInit;
|
||||
mpi_init_string = _data.tool_id;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -64,7 +81,11 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***, in
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s(int*, char***, int, int*)\n", __FUNCTION__,
|
||||
_data.tool_id.c_str());
|
||||
if(get_state() == ::State::DelayedInit) get_state() = ::State::PreInit;
|
||||
if(get_state() == ::State::DelayedInit)
|
||||
{
|
||||
get_state() = ::State::PreInit;
|
||||
mpi_init_string = _data.tool_id;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -80,7 +101,7 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval)
|
||||
// being activated unwittingly during runtime instrumentation because that
|
||||
// will result in double instrumenting the MPI functions (unless the MPI functions
|
||||
// were excluded via a regex expression)
|
||||
if(tim::get_env("HOSTTRACE_USE_MPIP", false, false))
|
||||
if(get_use_mpip())
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] Activating MPI wrappers...\n", __FUNCTION__);
|
||||
comp::configure_mpip<tim::component_tuple<hosttrace_component>, hosttrace>();
|
||||
@@ -98,39 +119,9 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming)
|
||||
if(mpip_index != std::numeric_limits<uint64_t>::max())
|
||||
comp::deactivate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>(
|
||||
mpip_index);
|
||||
hosttrace_pop_trace("MPI_Finalize()");
|
||||
if(!mpi_init_string.empty()) hosttrace_pop_trace(mpi_init_string.c_str());
|
||||
mpi_init_string = {};
|
||||
hosttrace_trace_finalize();
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::start()
|
||||
{
|
||||
if(m_prefix) hosttrace_push_trace(m_prefix);
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::stop()
|
||||
{
|
||||
if(m_prefix) hosttrace_pop_trace(m_prefix);
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::set_prefix(const char* _prefix)
|
||||
{
|
||||
m_prefix = _prefix;
|
||||
}
|
||||
|
||||
hosttrace_timemory_data::instance_array_t&
|
||||
hosttrace_timemory_data::instances()
|
||||
{
|
||||
static auto _v = instance_array_t{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
TIMEMORY_INITIALIZE_STORAGE(fork_gotcha, mpi_gotcha, comp::wall_clock,
|
||||
comp::user_global_bundle)
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
#endif
|
||||
TIMEMORY_INITIALIZE_STORAGE(mpi_gotcha)
|
||||
@@ -0,0 +1,35 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/perfetto.hpp"
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/ptl.hpp"
|
||||
|
||||
namespace tasking
|
||||
{
|
||||
std::mutex&
|
||||
get_roctracer_mutex()
|
||||
{
|
||||
static std::mutex _v{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
PTL::ThreadPool&
|
||||
get_roctracer_thread_pool()
|
||||
{
|
||||
static auto _v = PTL::ThreadPool{ 1 };
|
||||
return _v;
|
||||
}
|
||||
|
||||
PTL::TaskGroup<void>&
|
||||
get_roctracer_task_group()
|
||||
{
|
||||
static PTL::TaskGroup<void> _v{ &get_roctracer_thread_pool() };
|
||||
return _v;
|
||||
}
|
||||
|
||||
std::mutex&
|
||||
get_critical_trace_mutex()
|
||||
{
|
||||
static std::mutex _v{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
PTL::ThreadPool&
|
||||
get_critical_trace_thread_pool()
|
||||
{
|
||||
static auto _v = PTL::ThreadPool{ 1 };
|
||||
return _v;
|
||||
}
|
||||
|
||||
PTL::TaskGroup<void>&
|
||||
get_critical_trace_task_group()
|
||||
{
|
||||
static PTL::TaskGroup<void> _v{ &get_critical_trace_thread_pool() };
|
||||
return _v;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
bool _ptl_initialized =
|
||||
(get_roctracer_thread_pool(), get_critical_trace_thread_pool(), true);
|
||||
}
|
||||
} // namespace tasking
|
||||
@@ -0,0 +1,283 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/roctracer.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/defines.hpp"
|
||||
#include "library/roctracer_callbacks.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
|
||||
namespace tim
|
||||
{
|
||||
namespace component
|
||||
{
|
||||
void
|
||||
roctracer::preinit()
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
roctracer_data::label() = "roctracer";
|
||||
roctracer_data::description() = "ROCm tracer (activity API)";
|
||||
}
|
||||
|
||||
bool
|
||||
roctracer::is_setup()
|
||||
{
|
||||
return roctracer_is_setup();
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::add_setup(const std::string& _lbl, std::function<void()>&& _func)
|
||||
{
|
||||
roctracer_setup_routines().emplace_back(_lbl, std::move(_func));
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::add_tear_down(const std::string& _lbl, std::function<void()>&& _func)
|
||||
{
|
||||
roctracer_tear_down_routines().emplace_back(_lbl, std::move(_func));
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::remove_setup(const std::string& _lbl)
|
||||
{
|
||||
auto& _data = roctracer_setup_routines();
|
||||
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
|
||||
{
|
||||
if(itr->first == _lbl)
|
||||
{
|
||||
_data.erase(itr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::remove_tear_down(const std::string& _lbl)
|
||||
{
|
||||
auto& _data = roctracer_setup_routines();
|
||||
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
|
||||
{
|
||||
if(itr->first == _lbl)
|
||||
{
|
||||
_data.erase(itr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::setup()
|
||||
{
|
||||
if(!get_use_timemory() && !get_use_perfetto()) return;
|
||||
|
||||
auto_lock_t _lk{ type_mutex<roctracer>() };
|
||||
if(roctracer_is_setup()) return;
|
||||
roctracer_is_setup() = true;
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
|
||||
tim::set_env("HSA_TOOLS_LIB", "libhosttrace.so", 0);
|
||||
|
||||
auto _kfdwrapper = dynamic_library{ "HOSTTRACE_ROCTRACER_LIBKFDWRAPPER",
|
||||
HOSTTRACE_ROCTRACER_LIBKFDWRAPPER };
|
||||
|
||||
ROCTRACER_CALL(roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, nullptr));
|
||||
|
||||
// if(roctracer_default_pool() == nullptr)
|
||||
{
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
memset(&properties, 0, sizeof(roctracer_properties_t));
|
||||
properties.mode = 0x1000;
|
||||
properties.buffer_size = 0x1000;
|
||||
properties.buffer_callback_fun = hip_activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
}
|
||||
|
||||
// Enable API callbacks, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, nullptr));
|
||||
// Enable activity tracing, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_activity());
|
||||
|
||||
// callback for HSA
|
||||
for(auto& itr : roctracer_setup_routines())
|
||||
itr.second();
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::tear_down()
|
||||
{
|
||||
auto_lock_t _lk{ type_mutex<roctracer>() };
|
||||
if(!roctracer_is_setup()) return;
|
||||
roctracer_is_setup() = false;
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
|
||||
// flush all the activity
|
||||
if(roctracer_default_pool() != nullptr)
|
||||
{
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
}
|
||||
// flush all buffers
|
||||
roctracer_flush_buf();
|
||||
|
||||
// make sure all async operations are executed
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
hip_exec_activity_callbacks(i);
|
||||
|
||||
// callback for hsa
|
||||
for(auto& itr : roctracer_tear_down_routines())
|
||||
itr.second();
|
||||
|
||||
// Disable tracing and closing the pool
|
||||
ROCTRACER_CALL(roctracer_disable_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::start()
|
||||
{
|
||||
if(tracker_type::start() == 0) setup();
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::stop()
|
||||
{
|
||||
if(tracker_type::stop() == 0) tear_down();
|
||||
}
|
||||
} // namespace component
|
||||
} // namespace tim
|
||||
|
||||
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer, false, void)
|
||||
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer_data, true, double)
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C"
|
||||
{
|
||||
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) TIMEMORY_VISIBILITY("default");
|
||||
void OnUnload() TIMEMORY_VISIBILITY("default");
|
||||
|
||||
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
tim::consume_parameters(table, runtime_version, failed_tool_count,
|
||||
failed_tool_names);
|
||||
|
||||
// ONLOAD_TRACE_BEG();
|
||||
// on_exit(exit_handler, nullptr);
|
||||
|
||||
auto _setup = [=]() {
|
||||
get_hsa_timer() =
|
||||
std::make_unique<hsa_timer_t>(table->core_->hsa_system_get_info_fn);
|
||||
|
||||
// const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
|
||||
const char* output_prefix = nullptr;
|
||||
|
||||
// App begin timestamp begin_ts_file.txt
|
||||
// begin_ts_file_handle = open_output_file(output_prefix,
|
||||
// "begin_ts_file.txt"); const timestamp_t app_start_time =
|
||||
// timer->timestamp_fn_ns(); fprintf(begin_ts_file_handle, "%lu\n",
|
||||
// app_start_time);
|
||||
|
||||
bool trace_hsa_api = get_trace_hsa_api();
|
||||
std::vector<std::string> hsa_api_vec =
|
||||
tim::delimit(get_trace_hsa_api_types());
|
||||
|
||||
// Enable HSA API callbacks/activity
|
||||
if(trace_hsa_api)
|
||||
{
|
||||
// hsa_api_file_handle = open_output_file(output_prefix,
|
||||
// "hsa_api_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*) table);
|
||||
|
||||
HOSTTRACE_DEBUG(" HSA-trace(");
|
||||
if(!hsa_api_vec.empty())
|
||||
{
|
||||
for(const auto& itr : hsa_api_vec)
|
||||
{
|
||||
uint32_t cid = HSA_API_ID_NUMBER;
|
||||
const char* api = itr.c_str();
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api,
|
||||
&cid, nullptr));
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(
|
||||
ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr));
|
||||
|
||||
HOSTTRACE_DEBUG(" %s", api);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(
|
||||
ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr));
|
||||
}
|
||||
HOSTTRACE_DEBUG("\n");
|
||||
}
|
||||
|
||||
bool trace_hsa_activity = get_trace_hsa_activity();
|
||||
// Enable HSA GPU activity
|
||||
if(trace_hsa_activity)
|
||||
{
|
||||
// initialize HSA tracing
|
||||
::roctracer::hsa_ops_properties_t ops_properties{
|
||||
table,
|
||||
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
|
||||
nullptr, output_prefix
|
||||
};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
HOSTTRACE_DEBUG(" HSA-activity-trace()\n");
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS,
|
||||
HSA_OP_ID_COPY));
|
||||
}
|
||||
};
|
||||
|
||||
auto _tear_down = []() {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
|
||||
|
||||
ROCTRACER_CALL(
|
||||
roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
|
||||
};
|
||||
|
||||
if(comp::roctracer::is_setup()) _setup();
|
||||
|
||||
comp::roctracer::add_setup("hsa", std::move(_setup));
|
||||
comp::roctracer::add_tear_down("hsa", std::move(_tear_down));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// HSA-runtime on-unload method
|
||||
void OnUnload()
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
// ONLOAD_TRACE("");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,599 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/roctracer_callbacks.hpp"
|
||||
#include "library.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/critical_trace.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "timemory/backends/threading.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
TIMEMORY_DEFINE_API(roctracer)
|
||||
namespace api = tim::api;
|
||||
|
||||
// Accessor for the single, lazily constructed set of 64-bit kernel
// identifiers used by the roctracer callbacks.
std::unordered_set<uint64_t>&
get_roctracer_kernels()
{
    static std::unordered_set<uint64_t> _kernels;
    return _kernels;
}
|
||||
|
||||
auto&
|
||||
get_roctracer_hip_data(int64_t _tid = threading::get_id())
|
||||
{
|
||||
using data_t = std::unordered_map<uint64_t, roctracer_bundle_t>;
|
||||
using thread_data_t = hosttrace_thread_data<data_t, api::roctracer>;
|
||||
static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{});
|
||||
return _v.at(_tid);
|
||||
}
|
||||
|
||||
// Accessor for the single, lazily constructed map from a 64-bit id to a
// C-string. Only the pointers are stored, not copies of the strings.
std::unordered_map<uint64_t, const char*>&
get_roctracer_key_data()
{
    static std::unordered_map<uint64_t, const char*> _keys;
    return _keys;
}
|
||||
|
||||
// Accessor for the single, lazily constructed map from a 64-bit id to a
// signed 64-bit value (presumably a thread id, going by the name -- confirm).
std::unordered_map<uint64_t, int64_t>&
get_roctracer_tid_data()
{
    static std::unordered_map<uint64_t, int64_t> _tids;
    return _tids;
}
|
||||
|
||||
// Value type of the map returned by get_roctracer_cid_data().
using cid_tuple_t = std::tuple<uint64_t, uint64_t, uint16_t>;

// Accessor for the single, lazily constructed map from a 64-bit id to a
// cid_tuple_t.
std::unordered_map<uint64_t, cid_tuple_t>&
get_roctracer_cid_data()
{
    static std::unordered_map<uint64_t, cid_tuple_t> _cids;
    return _cids;
}
|
||||
|
||||
auto&
|
||||
get_hip_activity_callbacks(int64_t _tid = threading::get_id())
|
||||
{
|
||||
using thread_data_t =
|
||||
hosttrace_thread_data<std::vector<std::function<void()>>, api::roctracer>;
|
||||
static auto& _v = thread_data_t::instances(thread_data_t::construct_on_init{});
|
||||
return _v.at(_tid);
|
||||
}
|
||||
|
||||
// Accessor for the single owning pointer to the HSA timer. The pointer starts
// out empty; callers are expected to check it before dereferencing.
std::unique_ptr<hsa_timer_t>&
get_hsa_timer()
{
    static std::unique_ptr<hsa_timer_t> _timer;
    return _timer;
}
|
||||
|
||||
using hip_activity_mutex_t = std::decay_t<decltype(get_hip_activity_callbacks())>;
|
||||
using key_data_mutex_t = std::decay_t<decltype(get_roctracer_key_data())>;
|
||||
using hip_data_mutex_t = std::decay_t<decltype(get_roctracer_hip_data())>;
|
||||
using cid_data_mutex_t = std::decay_t<decltype(get_roctracer_cid_data())>;
|
||||
|
||||
auto&
|
||||
get_hip_activity_mutex(int64_t _tid = threading::get_id())
|
||||
{
|
||||
return tim::type_mutex<hip_activity_mutex_t, api::roctracer, max_supported_threads>(
|
||||
_tid);
|
||||
}
|
||||
|
||||
// HSA API callback function
|
||||
void
|
||||
hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
|
||||
{
|
||||
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
|
||||
return;
|
||||
|
||||
(void) arg;
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n",
|
||||
roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
|
||||
static thread_local timestamp_t begin_timestamp = 0;
|
||||
static auto& timer = get_hsa_timer();
|
||||
static auto _scope = []() {
|
||||
auto _v = scope::config{};
|
||||
if(get_roctracer_timeline_profile()) _v += scope::timeline{};
|
||||
if(get_roctracer_flat_profile()) _v += scope::flat{};
|
||||
return _v;
|
||||
}();
|
||||
|
||||
if(!timer) return;
|
||||
|
||||
switch(cid)
|
||||
{
|
||||
case HSA_API_ID_hsa_init:
|
||||
case HSA_API_ID_hsa_shut_down:
|
||||
case HSA_API_ID_hsa_agent_get_exception_policies:
|
||||
case HSA_API_ID_hsa_agent_get_info:
|
||||
case HSA_API_ID_hsa_amd_agent_iterate_memory_pools:
|
||||
case HSA_API_ID_hsa_amd_agent_memory_pool_get_info:
|
||||
case HSA_API_ID_hsa_amd_coherency_get_type:
|
||||
case HSA_API_ID_hsa_amd_memory_pool_get_info:
|
||||
case HSA_API_ID_hsa_amd_pointer_info:
|
||||
case HSA_API_ID_hsa_amd_pointer_info_set_userdata:
|
||||
case HSA_API_ID_hsa_amd_profiling_async_copy_enable:
|
||||
case HSA_API_ID_hsa_amd_profiling_get_async_copy_time:
|
||||
case HSA_API_ID_hsa_amd_profiling_get_dispatch_time:
|
||||
case HSA_API_ID_hsa_amd_profiling_set_profiler_enabled:
|
||||
case HSA_API_ID_hsa_cache_get_info:
|
||||
case HSA_API_ID_hsa_code_object_get_info:
|
||||
case HSA_API_ID_hsa_code_object_get_symbol:
|
||||
case HSA_API_ID_hsa_code_object_get_symbol_from_name:
|
||||
case HSA_API_ID_hsa_code_object_reader_create_from_memory:
|
||||
case HSA_API_ID_hsa_code_symbol_get_info:
|
||||
case HSA_API_ID_hsa_executable_create_alt:
|
||||
case HSA_API_ID_hsa_executable_freeze:
|
||||
case HSA_API_ID_hsa_executable_get_info:
|
||||
case HSA_API_ID_hsa_executable_get_symbol:
|
||||
case HSA_API_ID_hsa_executable_get_symbol_by_name:
|
||||
case HSA_API_ID_hsa_executable_symbol_get_info:
|
||||
case HSA_API_ID_hsa_extension_get_name:
|
||||
case HSA_API_ID_hsa_ext_image_data_get_info:
|
||||
case HSA_API_ID_hsa_ext_image_data_get_info_with_layout:
|
||||
case HSA_API_ID_hsa_ext_image_get_capability:
|
||||
case HSA_API_ID_hsa_ext_image_get_capability_with_layout:
|
||||
case HSA_API_ID_hsa_isa_get_exception_policies:
|
||||
case HSA_API_ID_hsa_isa_get_info:
|
||||
case HSA_API_ID_hsa_isa_get_info_alt:
|
||||
case HSA_API_ID_hsa_isa_get_round_method:
|
||||
case HSA_API_ID_hsa_region_get_info:
|
||||
case HSA_API_ID_hsa_system_extension_supported:
|
||||
case HSA_API_ID_hsa_system_get_extension_table:
|
||||
case HSA_API_ID_hsa_system_get_info:
|
||||
case HSA_API_ID_hsa_system_get_major_extension_table:
|
||||
case HSA_API_ID_hsa_wavefront_get_info: break;
|
||||
default:
|
||||
{
|
||||
if(data->phase == ACTIVITY_API_PHASE_ENTER)
|
||||
{
|
||||
begin_timestamp = timer->timestamp_fn_ns();
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto* _name = roctracer_op_string(domain, cid, 0);
|
||||
const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down)
|
||||
? begin_timestamp
|
||||
: timer->timestamp_fn_ns();
|
||||
|
||||
if(begin_timestamp > end_timestamp) return;
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name },
|
||||
begin_timestamp);
|
||||
TRACE_EVENT_END("device", end_timestamp);
|
||||
}
|
||||
|
||||
if(get_use_timemory())
|
||||
{
|
||||
std::unique_lock<std::mutex> _lk{ tasking::get_roctracer_mutex() };
|
||||
auto _begin_ns = begin_timestamp;
|
||||
auto _end_ns = end_timestamp;
|
||||
tasking::get_roctracer_task_group().exec(
|
||||
[_name, _begin_ns, _end_ns]() {
|
||||
roctracer_hsa_bundle_t _bundle{ _name, _scope };
|
||||
_bundle.start()
|
||||
.store(std::plus<double>{},
|
||||
static_cast<double>(_end_ns - _begin_ns))
|
||||
.stop();
|
||||
});
|
||||
}
|
||||
// timemory is disabled in this callback because collecting data in this
|
||||
// thread causes strange segmentation faults
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
|
||||
{
|
||||
static const char* copy_op_name = "hsa_async_copy";
|
||||
static const char* dispatch_op_name = "hsa_dispatch";
|
||||
static const char* barrier_op_name = "hsa_barrier";
|
||||
const char** _name = nullptr;
|
||||
|
||||
switch(op)
|
||||
{
|
||||
case HSA_OP_ID_DISPATCH: _name = &dispatch_op_name; break;
|
||||
case HSA_OP_ID_COPY: _name = ©_op_name; break;
|
||||
case HSA_OP_ID_BARRIER: _name = &barrier_op_name; break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if(!_name) return;
|
||||
|
||||
auto _begin_ns = record->begin_ns;
|
||||
auto _end_ns = record->end_ns;
|
||||
static auto _scope = []() {
|
||||
auto _v = scope::config{};
|
||||
if(get_roctracer_timeline_profile()) _v += scope::timeline{};
|
||||
if(get_roctracer_flat_profile()) _v += scope::flat{};
|
||||
return _v;
|
||||
}();
|
||||
|
||||
auto _func = [_begin_ns, _end_ns, _name]() {
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ *_name }, _begin_ns);
|
||||
TRACE_EVENT_END("device", _end_ns);
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
roctracer_hsa_bundle_t _bundle{ *_name, _scope };
|
||||
_bundle.start()
|
||||
.store(std::plus<double>{}, static_cast<double>(_end_ns - _begin_ns))
|
||||
.stop();
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_lock<std::mutex> _lk{ tasking::get_roctracer_mutex() };
|
||||
tasking::get_roctracer_task_group().exec(_func);
|
||||
|
||||
// timemory is disabled in this callback because collecting data in this thread
|
||||
// causes strange segmentation faults
|
||||
tim::consume_parameters(arg);
|
||||
}
|
||||
|
||||
void
|
||||
hip_exec_activity_callbacks(int64_t _tid)
|
||||
{
|
||||
// ROCTRACER_CALL(roctracer_flush_activity());
|
||||
tim::auto_lock_t _lk{ get_hip_activity_mutex(_tid) };
|
||||
auto& _async_ops = get_hip_activity_callbacks(_tid);
|
||||
for(auto& itr : *_async_ops)
|
||||
itr();
|
||||
_async_ops->clear();
|
||||
}
|
||||
|
||||
namespace
{
// file-local, per-thread size_t -> size_t map (starts empty)
thread_local auto gpu_cids = std::unordered_map<size_t, size_t>{};
}  // namespace
|
||||
|
||||
// HIP API callback function
|
||||
void
|
||||
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
|
||||
{
|
||||
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
|
||||
return;
|
||||
|
||||
using Device = critical_trace::Device;
|
||||
using Phase = critical_trace::Phase;
|
||||
|
||||
const char* op_name = roctracer_op_string(domain, cid, 0);
|
||||
if(op_name == nullptr) op_name = hip_api_name(cid);
|
||||
if(op_name == nullptr) return;
|
||||
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n", op_name, cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
|
||||
switch(cid)
|
||||
{
|
||||
case HIP_API_ID___hipPushCallConfiguration:
|
||||
case HIP_API_ID___hipPopCallConfiguration:
|
||||
case HIP_API_ID_hipDeviceEnablePeerAccess:
|
||||
case HIP_API_ID_hipImportExternalMemory:
|
||||
case HIP_API_ID_hipDestroyExternalMemory: return;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if(data->phase == ACTIVITY_API_PHASE_ENTER)
|
||||
{
|
||||
switch(cid)
|
||||
{
|
||||
case HIP_API_ID_hipLaunchKernel:
|
||||
case HIP_API_ID_hipLaunchCooperativeKernel:
|
||||
{
|
||||
const char* _name =
|
||||
hipKernelNameRefByPtr(data->args.hipLaunchKernel.function_address,
|
||||
data->args.hipLaunchKernel.stream);
|
||||
if(_name != nullptr)
|
||||
{
|
||||
if(get_use_perfetto() || get_use_timemory())
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
|
||||
get_roctracer_key_data().emplace(data->correlation_id, _name);
|
||||
get_roctracer_tid_data().emplace(data->correlation_id,
|
||||
threading::get_id());
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
{
|
||||
const char* _name = hipKernelNameRef(data->args.hipModuleLaunchKernel.f);
|
||||
if(_name != nullptr)
|
||||
{
|
||||
if(get_use_perfetto() || get_use_timemory())
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
|
||||
get_roctracer_key_data().emplace(data->correlation_id, _name);
|
||||
get_roctracer_tid_data().emplace(data->correlation_id,
|
||||
threading::get_id());
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
if(get_use_perfetto() || get_use_timemory())
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
|
||||
get_roctracer_key_data().emplace(data->correlation_id, op_name);
|
||||
get_roctracer_tid_data().emplace(data->correlation_id,
|
||||
threading::get_id());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ op_name });
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
get_roctracer_hip_data()->emplace(
|
||||
data->correlation_id,
|
||||
roctracer_bundle_t{ op_name, quirk::config<quirk::auto_start>{} });
|
||||
}
|
||||
if(get_use_critical_trace())
|
||||
{
|
||||
auto _cid = get_cpu_cid()++;
|
||||
uint16_t _depth = (get_cpu_cid_stack()->empty())
|
||||
? get_cpu_cid_stack(0)->size()
|
||||
: get_cpu_cid_stack()->size() - 1;
|
||||
auto _parent_cid = (get_cpu_cid_stack()->empty())
|
||||
? get_cpu_cid_stack(0)->back()
|
||||
: get_cpu_cid_stack()->back();
|
||||
int64_t _ts = comp::wall_clock::record();
|
||||
add_critical_trace<Device::GPU, Phase::BEGIN>(
|
||||
threading::get_id(), _cid, data->correlation_id, _parent_cid, _ts, 0,
|
||||
critical_trace::add_hash_id(op_name), _depth);
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<cid_data_mutex_t>() };
|
||||
get_roctracer_cid_data().emplace(data->correlation_id,
|
||||
cid_tuple_t{ _cid, _parent_cid, _depth });
|
||||
}
|
||||
|
||||
hip_exec_activity_callbacks(threading::get_id());
|
||||
}
|
||||
else if(data->phase == ACTIVITY_API_PHASE_EXIT)
|
||||
{
|
||||
hip_exec_activity_callbacks(threading::get_id());
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_END("device");
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
auto _stop = [data](int64_t _tid) {
|
||||
auto& _data = get_roctracer_hip_data(_tid);
|
||||
auto itr = _data->find(data->correlation_id);
|
||||
if(itr != get_roctracer_hip_data()->end())
|
||||
{
|
||||
itr->second.stop().pop();
|
||||
_data->erase(itr);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
if(!_stop(threading::get_id()))
|
||||
{
|
||||
for(size_t i = 0; i < max_supported_threads; ++i)
|
||||
{
|
||||
if(_stop(i)) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(get_use_critical_trace())
|
||||
{
|
||||
uint16_t _depth = 0;
|
||||
uint64_t _cid = 0;
|
||||
uint64_t _parent_cid = 0;
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<cid_data_mutex_t>() };
|
||||
std::tie(_cid, _parent_cid, _depth) =
|
||||
get_roctracer_cid_data().at(data->correlation_id);
|
||||
}
|
||||
int64_t _ts = comp::wall_clock::record();
|
||||
add_critical_trace<Device::GPU, Phase::END>(
|
||||
threading::get_id(), _cid, data->correlation_id, _parent_cid, _ts, _ts,
|
||||
critical_trace::add_hash_id(op_name), _depth);
|
||||
}
|
||||
}
|
||||
tim::consume_parameters(arg);
|
||||
}
|
||||
|
||||
// Activity tracing callback
|
||||
void
|
||||
hip_activity_callback(const char* begin, const char* end, void*)
|
||||
{
|
||||
using Device = critical_trace::Device;
|
||||
using Phase = critical_trace::Phase;
|
||||
|
||||
if(!trait::runtime_enabled<comp::roctracer>::get()) return;
|
||||
static auto _kernel_names = std::unordered_map<const char*, std::string>{};
|
||||
static auto _indexes = std::unordered_map<uint64_t, int>{};
|
||||
const roctracer_record_t* record = reinterpret_cast<const roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record =
|
||||
reinterpret_cast<const roctracer_record_t*>(end);
|
||||
|
||||
HOSTTRACE_DEBUG("Activity records:\n");
|
||||
|
||||
while(record < end_record)
|
||||
{
|
||||
const char* op_name =
|
||||
roctracer_op_string(record->domain, record->correlation_id, 0);
|
||||
if(op_name == nullptr) op_name = hip_api_name(record->correlation_id);
|
||||
|
||||
if(op_name != nullptr)
|
||||
{
|
||||
HOSTTRACE_DEBUG("\t%-30s\tcorrelation_id(%6lu) time_ns(%12lu:%12lu) "
|
||||
"delta_ns(%12lu) device_id(%d) "
|
||||
"stream_id(%lu)\n",
|
||||
op_name, record->correlation_id, record->begin_ns,
|
||||
record->end_ns, (record->end_ns - record->begin_ns),
|
||||
record->device_id, record->queue_id);
|
||||
}
|
||||
|
||||
auto _begin_ns = record->begin_ns;
|
||||
auto _end_ns = record->end_ns;
|
||||
auto _corr_id = record->correlation_id;
|
||||
static auto _scope = []() {
|
||||
auto _v = scope::config{};
|
||||
if(get_roctracer_timeline_profile()) _v += scope::timeline{};
|
||||
if(get_roctracer_flat_profile()) _v += scope::flat{};
|
||||
return _v;
|
||||
}();
|
||||
|
||||
auto& _keys = get_roctracer_key_data();
|
||||
auto& _cids = get_roctracer_cid_data();
|
||||
auto& _tids = get_roctracer_tid_data();
|
||||
|
||||
int16_t _depth = 0; // depth of kernel launch
|
||||
int64_t _tid = 0; // thread id
|
||||
uint64_t _cid = 0; // correlation id
|
||||
uint64_t _pcid = 0; // parent corr_id
|
||||
auto _laps = _indexes[_corr_id]++; // see note #1
|
||||
const char* _name = nullptr;
|
||||
bool _found = false;
|
||||
bool _critical_trace = get_use_critical_trace();
|
||||
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
|
||||
if(_tids.find(_corr_id) != _tids.end())
|
||||
{
|
||||
_found = true;
|
||||
_tid = _tids.at(_corr_id);
|
||||
auto itr = _keys.find(_corr_id);
|
||||
if(itr != _keys.end()) _name = itr->second;
|
||||
}
|
||||
}
|
||||
|
||||
if(_critical_trace)
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<cid_data_mutex_t>() };
|
||||
if(_cids.find(_corr_id) != _cids.end())
|
||||
std::tie(_cid, _pcid, _depth) = _cids.at(_corr_id);
|
||||
else
|
||||
_critical_trace = false;
|
||||
}
|
||||
|
||||
auto _func = [_critical_trace, _depth, _tid, _cid, _laps, _begin_ns, _end_ns,
|
||||
_corr_id, _name]() {
|
||||
// NOTE #1: we get two measurements for 1 kernel so we need to
|
||||
// tweak the number of laps for the wall-clock component
|
||||
if(_name != nullptr)
|
||||
{
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
if(_kernel_names.find(_name) == _kernel_names.end())
|
||||
_kernel_names.emplace(_name, tim::demangle(_name));
|
||||
TRACE_EVENT_BEGIN(
|
||||
"device",
|
||||
perfetto::StaticString{ _kernel_names.at(_name).c_str() },
|
||||
_begin_ns);
|
||||
TRACE_EVENT_END("device", _end_ns);
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
roctracer_bundle_t _bundle{ _name, _scope };
|
||||
_bundle.start()
|
||||
.store(std::plus<double>{},
|
||||
static_cast<double>(_end_ns - _begin_ns))
|
||||
.stop()
|
||||
.get<comp::wall_clock>([&](comp::wall_clock* wc) {
|
||||
wc->set_value(_end_ns - _begin_ns);
|
||||
wc->set_accum(_end_ns - _begin_ns);
|
||||
if(_laps % 2 == 1)
|
||||
{
|
||||
// below is a hack bc we get two measurements for 1 kernel
|
||||
wc->set_laps(0);
|
||||
|
||||
auto itr = wc->get_iterator();
|
||||
if(itr && itr->data().get_laps() == 0)
|
||||
{
|
||||
wc->set_is_invalid(true);
|
||||
itr->data().set_is_invalid(true);
|
||||
}
|
||||
}
|
||||
return wc;
|
||||
});
|
||||
_bundle.pop();
|
||||
}
|
||||
if(_critical_trace)
|
||||
{
|
||||
auto _hash = critical_trace::add_hash_id(_name);
|
||||
uint16_t _prio = _laps + 1; // priority
|
||||
add_critical_trace<Device::GPU, Phase::DELTA, false>(
|
||||
_tid, _cid, _corr_id, _cid, _begin_ns, _end_ns, _hash, _depth + 1,
|
||||
_prio);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if(_found)
|
||||
{
|
||||
auto& _async_ops = get_hip_activity_callbacks(_tid);
|
||||
tim::auto_lock_t _lk{ get_hip_activity_mutex(_tid) };
|
||||
_async_ops->emplace_back(std::move(_func));
|
||||
}
|
||||
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the process-wide "roctracer is set up" flag
/// (initially false).
bool&
roctracer_is_setup()
{
    static auto _is_setup = false;
    return _is_setup;
}
|
||||
|
||||
using roctracer_functions_t = std::vector<std::pair<std::string, std::function<void()>>>;
|
||||
|
||||
/// Returns the function-local static list of (label, callable) setup routines.
roctracer_functions_t&
roctracer_setup_routines()
{
    static roctracer_functions_t _routines{};
    return _routines;
}
|
||||
|
||||
/// Returns the function-local static list of (label, callable) tear-down
/// routines.
roctracer_functions_t&
roctracer_tear_down_routines()
{
    static roctracer_functions_t _routines{};
    return _routines;
}
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/thread_data.hpp"
|
||||
|
||||
/// Returns the function-local static array of instrumentation bundle
/// instances (value-initialized on first use).
instrumentation_bundles::instance_array_t&
instrumentation_bundles::instances()
{
    static instance_array_t _instances{};
    return _instances;
}
|
||||
@@ -0,0 +1,31 @@
|
||||
// Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "library/timemory.hpp"
|
||||
|
||||
// NOTE(review): presumably initializes the timemory storage for the wall_clock
// and user_global_bundle components in this translation unit -- confirm against
// the macro definition in timemory
TIMEMORY_INITIALIZE_STORAGE(comp::wall_clock, comp::user_global_bundle)
|
||||
@@ -1,645 +0,0 @@
|
||||
|
||||
#include "roctracer.hpp"
|
||||
#include "library.hpp"
|
||||
|
||||
#include <roctracer.h>
|
||||
#include <roctracer_ext.h>
|
||||
#include <roctracer_hcc.h>
|
||||
#include <roctracer_hip.h>
|
||||
#include <roctracer_kfd.h>
|
||||
|
||||
#define AMD_INTERNAL_BUILD 1
|
||||
#include <ext/hsa_rt_utils.hpp>
|
||||
#include <roctracer_hsa.h>
|
||||
|
||||
#include <atomic>
|
||||
|
||||
// Macro to check ROC-tracer calls status.
//
// Evaluates `call` exactly once. If the result is non-zero, the text from
// roctracer_error_string() and the failing expression are written to std::cerr
// on a line of their own. Execution continues afterwards; the macro never
// aborts or throws.
#define ROCTRACER_CALL(call)                                                             \
    do                                                                                   \
    {                                                                                    \
        const int _roctracer_call_status = (call);                                       \
        if(_roctracer_call_status != 0)                                                  \
        {                                                                                \
            std::cerr << roctracer_error_string() << " in: " << #call << '\n'            \
                      << std::flush;                                                     \
        }                                                                                \
    } while(0)
|
||||
|
||||
using roctracer_bundle_t = tim::component_tuple<comp::roctracer_data>;
|
||||
|
||||
namespace units = tim::units;
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Returns the function-local static set of correlation ids which were
/// registered as kernel launches.
auto&
get_roctracer_kernels()
{
    static std::unordered_set<uint64_t> _kernels{};
    return _kernels;
}
|
||||
|
||||
auto&
|
||||
get_roctracer_hip_data()
|
||||
{
|
||||
static auto _v = std::unordered_map<uint64_t, roctracer_bundle_t>{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
/// Returns the function-local static map from correlation id to the label
/// (C string) registered for that HIP API call.
auto&
get_roctracer_key_data()
{
    static std::unordered_map<uint64_t, const char*> _labels{};
    return _labels;
}
|
||||
|
||||
using data_type_mutex_t = std::decay_t<decltype(get_roctracer_hip_data())>;
|
||||
using hsa_timer_t = hsa_rt_utils::Timer;
|
||||
using timestamp_t = hsa_timer_t::timestamp_t;
|
||||
|
||||
auto&
|
||||
get_hsa_timer()
|
||||
{
|
||||
static auto _v = std::unique_ptr<hsa_timer_t>{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
// HSA API callback function
|
||||
void
|
||||
hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
|
||||
{
|
||||
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
|
||||
return;
|
||||
|
||||
(void) arg;
|
||||
static auto _scope = scope::config{} + scope::flat{};
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n",
|
||||
roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
|
||||
static thread_local timestamp_t hsa_begin_timestamp = 0;
|
||||
static auto& timer = get_hsa_timer();
|
||||
|
||||
if(!timer) return;
|
||||
|
||||
switch(cid)
|
||||
{
|
||||
case HSA_API_ID_hsa_init:
|
||||
case HSA_API_ID_hsa_shut_down:
|
||||
case HSA_API_ID_hsa_agent_get_exception_policies:
|
||||
case HSA_API_ID_hsa_agent_get_info:
|
||||
case HSA_API_ID_hsa_amd_agent_iterate_memory_pools:
|
||||
case HSA_API_ID_hsa_amd_agent_memory_pool_get_info:
|
||||
case HSA_API_ID_hsa_amd_coherency_get_type:
|
||||
case HSA_API_ID_hsa_amd_memory_pool_get_info:
|
||||
case HSA_API_ID_hsa_amd_pointer_info:
|
||||
case HSA_API_ID_hsa_amd_pointer_info_set_userdata:
|
||||
case HSA_API_ID_hsa_amd_profiling_async_copy_enable:
|
||||
case HSA_API_ID_hsa_amd_profiling_get_async_copy_time:
|
||||
case HSA_API_ID_hsa_amd_profiling_get_dispatch_time:
|
||||
case HSA_API_ID_hsa_amd_profiling_set_profiler_enabled:
|
||||
case HSA_API_ID_hsa_cache_get_info:
|
||||
case HSA_API_ID_hsa_code_object_get_info:
|
||||
case HSA_API_ID_hsa_code_object_get_symbol:
|
||||
case HSA_API_ID_hsa_code_object_get_symbol_from_name:
|
||||
case HSA_API_ID_hsa_code_object_reader_create_from_memory:
|
||||
case HSA_API_ID_hsa_code_symbol_get_info:
|
||||
case HSA_API_ID_hsa_executable_create_alt:
|
||||
case HSA_API_ID_hsa_executable_freeze:
|
||||
case HSA_API_ID_hsa_executable_get_info:
|
||||
case HSA_API_ID_hsa_executable_get_symbol:
|
||||
case HSA_API_ID_hsa_executable_get_symbol_by_name:
|
||||
case HSA_API_ID_hsa_executable_symbol_get_info:
|
||||
case HSA_API_ID_hsa_extension_get_name:
|
||||
case HSA_API_ID_hsa_ext_image_data_get_info:
|
||||
case HSA_API_ID_hsa_ext_image_data_get_info_with_layout:
|
||||
case HSA_API_ID_hsa_ext_image_get_capability:
|
||||
case HSA_API_ID_hsa_ext_image_get_capability_with_layout:
|
||||
case HSA_API_ID_hsa_isa_get_exception_policies:
|
||||
case HSA_API_ID_hsa_isa_get_info:
|
||||
case HSA_API_ID_hsa_isa_get_info_alt:
|
||||
case HSA_API_ID_hsa_isa_get_round_method:
|
||||
case HSA_API_ID_hsa_region_get_info:
|
||||
case HSA_API_ID_hsa_system_extension_supported:
|
||||
case HSA_API_ID_hsa_system_get_extension_table:
|
||||
case HSA_API_ID_hsa_system_get_info:
|
||||
case HSA_API_ID_hsa_system_get_major_extension_table:
|
||||
case HSA_API_ID_hsa_wavefront_get_info: break;
|
||||
default:
|
||||
{
|
||||
if(data->phase == ACTIVITY_API_PHASE_ENTER)
|
||||
{
|
||||
hsa_begin_timestamp = timer->timestamp_fn_ns();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _name = roctracer_op_string(domain, cid, 0);
|
||||
const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down)
|
||||
? hsa_begin_timestamp
|
||||
: timer->timestamp_fn_ns();
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name },
|
||||
hsa_begin_timestamp);
|
||||
TRACE_EVENT_END("device", end_timestamp);
|
||||
}
|
||||
|
||||
/*if(get_use_timemory())
|
||||
{
|
||||
static auto _scope = scope::flat() + scope::timeline();
|
||||
roctracer_bundle_t{ _name, _scope }
|
||||
.start()
|
||||
.store(end_timestamp - hsa_begin_timestamp)
|
||||
.stop();
|
||||
}*/
|
||||
// timemory is disabled in this callback because collecting data in this
|
||||
// thread causes strange segmentation faults
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
|
||||
{
|
||||
static const char* copy_op_name = "hsa_async_copy";
|
||||
static const char* dispatch_op_name = "hsa_dispatch";
|
||||
static const char* barrier_op_name = "hsa_barrier";
|
||||
const char** _name = nullptr;
|
||||
|
||||
switch(op)
|
||||
{
|
||||
case HSA_OP_ID_DISPATCH: _name = &dispatch_op_name; break;
|
||||
case HSA_OP_ID_COPY: _name = ©_op_name; break;
|
||||
case HSA_OP_ID_BARRIER: _name = &barrier_op_name; break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if(!_name) return;
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ *_name }, record->begin_ns);
|
||||
TRACE_EVENT_END("device", record->end_ns);
|
||||
}
|
||||
|
||||
// timemory is disabled in this callback because collecting data in this thread
|
||||
// causes strange segmentation faults
|
||||
tim::consume_parameters(arg);
|
||||
}
|
||||
|
||||
// HIP API callback function
|
||||
void
|
||||
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
|
||||
{
|
||||
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
|
||||
return;
|
||||
|
||||
static auto _scope = scope::flat() + scope::timeline();
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
HOSTTRACE_DEBUG("<%-30s id(%u)\tcorrelation_id(%lu) %s>\n",
|
||||
roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
|
||||
if(data->phase == ACTIVITY_API_PHASE_ENTER)
|
||||
{
|
||||
switch(cid)
|
||||
{
|
||||
case HIP_API_ID___hipPushCallConfiguration:
|
||||
case HIP_API_ID___hipPopCallConfiguration: break;
|
||||
case HIP_API_ID_hipLaunchKernel:
|
||||
{
|
||||
const char* _name =
|
||||
hipKernelNameRefByPtr(data->args.hipLaunchKernel.function_address,
|
||||
data->args.hipLaunchKernel.stream);
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
|
||||
get_roctracer_kernels().emplace(data->correlation_id);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
get_roctracer_key_data().emplace(data->correlation_id, _name);
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
get_roctracer_hip_data().emplace(data->correlation_id,
|
||||
roctracer_bundle_t{ _name, _scope });
|
||||
}
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
{
|
||||
const char* _name = hipKernelNameRef(data->args.hipModuleLaunchKernel.f);
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
|
||||
get_roctracer_kernels().emplace(data->correlation_id);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
get_roctracer_key_data().emplace(data->correlation_id, _name);
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
get_roctracer_hip_data().emplace(data->correlation_id,
|
||||
roctracer_bundle_t{ _name, _scope });
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
|
||||
const char* _name = roctracer_op_string(domain, cid, 0);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
get_roctracer_key_data().emplace(data->correlation_id, _name);
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
get_roctracer_hip_data().emplace(data->correlation_id,
|
||||
roctracer_bundle_t{ _name, _scope });
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(data->phase == ACTIVITY_API_PHASE_EXIT)
|
||||
{}
|
||||
tim::consume_parameters(domain, arg);
|
||||
}
|
||||
|
||||
// Activity tracing callback
|
||||
void
|
||||
hip_activity_callback(const char* begin, const char* end, void*)
|
||||
{
|
||||
if(!trait::runtime_enabled<comp::roctracer>::get()) return;
|
||||
static auto _kernel_names = std::unordered_map<const char*, std::string>{};
|
||||
const roctracer_record_t* record = reinterpret_cast<const roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record =
|
||||
reinterpret_cast<const roctracer_record_t*>(end);
|
||||
std::unordered_set<uint64_t> _indexes{};
|
||||
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<data_type_mutex_t>() };
|
||||
auto& _data = get_roctracer_hip_data();
|
||||
auto& _keys = get_roctracer_key_data();
|
||||
auto& _kernels = get_roctracer_kernels();
|
||||
|
||||
HOSTTRACE_DEBUG("Activity records:\n");
|
||||
while(record < end_record)
|
||||
{
|
||||
HOSTTRACE_DEBUG("\t%-30s\tcorrelation_id(%lu) time_ns(%lu:%lu) device_id(%d) "
|
||||
"stream_id(%lu)\n",
|
||||
roctracer_op_string(record->domain, record->correlation_id, 0),
|
||||
record->correlation_id, record->begin_ns, record->end_ns,
|
||||
record->device_id, record->queue_id);
|
||||
|
||||
auto _is_kernel = _kernels.find(record->correlation_id) != _kernels.end();
|
||||
if(_is_kernel && record->device_id != 0 && record->queue_id != 0)
|
||||
{
|
||||
// these are overheads associated with the kernel launch, not kernel runtime
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
continue;
|
||||
}
|
||||
|
||||
auto kitr =
|
||||
(get_use_perfetto()) ? _keys.find(record->correlation_id) : _keys.end();
|
||||
if(kitr != _keys.end())
|
||||
{
|
||||
if(_kernel_names.find(kitr->second) == _kernel_names.end())
|
||||
_kernel_names.emplace(kitr->second, tim::demangle(kitr->second));
|
||||
TRACE_EVENT_BEGIN(
|
||||
"device",
|
||||
perfetto::StaticString{ _kernel_names.at(kitr->second).c_str() },
|
||||
record->begin_ns);
|
||||
TRACE_EVENT_END("device", record->end_ns);
|
||||
_indexes.emplace(kitr->first);
|
||||
}
|
||||
|
||||
auto itr =
|
||||
(get_use_timemory()) ? _data.find(record->correlation_id) : _data.end();
|
||||
if(itr != _data.end())
|
||||
{
|
||||
itr->second.start()
|
||||
.store(std::plus<double>{},
|
||||
static_cast<double>(record->end_ns - record->begin_ns))
|
||||
.stop();
|
||||
_indexes.emplace(itr->first);
|
||||
}
|
||||
// code
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
for(auto& itr : _indexes)
|
||||
_keys.erase(itr);
|
||||
}
|
||||
|
||||
if(get_use_timemory())
|
||||
{
|
||||
for(auto& itr : _indexes)
|
||||
_data.erase(itr);
|
||||
}
|
||||
|
||||
HOSTTRACE_DEBUG("[%s] recorded %lu phases\n", __FUNCTION__,
|
||||
(unsigned long) _indexes.size());
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the process-wide "roctracer is set up" flag
/// (initially false).
bool&
roctracer_is_setup()
{
    static auto _is_setup = false;
    return _is_setup;
}
|
||||
|
||||
using roctracer_functions_t = std::vector<std::pair<std::string, std::function<void()>>>;
|
||||
|
||||
auto&
|
||||
roctracer_setup_routines()
|
||||
{
|
||||
static auto _v = roctracer_functions_t{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
roctracer_tear_down_routines()
|
||||
{
|
||||
static auto _v = roctracer_functions_t{};
|
||||
return _v;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
#if !defined(HOSTTRACE_ROCTRACER_LIBKFDWRAPPER)
|
||||
# define HOSTTRACE_ROCTRACER_LIBKFDWRAPPER "/opt/rocm/roctracer/lib/libkfdwrapper64.so"
|
||||
#endif
|
||||
|
||||
struct dynamic_library
|
||||
{
|
||||
dynamic_library() = delete;
|
||||
dynamic_library(const dynamic_library&) = delete;
|
||||
dynamic_library(dynamic_library&&) noexcept = default;
|
||||
dynamic_library& operator=(const dynamic_library&) = delete;
|
||||
dynamic_library& operator=(dynamic_library&&) noexcept = default;
|
||||
|
||||
dynamic_library(const char* _env, const char* _fname,
|
||||
int _flags = (RTLD_NOW | RTLD_GLOBAL), bool _store = false)
|
||||
: envname{ _env }
|
||||
, filename{ tim::get_env<std::string>(_env, _fname, _store) }
|
||||
, flags{ _flags }
|
||||
{
|
||||
handle = dlopen(filename.c_str(), flags);
|
||||
if(!handle) fprintf(stderr, "%s\n", dlerror());
|
||||
dlerror(); // Clear any existing error
|
||||
}
|
||||
|
||||
~dynamic_library()
|
||||
{
|
||||
if(handle) dlclose(handle);
|
||||
}
|
||||
|
||||
std::string envname = {};
|
||||
std::string filename = {};
|
||||
int flags = 0;
|
||||
void* handle = nullptr;
|
||||
};
|
||||
|
||||
namespace tim
|
||||
{
|
||||
namespace component
|
||||
{
|
||||
void
|
||||
roctracer::preinit()
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
roctracer_data::label() = "roctracer";
|
||||
roctracer_data::description() = "ROCm tracer (activity API)";
|
||||
}
|
||||
|
||||
bool
|
||||
roctracer::is_setup()
|
||||
{
|
||||
return roctracer_is_setup();
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::add_setup(const std::string& _lbl, std::function<void()>&& _func)
|
||||
{
|
||||
roctracer_setup_routines().emplace_back(_lbl, std::move(_func));
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::add_tear_down(const std::string& _lbl, std::function<void()>&& _func)
|
||||
{
|
||||
roctracer_tear_down_routines().emplace_back(_lbl, std::move(_func));
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::remove_setup(const std::string& _lbl)
|
||||
{
|
||||
auto& _data = roctracer_setup_routines();
|
||||
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
|
||||
{
|
||||
if(itr->first == _lbl)
|
||||
{
|
||||
_data.erase(itr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::remove_tear_down(const std::string& _lbl)
|
||||
{
|
||||
auto& _data = roctracer_setup_routines();
|
||||
for(auto itr = _data.begin(); itr != _data.end(); ++itr)
|
||||
{
|
||||
if(itr->first == _lbl)
|
||||
{
|
||||
_data.erase(itr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::setup()
|
||||
{
|
||||
if(!get_use_timemory() && !get_use_perfetto()) return;
|
||||
|
||||
auto_lock_t _lk{ type_mutex<roctracer>() };
|
||||
if(roctracer_is_setup()) return;
|
||||
roctracer_is_setup() = true;
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
|
||||
tim::set_env("HSA_TOOLS_LIB", "libhosttrace.so", 0);
|
||||
|
||||
auto _kfdwrapper = dynamic_library{ "HOSTTRACE_ROCTRACER_LIBKFDWRAPPER",
|
||||
HOSTTRACE_ROCTRACER_LIBKFDWRAPPER };
|
||||
|
||||
ROCTRACER_CALL(roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, nullptr));
|
||||
|
||||
if(roctracer_default_pool() == nullptr)
|
||||
{
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x1000;
|
||||
properties.buffer_callback_fun = hip_activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
}
|
||||
|
||||
// Enable API callbacks, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, nullptr));
|
||||
// Enable activity tracing, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_activity());
|
||||
|
||||
// callback for HSA
|
||||
for(auto& itr : roctracer_setup_routines())
|
||||
itr.second();
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::tear_down()
|
||||
{
|
||||
auto_lock_t _lk{ type_mutex<roctracer>() };
|
||||
if(!roctracer_is_setup()) return;
|
||||
roctracer_is_setup() = false;
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
|
||||
// flush all the activity
|
||||
if(roctracer_default_pool() != nullptr)
|
||||
{
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
}
|
||||
// flush all buffers
|
||||
roctracer_flush_buf();
|
||||
|
||||
// callback for hsa
|
||||
for(auto& itr : roctracer_tear_down_routines())
|
||||
itr.second();
|
||||
|
||||
// Disable tracing and closing the pool
|
||||
ROCTRACER_CALL(roctracer_disable_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::start()
|
||||
{
|
||||
if(tracker_type::start() == 0) setup();
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::stop()
|
||||
{
|
||||
if(tracker_type::stop() == 0) tear_down();
|
||||
}
|
||||
} // namespace component
|
||||
} // namespace tim
|
||||
|
||||
// Component instantiation for the two component types defined in this file.
// NOTE(review): the macro comes from timemory; the meaning of the boolean and
// value-type arguments is not visible here -- confirm against its definition.
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer, false, void)
|
||||
TIMEMORY_INSTANTIATE_EXTERN_COMPONENT(roctracer_data, true, double)
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C"
|
||||
{
|
||||
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) TIMEMORY_VISIBILITY("default");
|
||||
void OnUnload() TIMEMORY_VISIBILITY("default");
|
||||
|
||||
// Entry point invoked with the HSA API table. It builds two routines and
// registers them with the roctracer component under the label "hsa":
//   - a setup routine that creates the HSA timer and, depending on
//     environment settings, enables HSA API callbacks and HSA copy activity
//   - a tear-down routine that disables both again
// If the component is already set up, the setup routine is also run right
// away. Always returns true.
//
// table                HSA API table; captured by the setup routine
// runtime_version      unused
// failed_tool_count    unused
// failed_tool_names    unused
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
            const char* const* failed_tool_names)
{
    HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
    tim::consume_parameters(table, runtime_version, failed_tool_count,
                            failed_tool_names);

    auto _setup = [=]() {
        get_hsa_timer() =
            std::make_unique<hsa_timer_t>(table->core_->hsa_system_get_info_fn);

        // no output directory is forwarded to the HSA ops properties
        const char* _out_prefix = nullptr;

        bool                     _want_api  = tim::get_env("HOSTTRACE_ROCTRACER_HSA_API", true);
        std::vector<std::string> _api_names = tim::delimit(
            tim::get_env<std::string>("HOSTTRACE_ROCTRACER_HSA_API_TYPES", ""));

        // HSA API callbacks
        if(_want_api)
        {
            // initialize HSA tracing
            roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, static_cast<void*>(table));

            HOSTTRACE_DEBUG("    HSA-trace(");
            if(_api_names.empty())
            {
                // no explicit selection: enable the whole HSA API domain
                ROCTRACER_CALL(roctracer_enable_domain_callback(
                    ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr));
            }
            else
            {
                // enable only the listed API functions, resolved by name
                for(const auto& _name : _api_names)
                {
                    uint32_t    _cid = HSA_API_ID_NUMBER;
                    const char* _api = _name.c_str();
                    ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, _api,
                                                     &_cid, nullptr));
                    ROCTRACER_CALL(roctracer_enable_op_callback(
                        ACTIVITY_DOMAIN_HSA_API, _cid, hsa_api_callback, nullptr));

                    HOSTTRACE_DEBUG(" %s", _api);
                }
            }
            HOSTTRACE_DEBUG("\n");
        }

        bool _want_activity = tim::get_env("HOSTTRACE_ROCTRACER_HSA_ACTIVITY", true);

        // HSA GPU activity (copy operations)
        if(_want_activity)
        {
            // initialize HSA tracing
            roctracer::hsa_ops_properties_t _ops_props{
                table,
                reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
                nullptr, _out_prefix
            };
            roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &_ops_props);

            HOSTTRACE_DEBUG("    HSA-activity-trace()\n");
            ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS,
                                                        HSA_OP_ID_COPY));
        }
    };

    auto _tear_down = []() {
        ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));

        ROCTRACER_CALL(
            roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
    };

    // the component was set up before this call: run the HSA setup right away
    if(comp::roctracer::is_setup())
    {
        _setup();
    }

    comp::roctracer::add_setup("hsa", std::move(_setup));
    comp::roctracer::add_tear_down("hsa", std::move(_tear_down));

    return true;
}
|
||||
|
||||
// HSA-runtime on-unload method
|
||||
void OnUnload()
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
// ONLOAD_TRACE("");
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,10 @@ if(NOT HOSTTRACE_DYNINST_API_RT_DIR AND HOSTTRACE_DYNINST_API_RT)
|
||||
DIRECTORY)
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED NUM_PROCS)
|
||||
set(NUM_PROCS 2)
|
||||
endif()
|
||||
|
||||
if(HOSTTRACE_BUILD_DYNINST)
|
||||
set(HOSTTRACE_DYNINST_API_RT_DIR
|
||||
"${PROJECT_BINARY_DIR}/external/dyninst/dyninstAPI_RT:${PROJECT_BINARY_DIR}/external/dyninst/dyninstAPI"
|
||||
@@ -16,8 +20,8 @@ set(_test_environment
|
||||
)
|
||||
|
||||
if(TARGET transpose)
|
||||
if(TRANSPOSE_USE_MPI)
|
||||
set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2)
|
||||
if(TRANSPOSE_USE_MPI AND NUM_PROCS GREATER 0)
|
||||
set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NUM_PROCS})
|
||||
endif()
|
||||
|
||||
add_test(
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren