diff --git a/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml b/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml index cb444539e2..3bfdbcbe49 100644 --- a/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml +++ b/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml @@ -468,9 +468,17 @@ jobs: shell: bash run: | git config --global --add safe.directory '*' - apt-get install -y cmake libgtest-dev python3-pip libasan8 libtsan2 + apt-get install -y cmake libgtest-dev python3-pip libasan8 libtsan2 software-properties-common python3 -m pip install -r requirements.txt python3 -m pip install pytest + add-apt-repository ppa:ubuntu-toolchain-r/test + apt-get update + apt-get install -y g++-13 + update-alternatives --install $(which gcc) gcc $(which gcc-13) 100 --slave $(which g++) g++ $(which g++-13) + realpath $(which gcc) + realpath $(which g++) + gcc --version + g++ --version - name: Configure, Build, and Test timeout-minutes: 45 diff --git a/projects/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/CMakeLists.txt index 02f4903d0d..082c4b9211 100644 --- a/projects/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/CMakeLists.txt @@ -72,6 +72,11 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${PROJECT_SOURCE_DIR}/cmake/Mo include(GNUInstallDirs) # install directories set(CMAKE_INSTALL_LIBDIR "lib") # rocm doesn't use lib64 +set(CMAKE_BUILD_RPATH "${PROJECT_BINARY_DIR}/lib:\$ORIGIN:\$ORIGIN/../lib") +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH + OFF + CACHE BOOL "") +mark_as_advanced(CMAKE_INSTALL_RPATH_USE_LINK_PATH) set(ROCPROFILER_INTERNAL_BUILD_DOCS OFF diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_linting.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_linting.cmake index f2e93d6273..9e0103722c 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_linting.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_linting.cmake @@ -26,6 +26,9 @@ 
macro(ROCPROFILER_ACTIVATE_CLANG_TIDY) "ROCPROFILER_ENABLE_CLANG_TIDY is ON but clang-tidy is not found!") endif() + rocprofiler_add_feature(ROCPROFILER_CLANG_TIDY_COMMAND + "path to clang-tidy executable") + set(CMAKE_CXX_CLANG_TIDY ${ROCPROFILER_CLANG_TIDY_COMMAND} -header-filter=${PROJECT_SOURCE_DIR}/source/.* diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_memcheck.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_memcheck.cmake index 92a34d6583..9484ad5d10 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_memcheck.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_memcheck.cmake @@ -2,7 +2,7 @@ # # set(ROCPROFILER_MEMCHECK_TYPES "ThreadSanitizer" "AddressSanitizer" "LeakSanitizer" - "MemorySanitizer" "UndefinedBehaviorSanitizer") + "UndefinedBehaviorSanitizer") if(ROCPROFILER_MEMCHECK AND NOT ROCPROFILER_MEMCHECK IN_LIST ROCPROFILER_MEMCHECK_TYPES) message( @@ -13,30 +13,52 @@ endif() set_property(CACHE ROCPROFILER_MEMCHECK PROPERTY STRINGS "${ROCPROFILER_MEMCHECK_TYPES}") -function(rocprofiler_add_memcheck_flags _TYPE) +function(rocprofiler_add_memcheck_flags _TYPE _FLAG _LIB_BASE) target_compile_options( rocprofiler-memcheck INTERFACE $) + -fno-optimize-sibling-calls -fno-inline-functions -fsanitize=${_FLAG}>) target_link_options(rocprofiler-memcheck INTERFACE - $) + $) + + if(NOT EXISTS ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp) + file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp") + endif() + + execute_process( + COMMAND ${PROJECT_SOURCE_DIR}/source/scripts/deduce-sanitizer-lib.sh + lib${_LIB_BASE} ${CMAKE_CXX_COMPILER} -fsanitize=${_FLAG} + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp + RESULT_VARIABLE _DEDUCE_RET + ERROR_VARIABLE _DEDUCE_ERR + OUTPUT_VARIABLE _DEDUCE_OUT + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(_DEDUCE_RET EQUAL 0 AND EXISTS "${_DEDUCE_OUT}") + set(${_TYPE}_LIBRARY + "${_DEDUCE_OUT}" + CACHE FILEPATH "Linked library when compiled with -fsanitize=${_FLAG}") + endif() endfunction() 
function(rocprofiler_set_memcheck_env _TYPE _LIB_BASE) - set(_LIBS ${_LIB_BASE}) + if(NOT ${_TYPE}_LIBRARY) + set(_LIBS ${_LIB_BASE}) - foreach(_N ${ARGN} 6 5 4 3 2 1 0) - list( - APPEND _LIBS - ${CMAKE_SHARED_LIBRARY_PREFIX}${_LIB_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}.${_N} - ) - endforeach() + foreach(_N ${ARGN} 6 5 4 3 2 1 0) + list( + APPEND + _LIBS + ${CMAKE_SHARED_LIBRARY_PREFIX}${_LIB_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}.${_N} + ) + endforeach() - foreach(_LIB ${_LIBS}) - if(NOT ${_TYPE}_LIBRARY) - find_library(${_TYPE}_LIBRARY NAMES ${_LIB}) - endif() - endforeach() + foreach(_LIB ${_LIBS}) + if(NOT ${_TYPE}_LIBRARY) + find_library(${_TYPE}_LIBRARY NAMES ${_LIB}) + endif() + endforeach() + endif() target_link_libraries(rocprofiler-memcheck INTERFACE ${_LIB_BASE}) @@ -50,19 +72,29 @@ endfunction() # always unset so that it doesn't preload if memcheck disabled unset(ROCPROFILER_MEMCHECK_PRELOAD_ENV CACHE) +# the soversions below are fallbacks in case deduce-sanitizer-lib.sh fails +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION + VERSION_GREATER_EQUAL "13.0.0") + set(ThreadSanitizer_SOVERSION 2) + set(AddressSanitizer_SOVERSION 8) +else() + set(ThreadSanitizer_SOVERSION 0) + set(AddressSanitizer_SOVERSION 6) +endif() + if(ROCPROFILER_MEMCHECK STREQUAL "AddressSanitizer") - rocprofiler_add_memcheck_flags("address") - rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "asan") + rocprofiler_add_memcheck_flags("${ROCPROFILER_MEMCHECK}" "address" "asan") + rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "asan" + ${AddressSanitizer_SOVERSION}) elseif(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer") - rocprofiler_add_memcheck_flags("leak") + rocprofiler_add_memcheck_flags("${ROCPROFILER_MEMCHECK}" "leak" "lsan") rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "lsan") -elseif(ROCPROFILER_MEMCHECK STREQUAL "MemorySanitizer") - rocprofiler_add_memcheck_flags("memory") elseif(ROCPROFILER_MEMCHECK STREQUAL "ThreadSanitizer") - 
rocprofiler_add_memcheck_flags("thread") - rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "tsan" 0) + rocprofiler_add_memcheck_flags("${ROCPROFILER_MEMCHECK}" "thread" "tsan") + rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "tsan" + ${ThreadSanitizer_SOVERSION}) elseif(ROCPROFILER_MEMCHECK STREQUAL "UndefinedBehaviorSanitizer") - rocprofiler_add_memcheck_flags("undefined") + rocprofiler_add_memcheck_flags("${ROCPROFILER_MEMCHECK}" "undefined" "ubsan") rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "ubsan") elseif(NOT ROCPROFILER_MEMCHECK STREQUAL "") message(FATAL_ERROR "Unsupported ROCPROFILER_MEMCHECK type: ${ROCPROFILER_MEMCHECK}") diff --git a/projects/rocprofiler-sdk/external/CMakeLists.txt b/projects/rocprofiler-sdk/external/CMakeLists.txt index f0f07c6ca1..a7944339ed 100644 --- a/projects/rocprofiler-sdk/external/CMakeLists.txt +++ b/projects/rocprofiler-sdk/external/CMakeLists.txt @@ -7,10 +7,15 @@ include(rocprofiler_utilities) set(BUILD_TESTING OFF) set(BUILD_SHARED_LIBS OFF) +set(BUILD_OBJECT_LIBS OFF) # Specific to PTL +set(BUILD_STATIC_LIBS ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_CXX_VISIBILITY_PRESET "hidden") +set(CMAKE_VISIBILITY_INLINES_HIDDEN ON) # filesystem library if(ROCPROFILER_BUILD_GHC_FS) + # checkout submodule if not already checked out or clone repo if no .gitmodules file rocprofiler_checkout_git_submodule( RECURSIVE RELATIVE_PATH external/filesystem @@ -28,13 +33,8 @@ endif() if(ROCPROFILER_BUILD_TESTS) if(ROCPROFILER_BUILD_GTEST) - set(INSTALL_GTEST - OFF - CACHE BOOL "") - set(BUILD_GMOCK - OFF - CACHE BOOL "") - + # checkout submodule if not already checked out or clone repo if no .gitmodules + # file rocprofiler_checkout_git_submodule( RECURSIVE RELATIVE_PATH external/googletest @@ -42,6 +42,8 @@ if(ROCPROFILER_BUILD_TESTS) REPO_URL https://github.com/google/googletest.git REPO_BRANCH "main") + set(BUILD_GMOCK OFF) + set(INSTALL_GTEST OFF) add_subdirectory(googletest EXCLUDE_FROM_ALL) if(NOT 
TARGET GTest::gtest) @@ -61,6 +63,7 @@ if(ROCPROFILER_BUILD_TESTS) target_link_libraries(rocprofiler-gtest INTERFACE GTest::gtest) endif() + # checkout submodule if not already checked out or clone repo if no .gitmodules file rocprofiler_checkout_git_submodule( RECURSIVE RELATIVE_PATH external/cereal @@ -124,6 +127,7 @@ else() endif() if(NOT TARGET PTL::ptl-static) + # checkout submodule if not already checked out or clone repo if no .gitmodules file rocprofiler_checkout_git_submodule( RELATIVE_PATH external/ptl WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} @@ -135,35 +139,7 @@ if(NOT TARGET PTL::ptl-static) set(PTL_USE_LOCKS ON) set(PTL_BUILD_EXAMPLES OFF) set(PTL_DEVELOPER_INSTALL OFF) - - if(NOT DEFINED BUILD_OBJECT_LIBS) - set(BUILD_OBJECT_LIBS OFF) - endif() - - if(NOT DEFINED BUILD_STATIC_LIBS) - set(BUILD_STATIC_LIBS OFF) - endif() - - rocprofiler_save_variables( - BUILD_CONFIG - VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS BUILD_OBJECT_LIBS - CMAKE_POSITION_INDEPENDENT_CODE CMAKE_CXX_VISIBILITY_PRESET - CMAKE_VISIBILITY_INLINES_HIDDEN) - - set(BUILD_SHARED_LIBS OFF) - set(BUILD_STATIC_LIBS ON) - set(BUILD_OBJECT_LIBS OFF) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) - set(CMAKE_CXX_VISIBILITY_PRESET "hidden") - set(CMAKE_VISIBILITY_INLINES_HIDDEN ON) - add_subdirectory(ptl EXCLUDE_FROM_ALL) - - rocprofiler_restore_variables( - BUILD_CONFIG - VARIABLES BUILD_SHARED_LIBS BUILD_STATIC_LIBS BUILD_OBJECT_LIBS - CMAKE_POSITION_INDEPENDENT_CODE CMAKE_CXX_VISIBILITY_PRESET - CMAKE_VISIBILITY_INLINES_HIDDEN) endif() # doxygen-awesome diff --git a/projects/rocprofiler-sdk/samples/CMakeLists.txt b/projects/rocprofiler-sdk/samples/CMakeLists.txt index f185b55af2..4787374dac 100644 --- a/projects/rocprofiler-sdk/samples/CMakeLists.txt +++ b/projects/rocprofiler-sdk/samples/CMakeLists.txt @@ -12,6 +12,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "") CACHE STRING "Build type" FORCE) endif() +include(GNUInstallDirs) + +# always use lib instead of lib64 +set(CMAKE_INSTALL_LIBDIR "lib") + 
enable_testing() include(CTest) diff --git a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp index 596626fdf2..57beb1eb15 100644 --- a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp +++ b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp @@ -149,7 +149,8 @@ get_buffer_tracing_names() [](rocprofiler_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) { auto* name_info_v = static_cast(data_v); - if(kindv == ROCPROFILER_BUFFER_TRACING_HSA_API) + if(kindv == ROCPROFILER_BUFFER_TRACING_HSA_API || + kindv == ROCPROFILER_BUFFER_TRACING_HIP_API) { const char* name = nullptr; ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name( @@ -171,7 +172,7 @@ get_buffer_tracing_names() "query buffer tracing kind operation name"); if(name) name_info_v->kind_names[kind] = name; - if(kind == ROCPROFILER_BUFFER_TRACING_HSA_API) + if(kind == ROCPROFILER_BUFFER_TRACING_HSA_API || kind == ROCPROFILER_BUFFER_TRACING_HIP_API) { ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kind_operations( kind, tracing_kind_operation_cb, static_cast(data)), @@ -282,6 +283,33 @@ tool_tracing_callback(rocprofiler_context_id_t context, static_cast(user_data)->emplace_back( source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_HIP_API) + { + auto* record = + static_cast(header->payload); + auto info = std::stringstream{}; + info << "tid=" << record->thread_id << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle + << ", cid=" << record->correlation_id.internal + << ", extern_cid=" << record->correlation_id.external.value + << ", kind=" << record->kind << ", operation=" << record->operation + << ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp + << ", name=" << 
client_name_info.operation_names[record->kind][record->operation]; + + if(record->start_timestamp > record->end_timestamp) + { + auto msg = std::stringstream{}; + msg << "hip api: start > end (" << record->start_timestamp << " > " + << record->end_timestamp + << "). diff = " << (record->start_timestamp - record->end_timestamp); + std::cerr << "threw an exception " << msg.str() << "\n" << std::flush; + // throw std::runtime_error{msg.str()}; + } + + static_cast(user_data)->emplace_back( + source_location{__FUNCTION__, __FILE__, __LINE__, info.str()}); + } else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) { @@ -421,6 +449,10 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) client_ctx, ROCPROFILER_BUFFER_TRACING_HSA_API, nullptr, 0, client_buffer), "buffer tracing service configure"); + ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + client_ctx, ROCPROFILER_BUFFER_TRACING_HIP_API, nullptr, 0, client_buffer), + "buffer tracing service configure"); + ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service( client_ctx, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, client_buffer), diff --git a/projects/rocprofiler-sdk/samples/api_callback_tracing/client.cpp b/projects/rocprofiler-sdk/samples/api_callback_tracing/client.cpp index 22b65320e1..5fe963ba84 100644 --- a/projects/rocprofiler-sdk/samples/api_callback_tracing/client.cpp +++ b/projects/rocprofiler-sdk/samples/api_callback_tracing/client.cpp @@ -137,7 +137,8 @@ get_callback_id_names() [](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) { auto* name_info_v = static_cast(data_v); - if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API) + if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API || + kindv == ROCPROFILER_CALLBACK_TRACING_HIP_API) { const char* name = nullptr; ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name( @@ -159,7 +160,8 @@ 
get_callback_id_names() "query callback tracing kind operation name"); if(name) name_info_v->kind_names[kind] = name; - if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API) + if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API || + kind == ROCPROFILER_CALLBACK_TRACING_HIP_API) { ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations( kind, tracing_kind_operation_cb, static_cast(data)), @@ -270,6 +272,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) tool_data), "callback tracing service failed to configure"); + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(client_ctx, + ROCPROFILER_CALLBACK_TRACING_HIP_API, + nullptr, + 0, + tool_tracing_callback, + tool_data), + "callback tracing service failed to configure"); + int valid_ctx = 0; ROCPROFILER_CALL(rocprofiler_context_is_valid(client_ctx, &valid_ctx), "failure checking context validity"); diff --git a/projects/rocprofiler-sdk/samples/api_callback_tracing/main.cpp b/projects/rocprofiler-sdk/samples/api_callback_tracing/main.cpp index cc780e3a6f..3c15e0f6d5 100644 --- a/projects/rocprofiler-sdk/samples/api_callback_tracing/main.cpp +++ b/projects/rocprofiler-sdk/samples/api_callback_tracing/main.cpp @@ -39,9 +39,10 @@ { \ auto _hip_api_print_lk = auto_lock_t{print_lock}; \ fprintf(stderr, \ - "%s:%d :: HIP error : %s\n", \ + "%s:%d :: HIP error %i: %s\n", \ __FILE__, \ __LINE__, \ + (int) error_, \ hipGetErrorString(error_)); \ throw std::runtime_error("hip_api_call"); \ } \ diff --git a/projects/rocprofiler-sdk/source/bin/rocprofv3 b/projects/rocprofiler-sdk/source/bin/rocprofv3 index 68d8ba6037..34537b87f1 100755 --- a/projects/rocprofiler-sdk/source/bin/rocprofv3 +++ b/projects/rocprofiler-sdk/source/bin/rocprofv3 @@ -23,6 +23,9 @@ usage() { echo -e "${GREEN}--hsa-trace ${RESET} For Collecting HSA API Traces" echo -e "${GREEN}--kernel-trace ${RESET} For Collecting Kernel Dispatch Traces" echo -e "${GREEN}--memory-copy-trace ${RESET} For Collecting Memory Copy 
Traces" + echo -e "${GREEN}--marker-trace ${RESET} For Collecting Marker (ROCTx) Traces" + echo -e "${GREEN}--hip-trace ${RESET} For Collecting HIP Runtime Traces" + echo -e "${GREEN}--hip-compiler-trace ${RESET} For Collecting HIP Compiler generated code Traces" echo -e "${GREEN}-o | --output-file ${RESET} For the output file name" echo -e "\t#${GREY} usage e.g:(with current dir): rocprofv3 --hsa-trace -o " echo -e "\t#${GREY} usage e.g:(with custom dir): rocprofv3 --hsa-trace -d -o ${RESET}\n" @@ -89,6 +92,15 @@ while true; do elif [ "$1" == "--memory-copy-trace" ]; then export ROCPROF_MEMORY_COPY_TRACE=1 shift + elif [ "$1" == "--marker-trace" ]; then + export ROCPROF_MARKER_API_TRACE=1 + shift + elif [ "$1" == "--hip-trace" ]; then + export ROCPROF_HIP_API_TRACE=1 + shift + elif [ "$1" == "--hip-compiler-trace" ]; then + export ROCPROF_HIP_COMPILER_API_TRACE=1 + shift elif [ "$1" == "--" ]; then shift break diff --git a/projects/rocprofiler-sdk/source/docs/CMakeLists.txt b/projects/rocprofiler-sdk/source/docs/CMakeLists.txt index 50f90370a8..68f5571ab9 100644 --- a/projects/rocprofiler-sdk/source/docs/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/docs/CMakeLists.txt @@ -18,6 +18,8 @@ if(NOT EXISTS ${PROJECT_BINARY_DIR}/external/miniconda.sh) ${PROJECT_BINARY_DIR}/external/miniconda.sh) endif() +find_program(SHELL_CMD NAMES bash sh REQUIRED) + function(DOCS_EXECUTE_PROCESS) string(REPLACE ";" " " _MSG "${ARGN}") message(STATUS "[rocprofiler][docs] Executing: ${_MSG}") @@ -27,17 +29,19 @@ function(DOCS_EXECUTE_PROCESS) RESULT_VARIABLE _RET OUTPUT_VARIABLE _OUT ERROR_VARIABLE _ERR - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/external COMMAND_ERROR_IS_FATAL ANY) + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/external) if(NOT _RET EQUAL 0) message(STATUS "docs command failed: ${_RET}") message(STATUS "stderr:\n${_ERR}") message(STATUS "stdout:\n${_OUT}") + string(REPLACE ";" " " _CMD "${ARGN}") + message(WARNING "command failure: ${_CMD}") endif() endfunction() if(NOT 
EXISTS ${PROJECT_BINARY_DIR}/external/miniconda) - docs_execute_process(/bin/bash ${PROJECT_BINARY_DIR}/external/miniconda.sh -b -p + docs_execute_process(${SHELL_CMD} ${PROJECT_BINARY_DIR}/external/miniconda.sh -b -p ${PROJECT_BINARY_DIR}/external/miniconda) docs_execute_process(${PROJECT_BINARY_DIR}/external/miniconda/bin/conda config --set always_yes yes) @@ -52,16 +56,18 @@ endif() file( WRITE "${CMAKE_CURRENT_BINARY_DIR}/build-docs.sh" - "#!/bin/bash -e + "#!${SHELL_CMD} -e + +PATH=${PROJECT_BINARY_DIR}/external/miniconda/bin:\${PATH} +export PATH -export PATH=${PROJECT_BINARY_DIR}/external/miniconda/bin:\${PATH} source activate conda activate rocprofiler-docs ${PROJECT_SOURCE_DIR}/source/scripts/update-docs.sh 1> /dev/null rm -r ${PROJECT_SOURCE_DIR}/build-docs ") -add_custom_target(docs ALL /bin/bash ${CMAKE_CURRENT_BINARY_DIR}/build-docs.sh) +add_custom_target(docs ALL ${SHELL_CMD} ${CMAKE_CURRENT_BINARY_DIR}/build-docs.sh) install( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/_build/html/ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h index 28c34b211c..64bf3de2e7 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/callback_tracing.h @@ -72,6 +72,16 @@ typedef struct rocprofiler_hip_api_retval_t retval; } rocprofiler_callback_tracing_hip_api_data_t; +/** + * @brief ROCProfiler HIP Compiler API Tracer Callback Data. + */ +typedef struct +{ + uint64_t size; ///< size of this struct + rocprofiler_hip_compiler_api_args_t args; + rocprofiler_hip_compiler_api_retval_t retval; +} rocprofiler_callback_tracing_hip_compiler_api_data_t; + /** * @brief ROCProfiler Marker Tracer Callback Data. 
*/ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h index ad36cc4a9d..29975b24cc 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h @@ -127,11 +127,12 @@ typedef enum // NOLINT(performance-enum-size) typedef enum // NOLINT(performance-enum-size) { ROCPROFILER_CALLBACK_TRACING_NONE = 0, - ROCPROFILER_CALLBACK_TRACING_HSA_API, ///< Callbacks for HSA functions - ROCPROFILER_CALLBACK_TRACING_HIP_API, ///< Callbacks for HIP functions - ROCPROFILER_CALLBACK_TRACING_MARKER_API, ///< Callbacks for ROCTx functions - ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ///< Callbacks for code object info - ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, ///< Callbacks for kernel dispatches + ROCPROFILER_CALLBACK_TRACING_HSA_API, ///< Callbacks for HSA functions + ROCPROFILER_CALLBACK_TRACING_HIP_API, ///< Callbacks for HIP functions + ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, ///< Callbacks for HIP compiler functions + ROCPROFILER_CALLBACK_TRACING_MARKER_API, ///< Callbacks for ROCTx functions + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ///< Callbacks for code object info + ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, ///< Callbacks for kernel dispatches ROCPROFILER_CALLBACK_TRACING_LAST, } rocprofiler_callback_tracing_kind_t; @@ -143,6 +144,7 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_TRACING_NONE = 0, ROCPROFILER_BUFFER_TRACING_HSA_API, ///< Buffer HSA function calls ROCPROFILER_BUFFER_TRACING_HIP_API, ///< Buffer HIP function calls + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, ///< Buffer HIP compiler function calls ROCPROFILER_BUFFER_TRACING_MARKER_API, ///< Buffer ROCTx function calls ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, ///< Buffer memory copy info ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, ///< Buffer kernel dispatch info @@ -220,11 +222,13 @@ typedef enum // 
NOLINT(performance-enum-size) */ typedef enum { - ROCPROFILER_LIBRARY = (1 << 0), - ROCPROFILER_HSA_LIBRARY = (1 << 1), - ROCPROFILER_HIP_LIBRARY = (1 << 2), - ROCPROFILER_MARKER_LIBRARY = (1 << 3), - ROCPROFILER_LIBRARY_LAST = ROCPROFILER_MARKER_LIBRARY, + ROCPROFILER_LIBRARY = (1 << 0), + ROCPROFILER_HSA_LIBRARY = (1 << 1), + ROCPROFILER_HIP_LIBRARY = (1 << 2), + ROCPROFILER_HIP_RUNTIME_LIBRARY = ROCPROFILER_HIP_LIBRARY, + ROCPROFILER_MARKER_LIBRARY = (1 << 3), + ROCPROFILER_HIP_COMPILER_LIBRARY = (1 << 4), + ROCPROFILER_LIBRARY_LAST = ROCPROFILER_HIP_COMPILER_LIBRARY, } rocprofiler_runtime_library_t; //--------------------------------------------------------------------------------------// diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip.h index ba0af78f44..b74fd08f8a 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip.h @@ -24,3 +24,6 @@ #include #include +#include +#include +#include diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/CMakeLists.txt index 83209cfde2..c331a92dd5 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/CMakeLists.txt @@ -3,7 +3,8 @@ # Installation of public HIP headers # # -set(ROCPROFILER_HIP_HEADER_FILES api_args.h api_id.h) +set(ROCPROFILER_HIP_HEADER_FILES api_args.h api_id.h compiler_api_args.h + compiler_api_id.h table_api_id.h) install( FILES ${ROCPROFILER_HIP_HEADER_FILES} diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h index 365518447d..b613d75035 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h +++ 
b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_args.h @@ -22,68 +22,100 @@ #pragma once +#include + #include +#include +#include +// must be included after +#include typedef union rocprofiler_hip_api_retval_u { - int int_retval; - const char* const_charp_retval; - hipError_t hipError_t_retval; + int int_retval; + const char* const_charp_retval; + hipError_t hipError_t_retval; + hipChannelFormatDesc hipChannelFormatDesc_retval; } rocprofiler_hip_api_retval_t; typedef union rocprofiler_hip_api_args_u { struct { - dim3* gridDim; - dim3* blockDim; - size_t* sharedMem; - hipStream_t* stream; - } __hipPopCallConfiguration; + uint32_t id; + } hipApiName; struct { - dim3 gridDim; - dim3 blockDim; - size_t sharedMem; - hipStream_t stream; - } __hipPushCallConfiguration; - struct - { - hipArray** array; + hipArray_t* array; const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray; } hipArray3DCreate; struct { HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor; - hipArray* array; + hipArray_t array; } hipArray3DGetDescriptor; struct { - hipArray** pHandle; + hipArray_t* pHandle; const HIP_ARRAY_DESCRIPTOR* pAllocateArray; } hipArrayCreate; struct { - hipArray* array; + hipArray_t array; } hipArrayDestroy; struct { HIP_ARRAY_DESCRIPTOR* pArrayDescriptor; - hipArray* array; + hipArray_t array; } hipArrayGetDescriptor; struct { hipChannelFormatDesc* desc; hipExtent* extent; unsigned int* flags; - hipArray* array; + hipArray_t array; } hipArrayGetInfo; struct { - int* device; - const hipDeviceProp_t* prop; + size_t* offset; + const textureReference* tex; + const void* devPtr; + const hipChannelFormatDesc* desc; + size_t size; + } hipBindTexture; + struct + { + size_t* offset; + const textureReference* tex; + const void* devPtr; + const hipChannelFormatDesc* desc; + size_t width; + size_t height; + size_t pitch; + } hipBindTexture2D; + struct + { + const textureReference* tex; + hipArray_const_t array; + const hipChannelFormatDesc* desc; + } hipBindTextureToArray; + struct + { + 
const textureReference* tex; + hipMipmappedArray_const_t mipmappedArray; + const hipChannelFormatDesc* desc; + } hipBindTextureToMipmappedArray; + struct + { + int* device; + const hipDeviceProp_tR0600* prop; } hipChooseDevice; struct + { + int* device; + const hipDeviceProp_tR0000* prop; + } hipChooseDeviceR0000; + struct { dim3 gridDim; dim3 blockDim; @@ -96,6 +128,13 @@ typedef union rocprofiler_hip_api_args_u const hipResourceDesc* pResDesc; } hipCreateSurfaceObject; struct + { + hipTextureObject_t* pTexObject; + const hipResourceDesc* pResDesc; + const hipTextureDesc* pTexDesc; + const struct hipResourceViewDesc* pResViewDesc; + } hipCreateTextureObject; + struct { hipCtx_t* ctx; unsigned int flags; @@ -160,6 +199,9 @@ typedef union rocprofiler_hip_api_args_u hipSharedMemConfig config; } hipCtxSetSharedMemConfig; struct + { + } hipCtxSynchronize; + struct { hipExternalMemory_t extMem; } hipDestroyExternalMemory; @@ -172,6 +214,10 @@ typedef union rocprofiler_hip_api_args_u hipSurfaceObject_t surfaceObject; } hipDestroySurfaceObject; struct + { + hipTextureObject_t textureObject; + } hipDestroyTextureObject; + struct { int* canAccessPeer; int deviceId; @@ -295,6 +341,9 @@ typedef union rocprofiler_hip_api_args_u unsigned int flags; } hipDevicePrimaryCtxSetFlags; struct + { + } hipDeviceReset; + struct { hipFuncCache_t cacheConfig; } hipDeviceSetCacheConfig; @@ -319,6 +368,9 @@ typedef union rocprofiler_hip_api_args_u hipSharedMemConfig config; } hipDeviceSetSharedMemConfig; struct + { + } hipDeviceSynchronize; + struct { size_t* bytes; hipDevice_t device; @@ -328,6 +380,25 @@ typedef union rocprofiler_hip_api_args_u int* driverVersion; } hipDriverGetVersion; struct + { + hipError_t hipError; + const char** errorString; + } hipDrvGetErrorName; + struct + { + hipError_t hipError; + const char** errorString; + } hipDrvGetErrorString; + struct + { + hipGraphNode_t* phGraphNode; + hipGraph_t hGraph; + const hipGraphNode_t* dependencies; + size_t numDependencies; + 
const HIP_MEMCPY3D* copyParams; + hipCtx_t ctx; + } hipDrvGraphAddMemcpyNode; + struct { const hip_Memcpy2D* pCopy; } hipDrvMemcpy2DUnaligned; @@ -353,8 +424,8 @@ typedef union rocprofiler_hip_api_args_u } hipEventCreate; struct { - hipEvent_t* event; - unsigned int flags; + hipEvent_t* event; + unsigned flags; } hipEventCreateWithFlags; struct { @@ -381,10 +452,10 @@ typedef union rocprofiler_hip_api_args_u } hipEventSynchronize; struct { - int device1; - int device2; - unsigned int* linktype; - unsigned int* hopcount; + int device1; + int device2; + uint32_t* linktype; + uint32_t* hopcount; } hipExtGetLinkTypeAndHopCount; struct { @@ -412,32 +483,15 @@ typedef union rocprofiler_hip_api_args_u } hipExtMallocWithFlags; struct { - hipFunction_t f; - unsigned int globalWorkSizeX; - unsigned int globalWorkSizeY; - unsigned int globalWorkSizeZ; - unsigned int localWorkSizeX; - unsigned int localWorkSizeY; - unsigned int localWorkSizeZ; - size_t sharedMemBytes; - hipStream_t hStream; - void** kernelParams; - void** extra; - hipEvent_t startEvent; - hipEvent_t stopEvent; - unsigned int flags; - } hipExtModuleLaunchKernel; - struct - { - hipStream_t* stream; - unsigned int cuMaskSize; - const unsigned int* cuMask; + hipStream_t* stream; + uint32_t cuMaskSize; + const uint32_t* cuMask; } hipExtStreamCreateWithCUMask; struct { - hipStream_t stream; - unsigned int cuMaskSize; - unsigned int* cuMask; + hipStream_t stream; + uint32_t cuMaskSize; + uint32_t* cuMask; } hipExtStreamGetCUMask; struct { @@ -451,7 +505,7 @@ typedef union rocprofiler_hip_api_args_u } hipFree; struct { - hipArray* array; + hipArray_t array; } hipFreeArray; struct { @@ -474,8 +528,8 @@ typedef union rocprofiler_hip_api_args_u } hipFuncGetAttribute; struct { - hipFuncAttributes* attr; - const void* func; + struct hipFuncAttributes* attr; + const void* func; } hipFuncGetAttributes; struct { @@ -519,9 +573,25 @@ typedef union rocprofiler_hip_api_args_u } hipGetDeviceFlags; struct { - hipDeviceProp_t* 
props; - hipDevice_t device; - } hipGetDeviceProperties; + hipDeviceProp_tR0600* prop; + int deviceId; + } hipGetDevicePropertiesR0600; + struct + { + hipDeviceProp_tR0000* prop; + int deviceId; + } hipGetDevicePropertiesR0000; + struct + { + hipError_t hip_error; + } hipGetErrorName; + struct + { + hipError_t hipError; + } hipGetErrorString; + struct + { + } hipGetLastError; struct { hipArray_t* levelArray; @@ -539,6 +609,31 @@ typedef union rocprofiler_hip_api_args_u const void* symbol; } hipGetSymbolSize; struct + { + size_t* offset; + const textureReference* texref; + } hipGetTextureAlignmentOffset; + struct + { + hipResourceDesc* pResDesc; + hipTextureObject_t textureObject; + } hipGetTextureObjectResourceDesc; + struct + { + struct hipResourceViewDesc* pResViewDesc; + hipTextureObject_t textureObject; + } hipGetTextureObjectResourceViewDesc; + struct + { + hipTextureDesc* pTexDesc; + hipTextureObject_t textureObject; + } hipGetTextureObjectTextureDesc; + struct + { + const textureReference** texref; + const void* symbol; + } hipGetTextureReference; + struct { hipGraphNode_t* pGraphNode; hipGraph_t graph; @@ -1024,22 +1119,6 @@ typedef union rocprofiler_hip_api_args_u hipGraphicsResource_t resource; } hipGraphicsUnregisterResource; struct - { - hipFunction_t f; - unsigned int globalWorkSizeX; - unsigned int globalWorkSizeY; - unsigned int globalWorkSizeZ; - unsigned int blockDimX; - unsigned int blockDimY; - unsigned int blockDimZ; - size_t sharedMemBytes; - hipStream_t hStream; - void** kernelParams; - void** extra; - hipEvent_t startEvent; - hipEvent_t stopEvent; - } hipHccModuleLaunchKernel; - struct { void** ptr; size_t size; @@ -1116,8 +1195,17 @@ typedef union rocprofiler_hip_api_args_u unsigned int flags; } hipIpcOpenMemHandle; struct + { + hipFunction_t f; + } hipKernelNameRef; + struct { const void* hostFunction; + hipStream_t stream; + } hipKernelNameRefByPtr; + struct + { + const void* func; } hipLaunchByPtr; struct { @@ -1161,14 +1249,14 @@ typedef 
union rocprofiler_hip_api_args_u } hipMalloc3D; struct { - hipArray_t* array; - const hipChannelFormatDesc* desc; - hipExtent extent; - unsigned int flags; + hipArray_t* array; + const struct hipChannelFormatDesc* desc; + struct hipExtent extent; + unsigned int flags; } hipMalloc3DArray; struct { - hipArray** array; + hipArray_t* array; const hipChannelFormatDesc* desc; size_t width; size_t height; @@ -1200,11 +1288,11 @@ typedef union rocprofiler_hip_api_args_u } hipMallocManaged; struct { - hipMipmappedArray_t* mipmappedArray; - const hipChannelFormatDesc* desc; - hipExtent extent; - unsigned int numLevels; - unsigned int flags; + hipMipmappedArray_t* mipmappedArray; + const struct hipChannelFormatDesc* desc; + struct hipExtent extent; + unsigned int numLevels; + unsigned int flags; } hipMallocMipmappedArray; struct { @@ -1474,7 +1562,7 @@ typedef union rocprofiler_hip_api_args_u } hipMemcpy2DFromArrayAsync; struct { - hipArray* dst; + hipArray_t dst; size_t wOffset; size_t hOffset; const void* src; @@ -1485,7 +1573,7 @@ typedef union rocprofiler_hip_api_args_u } hipMemcpy2DToArray; struct { - hipArray* dst; + hipArray_t dst; size_t wOffset; size_t hOffset; const void* src; @@ -1497,12 +1585,12 @@ typedef union rocprofiler_hip_api_args_u } hipMemcpy2DToArrayAsync; struct { - const hipMemcpy3DParms* p; + const struct hipMemcpy3DParms* p; } hipMemcpy3D; struct { - const hipMemcpy3DParms* p; - hipStream_t stream; + const struct hipMemcpy3DParms* p; + hipStream_t stream; } hipMemcpy3DAsync; struct { @@ -1514,10 +1602,10 @@ typedef union rocprofiler_hip_api_args_u } hipMemcpyAsync; struct { - void* dst; - hipArray* srcArray; - size_t srcOffset; - size_t count; + void* dst; + hipArray_t srcArray; + size_t srcOffset; + size_t count; } hipMemcpyAtoH; struct { @@ -1573,7 +1661,7 @@ typedef union rocprofiler_hip_api_args_u } hipMemcpyFromSymbolAsync; struct { - hipArray* dstArray; + hipArray_t dstArray; size_t dstOffset; const void* srcHost; size_t count; @@ -1619,7 
+1707,7 @@ typedef union rocprofiler_hip_api_args_u } hipMemcpyPeerAsync; struct { - hipArray* dst; + hipArray_t dst; size_t wOffset; size_t hOffset; const void* src; @@ -1817,7 +1905,7 @@ typedef union rocprofiler_hip_api_args_u const void* image; unsigned int numOptions; hipJitOption* options; - void** optionsValues; + void** optionValues; } hipModuleLoadDataEx; struct { @@ -1860,14 +1948,14 @@ typedef union rocprofiler_hip_api_args_u int* numBlocks; const void* f; int blockSize; - size_t dynamicSMemSize; + size_t dynSharedMemPerBlk; } hipOccupancyMaxActiveBlocksPerMultiprocessor; struct { int* numBlocks; const void* f; int blockSize; - size_t dynamicSMemSize; + size_t dynSharedMemPerBlk; unsigned int flags; } hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; struct @@ -1879,6 +1967,9 @@ typedef union rocprofiler_hip_api_args_u int blockSizeLimit; } hipOccupancyMaxPotentialBlockSize; struct + { + } hipPeekAtLastError; + struct { void* data; hipPointer_attribute attribute; @@ -1896,6 +1987,12 @@ typedef union rocprofiler_hip_api_args_u hipDeviceptr_t ptr; } hipPointerSetAttribute; struct + { + } hipProfilerStart; + struct + { + } hipProfilerStop; + struct { int* runtimeVersion; } hipRuntimeGetVersion; @@ -1905,7 +2002,7 @@ typedef union rocprofiler_hip_api_args_u } hipSetDevice; struct { - unsigned int flags; + unsigned flags; } hipSetDeviceFlags; struct { @@ -2023,9 +2120,9 @@ typedef union rocprofiler_hip_api_args_u { hipStream_t stream; void* ptr; - unsigned int value; + uint32_t value; unsigned int flags; - unsigned int mask; + uint32_t mask; } hipStreamWaitValue32; struct { @@ -2039,7 +2136,7 @@ typedef union rocprofiler_hip_api_args_u { hipStream_t stream; void* ptr; - unsigned int value; + uint32_t value; unsigned int flags; } hipStreamWriteValue32; struct @@ -2050,11 +2147,48 @@ typedef union rocprofiler_hip_api_args_u unsigned int flags; } hipStreamWriteValue64; struct + { + hipTextureObject_t* pTexObject; + const HIP_RESOURCE_DESC* pResDesc; + const 
HIP_TEXTURE_DESC* pTexDesc; + const HIP_RESOURCE_VIEW_DESC* pResViewDesc; + } hipTexObjectCreate; + struct + { + hipTextureObject_t texObject; + } hipTexObjectDestroy; + struct + { + HIP_RESOURCE_DESC* pResDesc; + hipTextureObject_t texObject; + } hipTexObjectGetResourceDesc; + struct + { + HIP_RESOURCE_VIEW_DESC* pResViewDesc; + hipTextureObject_t texObject; + } hipTexObjectGetResourceViewDesc; + struct + { + HIP_TEXTURE_DESC* pTexDesc; + hipTextureObject_t texObject; + } hipTexObjectGetTextureDesc; + struct { hipDeviceptr_t* dev_ptr; const textureReference* texRef; } hipTexRefGetAddress; struct + { + enum hipTextureAddressMode* pam; + const textureReference* texRef; + int dim; + } hipTexRefGetAddressMode; + struct + { + enum hipTextureFilterMode* pfm; + const textureReference* texRef; + } hipTexRefGetFilterMode; + struct { unsigned int* pFlags; const textureReference* texRef; @@ -2076,6 +2210,11 @@ typedef union rocprofiler_hip_api_args_u const textureReference* texRef; } hipTexRefGetMipMappedArray; struct + { + enum hipTextureFilterMode* pfm; + const textureReference* texRef; + } hipTexRefGetMipmapFilterMode; + struct { float* pbias; const textureReference* texRef; @@ -2101,6 +2240,12 @@ typedef union rocprofiler_hip_api_args_u size_t Pitch; } hipTexRefSetAddress2D; struct + { + textureReference* texRef; + int dim; + enum hipTextureAddressMode am; + } hipTexRefSetAddressMode; + struct { textureReference* tex; hipArray_const_t array; @@ -2112,6 +2257,11 @@ typedef union rocprofiler_hip_api_args_u float* pBorderColor; } hipTexRefSetBorderColor; struct + { + textureReference* texRef; + enum hipTextureFilterMode fm; + } hipTexRefSetFilterMode; + struct { textureReference* texRef; unsigned int Flags; @@ -2128,6 +2278,11 @@ typedef union rocprofiler_hip_api_args_u unsigned int maxAniso; } hipTexRefSetMaxAnisotropy; struct + { + textureReference* texRef; + enum hipTextureFilterMode fm; + } hipTexRefSetMipmapFilterMode; + struct { textureReference* texRef; float bias; 
@@ -2140,15 +2295,19 @@ typedef union rocprofiler_hip_api_args_u } hipTexRefSetMipmapLevelClamp; struct { - textureReference* texRef; - hipMipmappedArray* mipmappedArray; - unsigned int Flags; + textureReference* texRef; + struct hipMipmappedArray* mipmappedArray; + unsigned int Flags; } hipTexRefSetMipmappedArray; struct { hipStreamCaptureMode* mode; } hipThreadExchangeStreamCaptureMode; struct + { + const textureReference* tex; + } hipUnbindTexture; + struct { hipUserObject_t* object_out; void* ptr; @@ -2173,4 +2332,330 @@ typedef union rocprofiler_hip_api_args_u unsigned int numExtSems; hipStream_t stream; } hipWaitExternalSemaphoresAsync; + struct + { + int x; + int y; + int z; + int w; + hipChannelFormatKind f; + } hipCreateChannelDesc; + struct + { + hipFunction_t f; + uint32_t globalWorkSizeX; + uint32_t globalWorkSizeY; + uint32_t globalWorkSizeZ; + uint32_t localWorkSizeX; + uint32_t localWorkSizeY; + uint32_t localWorkSizeZ; + size_t sharedMemBytes; + hipStream_t hStream; + void** kernelParams; + void** extra; + hipEvent_t startEvent; + hipEvent_t stopEvent; + uint32_t flags; + } hipExtModuleLaunchKernel; + struct + { + hipFunction_t f; + uint32_t globalWorkSizeX; + uint32_t globalWorkSizeY; + uint32_t globalWorkSizeZ; + uint32_t localWorkSizeX; + uint32_t localWorkSizeY; + uint32_t localWorkSizeZ; + size_t sharedMemBytes; + hipStream_t hStream; + void** kernelParams; + void** extra; + hipEvent_t startEvent; + hipEvent_t stopEvent; + } hipHccModuleLaunchKernel; + struct + { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + } hipMemcpy_spt; + struct + { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyToSymbol_spt; + struct + { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyFromSymbol_spt; + struct + { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + 
hipMemcpyKind kind; + } hipMemcpy2D_spt; + struct + { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffset; + size_t hOffset; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DFromArray_spt; + struct + { + const struct hipMemcpy3DParms* p; + } hipMemcpy3D_spt; + struct + { + void* dst; + int value; + size_t sizeBytes; + } hipMemset_spt; + struct + { + void* dst; + int value; + size_t sizeBytes; + hipStream_t stream; + } hipMemsetAsync_spt; + struct + { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + } hipMemset2D_spt; + struct + { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemset2DAsync_spt; + struct + { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + hipStream_t stream; + } hipMemset3DAsync_spt; + struct + { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + } hipMemset3D_spt; + struct + { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyAsync_spt; + struct + { + const hipMemcpy3DParms* p; + hipStream_t stream; + } hipMemcpy3DAsync_spt; + struct + { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DAsync_spt; + struct + { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyFromSymbolAsync_spt; + struct + { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyToSymbolAsync_spt; + struct + { + void* dst; + hipArray_const_t src; + size_t wOffsetSrc; + size_t hOffset; + size_t count; + hipMemcpyKind kind; + } hipMemcpyFromArray_spt; + struct + { + hipArray_t dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + 
} hipMemcpy2DToArray_spt; + struct + { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffsetSrc; + size_t hOffsetSrc; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DFromArrayAsync_spt; + struct + { + hipArray_t dst; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DToArrayAsync_spt; + struct + { + hipStream_t stream; + } hipStreamQuery_spt; + struct + { + hipStream_t stream; + } hipStreamSynchronize_spt; + struct + { + hipStream_t stream; + int* priority; + } hipStreamGetPriority_spt; + struct + { + hipStream_t stream; + hipEvent_t event; + unsigned int flags; + } hipStreamWaitEvent_spt; + struct + { + hipStream_t stream; + unsigned int* flags; + } hipStreamGetFlags_spt; + struct + { + hipStream_t stream; + hipStreamCallback_t callback; + void* userData; + unsigned int flags; + } hipStreamAddCallback_spt; + struct + { + hipEvent_t event; + hipStream_t stream; + } hipEventRecord_spt; + struct + { + const void* f; + dim3 gridDim; + dim3 blockDim; + void** kernelParams; + uint32_t sharedMemBytes; + hipStream_t hStream; + } hipLaunchCooperativeKernel_spt; + struct + { + const void* function_address; + dim3 numBlocks; + dim3 dimBlocks; + void** args; + size_t sharedMemBytes; + hipStream_t stream; + } hipLaunchKernel_spt; + struct + { + hipGraphExec_t graphExec; + hipStream_t stream; + } hipGraphLaunch_spt; + struct + { + hipStream_t stream; + hipStreamCaptureMode mode; + } hipStreamBeginCapture_spt; + struct + { + hipStream_t stream; + hipGraph_t* pGraph; + } hipStreamEndCapture_spt; + struct + { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + } hipStreamIsCapturing_spt; + struct + { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + unsigned long long* pId; + } hipStreamGetCaptureInfo_spt; + struct + { + hipStream_t stream; + hipStreamCaptureStatus* 
captureStatus_out; + unsigned long long* id_out; + hipGraph_t* graph_out; + const hipGraphNode_t** dependencies_out; + size_t* numDependencies_out; + } hipStreamGetCaptureInfo_v2_spt; + struct + { + hipStream_t stream; + hipHostFn_t fn; + void* userData; + } hipLaunchHostFunc_spt; + struct + { + hipStream_t stream; + } hipGetStreamDeviceId; + // struct + // { + // hipGraphNode_t* phGraphNode; + // hipGraph_t hGraph; + // const hipGraphNode_t* dependencies; + // size_t numDependencies; + // const HIP_MEMSET_NODE_PARAMS* memsetParams; + // hipCtx_t ctx; + // } hipDrvGraphAddMemsetNode; } rocprofiler_hip_api_args_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_id.h index 2563230425..d11d0d4a91 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_id.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/api_id.h @@ -27,14 +27,23 @@ */ typedef enum // NOLINT(performance-enum-size) { - ROCPROFILER_HIP_API_ID_NONE = -1, - ROCPROFILER_HIP_API_ID___hipPopCallConfiguration = 0, - ROCPROFILER_HIP_API_ID___hipPushCallConfiguration, + ROCPROFILER_HIP_API_ID_NONE = -1, + ROCPROFILER_HIP_API_ID_hipApiName = 0, ROCPROFILER_HIP_API_ID_hipArray3DCreate, + ROCPROFILER_HIP_API_ID_hipArray3DGetDescriptor, ROCPROFILER_HIP_API_ID_hipArrayCreate, ROCPROFILER_HIP_API_ID_hipArrayDestroy, + ROCPROFILER_HIP_API_ID_hipArrayGetDescriptor, + ROCPROFILER_HIP_API_ID_hipArrayGetInfo, + ROCPROFILER_HIP_API_ID_hipBindTexture, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipBindTexture2D, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipBindTextureToArray, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipBindTextureToMipmappedArray, // deprecated or removed ROCPROFILER_HIP_API_ID_hipChooseDevice, + ROCPROFILER_HIP_API_ID_hipChooseDeviceR0000, ROCPROFILER_HIP_API_ID_hipConfigureCall, + ROCPROFILER_HIP_API_ID_hipCreateSurfaceObject, + 
ROCPROFILER_HIP_API_ID_hipCreateTextureObject, // deprecated or removed ROCPROFILER_HIP_API_ID_hipCtxCreate, ROCPROFILER_HIP_API_ID_hipCtxDestroy, ROCPROFILER_HIP_API_ID_hipCtxDisablePeerAccess, @@ -53,6 +62,8 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipCtxSynchronize, ROCPROFILER_HIP_API_ID_hipDestroyExternalMemory, ROCPROFILER_HIP_API_ID_hipDestroyExternalSemaphore, + ROCPROFILER_HIP_API_ID_hipDestroySurfaceObject, + ROCPROFILER_HIP_API_ID_hipDestroyTextureObject, // deprecated or removed ROCPROFILER_HIP_API_ID_hipDeviceCanAccessPeer, ROCPROFILER_HIP_API_ID_hipDeviceComputeCapability, ROCPROFILER_HIP_API_ID_hipDeviceDisablePeerAccess, @@ -61,12 +72,17 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipDeviceGetAttribute, ROCPROFILER_HIP_API_ID_hipDeviceGetByPCIBusId, ROCPROFILER_HIP_API_ID_hipDeviceGetCacheConfig, + ROCPROFILER_HIP_API_ID_hipDeviceGetDefaultMemPool, + ROCPROFILER_HIP_API_ID_hipDeviceGetGraphMemAttribute, ROCPROFILER_HIP_API_ID_hipDeviceGetLimit, + ROCPROFILER_HIP_API_ID_hipDeviceGetMemPool, ROCPROFILER_HIP_API_ID_hipDeviceGetName, ROCPROFILER_HIP_API_ID_hipDeviceGetP2PAttribute, ROCPROFILER_HIP_API_ID_hipDeviceGetPCIBusId, ROCPROFILER_HIP_API_ID_hipDeviceGetSharedMemConfig, ROCPROFILER_HIP_API_ID_hipDeviceGetStreamPriorityRange, + ROCPROFILER_HIP_API_ID_hipDeviceGetUuid, + ROCPROFILER_HIP_API_ID_hipDeviceGraphMemTrim, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxGetState, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxRelease, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxReset, @@ -74,13 +90,20 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxSetFlags, ROCPROFILER_HIP_API_ID_hipDeviceReset, ROCPROFILER_HIP_API_ID_hipDeviceSetCacheConfig, + ROCPROFILER_HIP_API_ID_hipDeviceSetGraphMemAttribute, + ROCPROFILER_HIP_API_ID_hipDeviceSetLimit, + ROCPROFILER_HIP_API_ID_hipDeviceSetMemPool, ROCPROFILER_HIP_API_ID_hipDeviceSetSharedMemConfig, 
ROCPROFILER_HIP_API_ID_hipDeviceSynchronize, ROCPROFILER_HIP_API_ID_hipDeviceTotalMem, - ROCPROFILER_HIP_API_ID_RESERVED_50, + ROCPROFILER_HIP_API_ID_hipDriverGetVersion, + ROCPROFILER_HIP_API_ID_hipDrvGetErrorName, + ROCPROFILER_HIP_API_ID_hipDrvGetErrorString, + ROCPROFILER_HIP_API_ID_hipDrvGraphAddMemcpyNode, ROCPROFILER_HIP_API_ID_hipDrvMemcpy2DUnaligned, ROCPROFILER_HIP_API_ID_hipDrvMemcpy3D, ROCPROFILER_HIP_API_ID_hipDrvMemcpy3DAsync, + ROCPROFILER_HIP_API_ID_hipDrvPointerGetAttributes, ROCPROFILER_HIP_API_ID_hipEventCreate, ROCPROFILER_HIP_API_ID_hipEventCreateWithFlags, ROCPROFILER_HIP_API_ID_hipEventDestroy, @@ -92,12 +115,12 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipExtLaunchKernel, ROCPROFILER_HIP_API_ID_hipExtLaunchMultiKernelMultiDevice, ROCPROFILER_HIP_API_ID_hipExtMallocWithFlags, - ROCPROFILER_HIP_API_ID_hipExtModuleLaunchKernel, ROCPROFILER_HIP_API_ID_hipExtStreamCreateWithCUMask, ROCPROFILER_HIP_API_ID_hipExtStreamGetCUMask, ROCPROFILER_HIP_API_ID_hipExternalMemoryGetMappedBuffer, ROCPROFILER_HIP_API_ID_hipFree, ROCPROFILER_HIP_API_ID_hipFreeArray, + ROCPROFILER_HIP_API_ID_hipFreeAsync, ROCPROFILER_HIP_API_ID_hipFreeHost, ROCPROFILER_HIP_API_ID_hipFreeMipmappedArray, ROCPROFILER_HIP_API_ID_hipFuncGetAttribute, @@ -105,17 +128,99 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipFuncSetAttribute, ROCPROFILER_HIP_API_ID_hipFuncSetCacheConfig, ROCPROFILER_HIP_API_ID_hipFuncSetSharedMemConfig, + ROCPROFILER_HIP_API_ID_hipGLGetDevices, + ROCPROFILER_HIP_API_ID_hipGetChannelDesc, ROCPROFILER_HIP_API_ID_hipGetDevice, ROCPROFILER_HIP_API_ID_hipGetDeviceCount, ROCPROFILER_HIP_API_ID_hipGetDeviceFlags, - ROCPROFILER_HIP_API_ID_hipGetDeviceProperties, - ROCPROFILER_HIP_API_ID_RESERVED_82, + ROCPROFILER_HIP_API_ID_hipGetDevicePropertiesR0600, + ROCPROFILER_HIP_API_ID_hipGetDevicePropertiesR0000, + ROCPROFILER_HIP_API_ID_hipGetErrorName, ROCPROFILER_HIP_API_ID_hipGetErrorString, 
ROCPROFILER_HIP_API_ID_hipGetLastError, ROCPROFILER_HIP_API_ID_hipGetMipmappedArrayLevel, ROCPROFILER_HIP_API_ID_hipGetSymbolAddress, ROCPROFILER_HIP_API_ID_hipGetSymbolSize, - ROCPROFILER_HIP_API_ID_hipHccModuleLaunchKernel, + ROCPROFILER_HIP_API_ID_hipGetTextureAlignmentOffset, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceDesc, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceViewDesc, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipGetTextureObjectTextureDesc, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipGetTextureReference, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipGraphAddChildGraphNode, + ROCPROFILER_HIP_API_ID_hipGraphAddDependencies, + ROCPROFILER_HIP_API_ID_hipGraphAddEmptyNode, + ROCPROFILER_HIP_API_ID_hipGraphAddEventRecordNode, + ROCPROFILER_HIP_API_ID_hipGraphAddEventWaitNode, + ROCPROFILER_HIP_API_ID_hipGraphAddHostNode, + ROCPROFILER_HIP_API_ID_hipGraphAddKernelNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemAllocNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemFreeNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode1D, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol, + ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeToSymbol, + ROCPROFILER_HIP_API_ID_hipGraphAddMemsetNode, + ROCPROFILER_HIP_API_ID_hipGraphChildGraphNodeGetGraph, + ROCPROFILER_HIP_API_ID_hipGraphClone, + ROCPROFILER_HIP_API_ID_hipGraphCreate, + ROCPROFILER_HIP_API_ID_hipGraphDebugDotPrint, + ROCPROFILER_HIP_API_ID_hipGraphDestroy, + ROCPROFILER_HIP_API_ID_hipGraphDestroyNode, + ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeGetEvent, + ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeGetEvent, + ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphExecChildGraphNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecDestroy, + 
ROCPROFILER_HIP_API_ID_hipGraphExecEventRecordNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphExecEventWaitNodeSetEvent, + ROCPROFILER_HIP_API_ID_hipGraphExecHostNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecKernelNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol, + ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol, + ROCPROFILER_HIP_API_ID_hipGraphExecMemsetNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphExecUpdate, + ROCPROFILER_HIP_API_ID_hipGraphGetEdges, + ROCPROFILER_HIP_API_ID_hipGraphGetNodes, + ROCPROFILER_HIP_API_ID_hipGraphGetRootNodes, + ROCPROFILER_HIP_API_ID_hipGraphHostNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphHostNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphInstantiate, + ROCPROFILER_HIP_API_ID_hipGraphInstantiateWithFlags, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeCopyAttributes, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetAttribute, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetAttribute, + ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphLaunch, + ROCPROFILER_HIP_API_ID_hipGraphMemAllocNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemFreeNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams1D, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol, + ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol, + ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeGetParams, + ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeSetParams, + ROCPROFILER_HIP_API_ID_hipGraphNodeFindInClone, + ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependencies, + ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependentNodes, + ROCPROFILER_HIP_API_ID_hipGraphNodeGetEnabled, + 
ROCPROFILER_HIP_API_ID_hipGraphNodeGetType, + ROCPROFILER_HIP_API_ID_hipGraphNodeSetEnabled, + ROCPROFILER_HIP_API_ID_hipGraphReleaseUserObject, + ROCPROFILER_HIP_API_ID_hipGraphRemoveDependencies, + ROCPROFILER_HIP_API_ID_hipGraphRetainUserObject, + ROCPROFILER_HIP_API_ID_hipGraphUpload, + ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterBuffer, + ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterImage, + ROCPROFILER_HIP_API_ID_hipGraphicsMapResources, + ROCPROFILER_HIP_API_ID_hipGraphicsResourceGetMappedPointer, + ROCPROFILER_HIP_API_ID_hipGraphicsSubResourceGetMappedArray, + ROCPROFILER_HIP_API_ID_hipGraphicsUnmapResources, + ROCPROFILER_HIP_API_ID_hipGraphicsUnregisterResource, ROCPROFILER_HIP_API_ID_hipHostAlloc, ROCPROFILER_HIP_API_ID_hipHostFree, ROCPROFILER_HIP_API_ID_hipHostGetDevicePointer, @@ -131,27 +236,57 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipIpcGetMemHandle, ROCPROFILER_HIP_API_ID_hipIpcOpenEventHandle, ROCPROFILER_HIP_API_ID_hipIpcOpenMemHandle, + ROCPROFILER_HIP_API_ID_hipKernelNameRef, + ROCPROFILER_HIP_API_ID_hipKernelNameRefByPtr, ROCPROFILER_HIP_API_ID_hipLaunchByPtr, ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernel, ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernelMultiDevice, + ROCPROFILER_HIP_API_ID_hipLaunchHostFunc, ROCPROFILER_HIP_API_ID_hipLaunchKernel, ROCPROFILER_HIP_API_ID_hipMalloc, ROCPROFILER_HIP_API_ID_hipMalloc3D, ROCPROFILER_HIP_API_ID_hipMalloc3DArray, ROCPROFILER_HIP_API_ID_hipMallocArray, + ROCPROFILER_HIP_API_ID_hipMallocAsync, + ROCPROFILER_HIP_API_ID_hipMallocFromPoolAsync, ROCPROFILER_HIP_API_ID_hipMallocHost, ROCPROFILER_HIP_API_ID_hipMallocManaged, ROCPROFILER_HIP_API_ID_hipMallocMipmappedArray, ROCPROFILER_HIP_API_ID_hipMallocPitch, + ROCPROFILER_HIP_API_ID_hipMemAddressFree, + ROCPROFILER_HIP_API_ID_hipMemAddressReserve, ROCPROFILER_HIP_API_ID_hipMemAdvise, ROCPROFILER_HIP_API_ID_hipMemAllocHost, ROCPROFILER_HIP_API_ID_hipMemAllocPitch, + ROCPROFILER_HIP_API_ID_hipMemCreate, + 
ROCPROFILER_HIP_API_ID_hipMemExportToShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemGetAccess, ROCPROFILER_HIP_API_ID_hipMemGetAddressRange, + ROCPROFILER_HIP_API_ID_hipMemGetAllocationGranularity, + ROCPROFILER_HIP_API_ID_hipMemGetAllocationPropertiesFromHandle, ROCPROFILER_HIP_API_ID_hipMemGetInfo, + ROCPROFILER_HIP_API_ID_hipMemImportFromShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemMap, + ROCPROFILER_HIP_API_ID_hipMemMapArrayAsync, + ROCPROFILER_HIP_API_ID_hipMemPoolCreate, + ROCPROFILER_HIP_API_ID_hipMemPoolDestroy, + ROCPROFILER_HIP_API_ID_hipMemPoolExportPointer, + ROCPROFILER_HIP_API_ID_hipMemPoolExportToShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemPoolGetAccess, + ROCPROFILER_HIP_API_ID_hipMemPoolGetAttribute, + ROCPROFILER_HIP_API_ID_hipMemPoolImportFromShareableHandle, + ROCPROFILER_HIP_API_ID_hipMemPoolImportPointer, + ROCPROFILER_HIP_API_ID_hipMemPoolSetAccess, + ROCPROFILER_HIP_API_ID_hipMemPoolSetAttribute, + ROCPROFILER_HIP_API_ID_hipMemPoolTrimTo, ROCPROFILER_HIP_API_ID_hipMemPrefetchAsync, ROCPROFILER_HIP_API_ID_hipMemPtrGetInfo, ROCPROFILER_HIP_API_ID_hipMemRangeGetAttribute, ROCPROFILER_HIP_API_ID_hipMemRangeGetAttributes, + ROCPROFILER_HIP_API_ID_hipMemRelease, + ROCPROFILER_HIP_API_ID_hipMemRetainAllocationHandle, + ROCPROFILER_HIP_API_ID_hipMemSetAccess, + ROCPROFILER_HIP_API_ID_hipMemUnmap, ROCPROFILER_HIP_API_ID_hipMemcpy, ROCPROFILER_HIP_API_ID_hipMemcpy2D, ROCPROFILER_HIP_API_ID_hipMemcpy2DAsync, @@ -193,9 +328,14 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipMemsetD32Async, ROCPROFILER_HIP_API_ID_hipMemsetD8, ROCPROFILER_HIP_API_ID_hipMemsetD8Async, + ROCPROFILER_HIP_API_ID_hipMipmappedArrayCreate, + ROCPROFILER_HIP_API_ID_hipMipmappedArrayDestroy, + ROCPROFILER_HIP_API_ID_hipMipmappedArrayGetLevel, ROCPROFILER_HIP_API_ID_hipModuleGetFunction, ROCPROFILER_HIP_API_ID_hipModuleGetGlobal, ROCPROFILER_HIP_API_ID_hipModuleGetTexRef, + ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernel, + 
ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice, ROCPROFILER_HIP_API_ID_hipModuleLaunchKernel, ROCPROFILER_HIP_API_ID_hipModuleLoad, ROCPROFILER_HIP_API_ID_hipModuleLoadData, @@ -209,228 +349,113 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, ROCPROFILER_HIP_API_ID_hipOccupancyMaxPotentialBlockSize, ROCPROFILER_HIP_API_ID_hipPeekAtLastError, + ROCPROFILER_HIP_API_ID_hipPointerGetAttribute, ROCPROFILER_HIP_API_ID_hipPointerGetAttributes, + ROCPROFILER_HIP_API_ID_hipPointerSetAttribute, ROCPROFILER_HIP_API_ID_hipProfilerStart, ROCPROFILER_HIP_API_ID_hipProfilerStop, - ROCPROFILER_HIP_API_ID_RESERVED_185, + ROCPROFILER_HIP_API_ID_hipRuntimeGetVersion, ROCPROFILER_HIP_API_ID_hipSetDevice, ROCPROFILER_HIP_API_ID_hipSetDeviceFlags, ROCPROFILER_HIP_API_ID_hipSetupArgument, ROCPROFILER_HIP_API_ID_hipSignalExternalSemaphoresAsync, ROCPROFILER_HIP_API_ID_hipStreamAddCallback, ROCPROFILER_HIP_API_ID_hipStreamAttachMemAsync, + ROCPROFILER_HIP_API_ID_hipStreamBeginCapture, ROCPROFILER_HIP_API_ID_hipStreamCreate, ROCPROFILER_HIP_API_ID_hipStreamCreateWithFlags, ROCPROFILER_HIP_API_ID_hipStreamCreateWithPriority, ROCPROFILER_HIP_API_ID_hipStreamDestroy, + ROCPROFILER_HIP_API_ID_hipStreamEndCapture, + ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo, + ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_v2, + ROCPROFILER_HIP_API_ID_hipStreamGetDevice, ROCPROFILER_HIP_API_ID_hipStreamGetFlags, ROCPROFILER_HIP_API_ID_hipStreamGetPriority, + ROCPROFILER_HIP_API_ID_hipStreamIsCapturing, ROCPROFILER_HIP_API_ID_hipStreamQuery, ROCPROFILER_HIP_API_ID_hipStreamSynchronize, + ROCPROFILER_HIP_API_ID_hipStreamUpdateCaptureDependencies, ROCPROFILER_HIP_API_ID_hipStreamWaitEvent, ROCPROFILER_HIP_API_ID_hipStreamWaitValue32, ROCPROFILER_HIP_API_ID_hipStreamWaitValue64, ROCPROFILER_HIP_API_ID_hipStreamWriteValue32, ROCPROFILER_HIP_API_ID_hipStreamWriteValue64, - 
ROCPROFILER_HIP_API_ID_hipWaitExternalSemaphoresAsync, - ROCPROFILER_HIP_API_ID_hipCreateSurfaceObject, - ROCPROFILER_HIP_API_ID_hipDestroySurfaceObject, - ROCPROFILER_HIP_API_ID_hipGraphAddKernelNode, - ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode, - ROCPROFILER_HIP_API_ID_hipGraphAddMemsetNode, - ROCPROFILER_HIP_API_ID_hipGraphCreate, - ROCPROFILER_HIP_API_ID_hipGraphDestroy, - ROCPROFILER_HIP_API_ID_hipGraphExecDestroy, - ROCPROFILER_HIP_API_ID_hipGraphInstantiate, - ROCPROFILER_HIP_API_ID_hipGraphLaunch, - ROCPROFILER_HIP_API_ID_hipMipmappedArrayCreate, - ROCPROFILER_HIP_API_ID_hipMipmappedArrayDestroy, - ROCPROFILER_HIP_API_ID_hipMipmappedArrayGetLevel, - ROCPROFILER_HIP_API_ID_hipStreamBeginCapture, - ROCPROFILER_HIP_API_ID_hipStreamEndCapture, + ROCPROFILER_HIP_API_ID_hipTexObjectCreate, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexObjectDestroy, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceDesc, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceViewDesc, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexObjectGetTextureDesc, // deprecated or removed ROCPROFILER_HIP_API_ID_hipTexRefGetAddress, + ROCPROFILER_HIP_API_ID_hipTexRefGetAddressMode, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexRefGetFilterMode, // deprecated or removed ROCPROFILER_HIP_API_ID_hipTexRefGetFlags, ROCPROFILER_HIP_API_ID_hipTexRefGetFormat, ROCPROFILER_HIP_API_ID_hipTexRefGetMaxAnisotropy, ROCPROFILER_HIP_API_ID_hipTexRefGetMipMappedArray, + ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapFilterMode, // deprecated or removed ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapLevelBias, ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapLevelClamp, ROCPROFILER_HIP_API_ID_hipTexRefSetAddress, ROCPROFILER_HIP_API_ID_hipTexRefSetAddress2D, + ROCPROFILER_HIP_API_ID_hipTexRefSetAddressMode, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexRefSetArray, ROCPROFILER_HIP_API_ID_hipTexRefSetBorderColor, + 
ROCPROFILER_HIP_API_ID_hipTexRefSetFilterMode, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexRefSetFlags, ROCPROFILER_HIP_API_ID_hipTexRefSetFormat, ROCPROFILER_HIP_API_ID_hipTexRefSetMaxAnisotropy, + ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapFilterMode, // deprecated or removed + ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapLevelBias, ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapLevelClamp, ROCPROFILER_HIP_API_ID_hipTexRefSetMipmappedArray, - ROCPROFILER_HIP_API_ID_hipGLGetDevices, - ROCPROFILER_HIP_API_ID_hipGraphAddDependencies, - ROCPROFILER_HIP_API_ID_hipGraphAddEmptyNode, - ROCPROFILER_HIP_API_ID_hipGraphExecKernelNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphGetNodes, - ROCPROFILER_HIP_API_ID_hipGraphGetRootNodes, - ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetParams, - ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeGetParams, - ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeGetParams, - ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterBuffer, - ROCPROFILER_HIP_API_ID_hipGraphicsMapResources, - ROCPROFILER_HIP_API_ID_hipGraphicsResourceGetMappedPointer, - ROCPROFILER_HIP_API_ID_hipGraphicsUnmapResources, - ROCPROFILER_HIP_API_ID_hipGraphicsUnregisterResource, - ROCPROFILER_HIP_API_ID_hipGraphAddChildGraphNode, - ROCPROFILER_HIP_API_ID_hipGraphAddEventRecordNode, - ROCPROFILER_HIP_API_ID_hipGraphAddEventWaitNode, - ROCPROFILER_HIP_API_ID_hipGraphAddHostNode, - ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode1D, - ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol, - ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeToSymbol, - ROCPROFILER_HIP_API_ID_hipGraphChildGraphNodeGetGraph, - ROCPROFILER_HIP_API_ID_hipGraphClone, - ROCPROFILER_HIP_API_ID_hipGraphDestroyNode, - ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeGetEvent, - ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeSetEvent, - 
ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeGetEvent, - ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeSetEvent, - ROCPROFILER_HIP_API_ID_hipGraphExecChildGraphNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphExecEventRecordNodeSetEvent, - ROCPROFILER_HIP_API_ID_hipGraphExecEventWaitNodeSetEvent, - ROCPROFILER_HIP_API_ID_hipGraphExecHostNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D, - ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol, - ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol, - ROCPROFILER_HIP_API_ID_hipGraphExecMemsetNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphExecUpdate, - ROCPROFILER_HIP_API_ID_hipGraphGetEdges, - ROCPROFILER_HIP_API_ID_hipGraphHostNodeGetParams, - ROCPROFILER_HIP_API_ID_hipGraphHostNodeSetParams, - ROCPROFILER_HIP_API_ID_hipGraphInstantiateWithFlags, - ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams1D, - ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol, - ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol, - ROCPROFILER_HIP_API_ID_hipGraphNodeFindInClone, - ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependencies, - ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependentNodes, - ROCPROFILER_HIP_API_ID_hipGraphNodeGetType, - ROCPROFILER_HIP_API_ID_hipGraphRemoveDependencies, - ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo, - ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_v2, - ROCPROFILER_HIP_API_ID_hipStreamIsCapturing, - ROCPROFILER_HIP_API_ID_hipStreamUpdateCaptureDependencies, - ROCPROFILER_HIP_API_ID_hipDrvPointerGetAttributes, - ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterImage, - ROCPROFILER_HIP_API_ID_hipGraphicsSubResourceGetMappedArray, - ROCPROFILER_HIP_API_ID_hipPointerGetAttribute, - ROCPROFILER_HIP_API_ID_RESERVED_296, ROCPROFILER_HIP_API_ID_hipThreadExchangeStreamCaptureMode, - ROCPROFILER_HIP_API_ID_hipDeviceGetUuid, - ROCPROFILER_HIP_API_ID_hipGetChannelDesc, - 
ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetAttribute, - ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetAttribute, - ROCPROFILER_HIP_API_ID_hipLaunchHostFunc, - ROCPROFILER_HIP_API_ID_hipDeviceGetDefaultMemPool, - ROCPROFILER_HIP_API_ID_hipDeviceGetMemPool, - ROCPROFILER_HIP_API_ID_hipDeviceSetMemPool, - ROCPROFILER_HIP_API_ID_hipFreeAsync, - ROCPROFILER_HIP_API_ID_hipMallocAsync, - ROCPROFILER_HIP_API_ID_hipMallocFromPoolAsync, - ROCPROFILER_HIP_API_ID_hipMemPoolCreate, - ROCPROFILER_HIP_API_ID_hipMemPoolDestroy, - ROCPROFILER_HIP_API_ID_hipMemPoolExportPointer, - ROCPROFILER_HIP_API_ID_hipMemPoolExportToShareableHandle, - ROCPROFILER_HIP_API_ID_hipMemPoolGetAccess, - ROCPROFILER_HIP_API_ID_hipMemPoolGetAttribute, - ROCPROFILER_HIP_API_ID_hipMemPoolImportFromShareableHandle, - ROCPROFILER_HIP_API_ID_hipMemPoolImportPointer, - ROCPROFILER_HIP_API_ID_hipMemPoolSetAccess, - ROCPROFILER_HIP_API_ID_hipMemPoolSetAttribute, - ROCPROFILER_HIP_API_ID_hipMemPoolTrimTo, - ROCPROFILER_HIP_API_ID_hipMemAddressFree, - ROCPROFILER_HIP_API_ID_hipMemAddressReserve, - ROCPROFILER_HIP_API_ID_hipMemCreate, - ROCPROFILER_HIP_API_ID_hipMemExportToShareableHandle, - ROCPROFILER_HIP_API_ID_hipMemGetAccess, - ROCPROFILER_HIP_API_ID_hipMemGetAllocationGranularity, - ROCPROFILER_HIP_API_ID_hipMemGetAllocationPropertiesFromHandle, - ROCPROFILER_HIP_API_ID_hipMemImportFromShareableHandle, - ROCPROFILER_HIP_API_ID_hipMemMap, - ROCPROFILER_HIP_API_ID_hipMemMapArrayAsync, - ROCPROFILER_HIP_API_ID_hipMemRelease, - ROCPROFILER_HIP_API_ID_hipMemRetainAllocationHandle, - ROCPROFILER_HIP_API_ID_hipMemSetAccess, - ROCPROFILER_HIP_API_ID_hipMemUnmap, - ROCPROFILER_HIP_API_ID_hipDeviceSetGraphMemAttribute, - ROCPROFILER_HIP_API_ID_hipDeviceGetGraphMemAttribute, - ROCPROFILER_HIP_API_ID_hipDeviceGraphMemTrim, - ROCPROFILER_HIP_API_ID_hipDeviceSetLimit, - ROCPROFILER_HIP_API_ID_hipTexRefSetArray, - ROCPROFILER_HIP_API_ID_hipTexRefSetFlags, - ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapLevelBias, - 
ROCPROFILER_HIP_API_ID_hipDriverGetVersion, - ROCPROFILER_HIP_API_ID_hipGraphUpload, - ROCPROFILER_HIP_API_ID_hipRuntimeGetVersion, + ROCPROFILER_HIP_API_ID_hipUnbindTexture, // deprecated or removed ROCPROFILER_HIP_API_ID_hipUserObjectCreate, ROCPROFILER_HIP_API_ID_hipUserObjectRelease, ROCPROFILER_HIP_API_ID_hipUserObjectRetain, - ROCPROFILER_HIP_API_ID_hipGraphRetainUserObject, - ROCPROFILER_HIP_API_ID_hipGraphReleaseUserObject, - ROCPROFILER_HIP_API_ID_hipGraphDebugDotPrint, - ROCPROFILER_HIP_API_ID_hipGraphKernelNodeCopyAttributes, - ROCPROFILER_HIP_API_ID_hipGraphNodeGetEnabled, - ROCPROFILER_HIP_API_ID_hipGraphNodeSetEnabled, - ROCPROFILER_HIP_API_ID_hipPointerSetAttribute, - ROCPROFILER_HIP_API_ID_hipGraphAddMemAllocNode, - ROCPROFILER_HIP_API_ID_hipGraphAddMemFreeNode, - ROCPROFILER_HIP_API_ID_hipGraphMemAllocNodeGetParams, - ROCPROFILER_HIP_API_ID_hipGraphMemFreeNodeGetParams, - ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernel, - ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice, - ROCPROFILER_HIP_API_ID_hipArray3DGetDescriptor, - ROCPROFILER_HIP_API_ID_hipArrayGetDescriptor, - ROCPROFILER_HIP_API_ID_hipArrayGetInfo, - ROCPROFILER_HIP_API_ID_hipStreamGetDevice, + ROCPROFILER_HIP_API_ID_hipWaitExternalSemaphoresAsync, + ROCPROFILER_HIP_API_ID_hipCreateChannelDesc, + ROCPROFILER_HIP_API_ID_hipExtModuleLaunchKernel, + ROCPROFILER_HIP_API_ID_hipHccModuleLaunchKernel, + ROCPROFILER_HIP_API_ID_hipMemcpy_spt, + ROCPROFILER_HIP_API_ID_hipMemcpyToSymbol_spt, + ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbol_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy2D_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArray_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy3D_spt, + ROCPROFILER_HIP_API_ID_hipMemset_spt, + ROCPROFILER_HIP_API_ID_hipMemsetAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemset2D_spt, + ROCPROFILER_HIP_API_ID_hipMemset2DAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemset3DAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemset3D_spt, + 
ROCPROFILER_HIP_API_ID_hipMemcpyAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy3DAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy2DAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbolAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemcpyToSymbolAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemcpyFromArray_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy2DToArray_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArrayAsync_spt, + ROCPROFILER_HIP_API_ID_hipMemcpy2DToArrayAsync_spt, + ROCPROFILER_HIP_API_ID_hipStreamQuery_spt, + ROCPROFILER_HIP_API_ID_hipStreamSynchronize_spt, + ROCPROFILER_HIP_API_ID_hipStreamGetPriority_spt, + ROCPROFILER_HIP_API_ID_hipStreamWaitEvent_spt, + ROCPROFILER_HIP_API_ID_hipStreamGetFlags_spt, + ROCPROFILER_HIP_API_ID_hipStreamAddCallback_spt, + ROCPROFILER_HIP_API_ID_hipEventRecord_spt, + ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernel_spt, + ROCPROFILER_HIP_API_ID_hipLaunchKernel_spt, + ROCPROFILER_HIP_API_ID_hipGraphLaunch_spt, + ROCPROFILER_HIP_API_ID_hipStreamBeginCapture_spt, + ROCPROFILER_HIP_API_ID_hipStreamEndCapture_spt, + ROCPROFILER_HIP_API_ID_hipStreamIsCapturing_spt, + ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_spt, + ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_v2_spt, + ROCPROFILER_HIP_API_ID_hipLaunchHostFunc_spt, + ROCPROFILER_HIP_API_ID_hipGetStreamDeviceId, + // ROCPROFILER_HIP_API_ID_hipDrvGraphAddMemsetNode, ROCPROFILER_HIP_API_ID_LAST, - // - // Deprecated or removed - // - ROCPROFILER_HIP_API_ID_hipBindTexture = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipBindTexture2D = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipBindTextureToArray = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipBindTextureToMipmappedArray = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipCreateTextureObject = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipDestroyTextureObject = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipDeviceGetCount = ROCPROFILER_HIP_API_ID_NONE, - 
ROCPROFILER_HIP_API_ID_hipGetTextureAlignmentOffset = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceDesc = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceViewDesc = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipGetTextureObjectTextureDesc = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipGetTextureReference = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpy2DArrayToArray = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyArrayToArray = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyAtoA = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyAtoD = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyAtoHAsync = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyDtoA = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyFromArrayAsync = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyHtoAAsync = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipMemcpyToArrayAsync = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipModuleLaunchKernelExt = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipSetValidDevices = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexObjectCreate = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexObjectDestroy = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceDesc = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceViewDesc = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexObjectGetTextureDesc = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefGetAddressMode = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefGetArray = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefGetBorderColor = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefGetFilterMode = ROCPROFILER_HIP_API_ID_NONE, - 
ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapFilterMode = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefGetMipmappedArray = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefSetAddressMode = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefSetFilterMode = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapFilterMode = ROCPROFILER_HIP_API_ID_NONE, - ROCPROFILER_HIP_API_ID_hipUnbindTexture = ROCPROFILER_HIP_API_ID_NONE, } rocprofiler_hip_api_id_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/compiler_api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/compiler_api_args.h new file mode 100644 index 0000000000..8f583100ba --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/compiler_api_args.h @@ -0,0 +1,112 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +#include +#include + +typedef union rocprofiler_hip_compiler_api_retval_u +{ + hipError_t hipError_t_retval; + void** voidpp_retval; +} rocprofiler_hip_compiler_api_retval_t; + +typedef union rocprofiler_hip_compiler_api_args_u +{ + struct + { + dim3* gridDim; + dim3* blockDim; + size_t* sharedMem; + hipStream_t* stream; + } __hipPopCallConfiguration; + struct + { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } __hipPushCallConfiguration; + struct + { + const void* data; + } __hipRegisterFatBinary; + struct + { + void** modules; + const void* hostFunction; + char* deviceFunction; + const char* deviceName; + unsigned int threadLimit; + uint3* tid; + uint3* bid; + dim3* blockDim; + dim3* gridDim; + int* wSize; + } __hipRegisterFunction; + struct + { + void* hipModule; + void** pointer; + void* init_value; + const char* name; + size_t size; + unsigned align; + } __hipRegisterManagedVar; + struct + { + void** modules; + void* var; + char* hostVar; + char* deviceVar; + int type; + int ext; + } __hipRegisterSurface; + struct + { + void** modules; + void* var; + char* hostVar; + char* deviceVar; + int type; + int norm; + int ext; + } __hipRegisterTexture; + struct + { + void** modules; + void* var; + char* hostVar; + char* deviceVar; + int ext; + size_t size; + int constant; + int global; + } __hipRegisterVar; + struct + { + void** modules; + } __hipUnregisterFatBinary; +} rocprofiler_hip_compiler_api_args_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/compiler_api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/compiler_api_id.h new file mode 100644 index 0000000000..e50ea46731 --- /dev/null 
+++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/compiler_api_id.h @@ -0,0 +1,41 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +/** + * @brief ROCProfiler enumeration of HIP Compiler API tracing operations + */ +typedef enum // NOLINT(performance-enum-size) +{ + ROCPROFILER_HIP_COMPILER_API_ID_NONE = -1, + ROCPROFILER_HIP_COMPILER_API_ID___hipPopCallConfiguration = 0, + ROCPROFILER_HIP_COMPILER_API_ID___hipPushCallConfiguration, + ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterFatBinary, + ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterFunction, + ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterManagedVar, + ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterSurface, + ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterTexture, + ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterVar, + ROCPROFILER_HIP_COMPILER_API_ID___hipUnregisterFatBinary, + ROCPROFILER_HIP_COMPILER_API_ID_LAST, +} rocprofiler_hip_compiler_api_id_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/table_api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/table_api_id.h new file mode 100644 index 0000000000..eb0e07a5e1 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hip/table_api_id.h @@ -0,0 +1,32 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_HIP_API_TABLE_ID_NONE = -1, + ROCPROFILER_HIP_API_TABLE_ID_CompilerApi = 0, + ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, + ROCPROFILER_HIP_API_TABLE_ID_LAST, +} rocprofiler_hip_table_api_id_t; diff --git a/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp b/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp index f3f9e44744..a439d98213 100644 --- a/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp +++ b/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp @@ -127,7 +127,14 @@ struct record_header_buffer auto is_full() const; private: - std::atomic m_locked = {0}; + /// this is an explicit write lock that does not guard against deadlocking like lock() + void write_lock(); + + /// this is an explicit write unlock that does not guard against deadlocking like unlock() + void write_unlock(); + +private: + std::atomic m_locked = {0}; std::atomic m_index = {}; std::shared_mutex m_shared = {}; base_buffer_t m_buffer = {}; @@ -144,14 +151,14 @@ inline void record_header_buffer::lock() { auto n = m_locked.fetch_add(1, std::memory_order_release); - if(n == 0) m_shared.lock(); + if(n == 0) write_lock(); } inline void record_header_buffer::unlock() { - auto n = m_locked.fetch_add(-1, std::memory_order_release); - if(n <= 1) m_shared.unlock(); + auto n = m_locked.fetch_sub(1, 
std::memory_order_release); + if(n <= 1) write_unlock(); } inline void @@ -166,6 +173,18 @@ record_header_buffer::read_unlock() m_shared.unlock_shared(); } +inline void +record_header_buffer::write_lock() +{ + m_shared.lock(); +} + +inline void +record_header_buffer::write_unlock() +{ + m_shared.unlock(); +} + inline bool record_header_buffer::is_allocated() const { @@ -212,21 +231,22 @@ template bool record_header_buffer::emplace(uint64_t _hash, Tp& _v) { - if(is_locked() || m_headers.empty()) return false; + if(m_headers.empty()) return false; - // request N bytes in the buffer (where N=sizeof(Tp)) and if - // available, copy _v into the buffer region - auto _create_record = [](auto& _buf, auto& _data) { - constexpr auto buffer_sz = sizeof(Tp); - void* _ptr = _buf.request(buffer_sz, false); - if(_ptr) new(_ptr) Tp{_data}; - return _ptr; - }; + constexpr auto request_size = sizeof(Tp); + + // in theory, we shouldn't need to lock here but the thread sanitizer says there is a race. + // the lock will be short-lived so hopefully, it will scale fine + write_lock(); + auto* _addr = m_buffer.request(request_size, false); + write_unlock(); read_lock(); - auto _addr = _create_record(m_buffer, _v); if(_addr) { + // placement new + new(_addr) Tp{_v}; + // if there is space in the buffer, atomically get an index // for where the header record should be placed. 
// NOTE: m_headers was resized to be large enough to accomodate @@ -245,21 +265,22 @@ template bool record_header_buffer::emplace(uint32_t _category, uint32_t _kind, Tp& _v) { - if(is_locked() || m_headers.empty()) return false; + if(m_headers.empty()) return false; - // request N bytes in the buffer (where N=sizeof(Tp)) and if - // available, copy _v into the buffer region - auto _create_record = [](auto& _buf, auto& _data) { - constexpr auto buffer_sz = sizeof(Tp); - void* _ptr = _buf.request(buffer_sz, false); - if(_ptr) new(_ptr) Tp{_data}; - return _ptr; - }; + constexpr auto request_size = sizeof(Tp); + + // in theory, we shouldn't need to lock here but the thread sanitizer says there is a race. + // the lock will be short-lived so hopefully, it will scale fine + write_lock(); + auto* _addr = m_buffer.request(request_size, false); + write_unlock(); read_lock(); - auto _addr = _create_record(m_buffer, _v); if(_addr) { + // placement new + new(_addr) Tp{_v}; + // if there is space in the buffer, atomically get an index // for where the header record should be placed. // NOTE: m_headers was resized to be large enough to accomodate diff --git a/projects/rocprofiler-sdk/source/lib/common/container/stable_vector.hpp b/projects/rocprofiler-sdk/source/lib/common/container/stable_vector.hpp index 930594e1f9..ca6449b5ad 100644 --- a/projects/rocprofiler-sdk/source/lib/common/container/stable_vector.hpp +++ b/projects/rocprofiler-sdk/source/lib/common/container/stable_vector.hpp @@ -220,7 +220,7 @@ public: void push_back(Tp&& t); template - void emplace_back(Args&&... args); + reference emplace_back(Args&&... args); reference operator[](size_type i); @@ -351,10 +351,10 @@ stable_vector::push_back(Tp&& t) template template -void +typename stable_vector::reference stable_vector::emplace_back(Args&&... 
args) { - last_chunk().emplace_back(std::forward(args)...); + return last_chunk().emplace_back(std::forward(args)...); } template diff --git a/projects/rocprofiler-sdk/source/lib/common/units.hpp b/projects/rocprofiler-sdk/source/lib/common/units.hpp index 9d376df9b2..99d10898bb 100644 --- a/projects/rocprofiler-sdk/source/lib/common/units.hpp +++ b/projects/rocprofiler-sdk/source/lib/common/units.hpp @@ -187,7 +187,7 @@ get_memory_unit(std::string _unit) using return_type = std::tuple; using inner_t = std::tuple; - if(_unit.length() == 0) return return_type{"MB", units::megabyte}; + if(_unit.empty()) return return_type{"MB", units::megabyte}; for(auto& itr : _unit) itr = tolower(itr); @@ -228,7 +228,7 @@ get_timing_unit(std::string _unit) using return_type = std::tuple; using inner_t = std::tuple; - if(_unit.length() == 0) return return_type{"sec", units::sec}; + if(_unit.empty()) return return_type{"sec", units::sec}; for(auto& itr : _unit) itr = tolower(itr); @@ -264,7 +264,7 @@ get_frequncy_unit(std::string _unit) using return_type = std::tuple; using inner_t = std::tuple; - if(_unit.length() == 0) return return_type{"MHz", units::megahertz}; + if(_unit.empty()) return return_type{"MHz", units::megahertz}; for(auto& itr : _unit) itr = tolower(itr); @@ -295,7 +295,7 @@ get_power_unit(const std::string& _unit) using return_type = std::tuple; using inner_t = std::tuple; - if(_unit.length() == 0) return return_type{"watts", units::watt}; + if(_unit.empty()) return return_type{"watts", units::watt}; auto _lunit = _unit; for(auto& itr : _lunit) diff --git a/projects/rocprofiler-sdk/source/lib/common/utility.hpp b/projects/rocprofiler-sdk/source/lib/common/utility.hpp index 3a67942b56..1ddb66d44a 100644 --- a/projects/rocprofiler-sdk/source/lib/common/utility.hpp +++ b/projects/rocprofiler-sdk/source/lib/common/utility.hpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -210,5 +211,35 @@ private: data_type m_data = {}; functor_type 
m_destroy_func = {}; }; + +template > +void +yield(std::chrono::duration duration = std::chrono::milliseconds{10}) +{ + std::this_thread::yield(); + std::this_thread::sleep_for(duration); +} + +template > +bool +yield(PredicateT&& predicate, + std::chrono::duration max_yield_time, + std::chrono::duration query_interval = std::chrono::milliseconds{10}) +{ + auto now = []() { return std::chrono::steady_clock::now(); }; + auto start = now(); + auto result = false; + while(!(result = predicate())) + { + yield(query_interval); + if((now() - start) > max_yield_time) + { + break; + } + } + + // return the result of the last predicate query + return result; +} } // namespace common } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/CMakeLists.txt index a239f13014..cb3266835b 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/CMakeLists.txt @@ -26,7 +26,6 @@ set_target_properties( ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/rocprofiler-sdk SOVERSION ${PROJECT_VERSION_MAJOR} VERSION ${PROJECT_VERSION} - SKIP_BUILD_RPATH OFF BUILD_RPATH "\$ORIGIN:\$ORIGIN/.." 
INSTALL_RPATH "\$ORIGIN:\$ORIGIN/..") diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp index ad007539a6..3e379bcae9 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp @@ -57,17 +57,19 @@ struct config { config(); - bool demangle = get_env("ROCPROF_DEMANGLE_KERNELS", true); - bool truncate = get_env("ROCPROF_TRUNCATE_KERNELS", false); - bool kernel_trace = get_env("ROCPROF_KERNEL_TRACE", false); - bool hsa_api_trace = get_env("ROCPROF_HSA_API_TRACE", false); - bool marker_api_trace = get_env("ROCPROF_MARKER_API_TRACE", false); - bool memory_copy_trace = get_env("ROCPROF_MEMORY_COPY_TRACE", false); - bool counter_collection = get_env("ROCPROF_COUNTER_COLLECTION", false); - int mpi_size = get_mpi_size(); - int mpi_rank = get_mpi_rank(); - std::string output_path = get_env("ROCPROF_OUTPUT_PATH", fs::current_path().string()); - std::string output_file = get_env("ROCPROF_OUTPUT_FILE_NAME", std::to_string(getpid())); + bool demangle = get_env("ROCPROF_DEMANGLE_KERNELS", true); + bool truncate = get_env("ROCPROF_TRUNCATE_KERNELS", false); + bool kernel_trace = get_env("ROCPROF_KERNEL_TRACE", false); + bool hsa_api_trace = get_env("ROCPROF_HSA_API_TRACE", false); + bool marker_api_trace = get_env("ROCPROF_MARKER_API_TRACE", false); + bool memory_copy_trace = get_env("ROCPROF_MEMORY_COPY_TRACE", false); + bool counter_collection = get_env("ROCPROF_COUNTER_COLLECTION", false); + bool hip_api_trace = get_env("ROCPROF_HIP_API_TRACE", false); + bool hip_compiler_api_trace = get_env("ROCPROF_HIP_COMPILER_API_TRACE", false); + int mpi_size = get_mpi_size(); + int mpi_rank = get_mpi_rank(); + std::string output_path = get_env("ROCPROF_OUTPUT_PATH", fs::current_path().string()); + std::string output_file = get_env("ROCPROF_OUTPUT_FILE_NAME", std::to_string(getpid())); std::vector 
kernel_names = {}; std::set counters = {}; }; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/csv.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/csv.hpp index c2a79da342..16f063e9a0 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/csv.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/csv.hpp @@ -74,10 +74,11 @@ struct csv_encoder } }; -using hsa_csv_encoder = csv_encoder<7>; +using api_csv_encoder = csv_encoder<7>; using kernel_trace_csv_encoder = csv_encoder<16>; using counter_collection_csv_encoder = csv_encoder<14>; using memory_copy_csv_encoder = csv_encoder<7>; +using marker_csv_encoder = csv_encoder<7>; } // namespace csv } // namespace tool } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.cpp index 1c7c5c9089..e671f10e49 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.cpp @@ -230,6 +230,8 @@ get_buffer_id_names() { static auto supported = std::unordered_set{ ROCPROFILER_BUFFER_TRACING_HSA_API, + ROCPROFILER_BUFFER_TRACING_HIP_API, + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, ROCPROFILER_BUFFER_TRACING_MARKER_API}; @@ -281,3 +283,61 @@ get_buffer_id_names() return cb_name_info; } + +rocprofiler_tool_callback_name_info_t +get_callback_id_names() +{ + static auto supported = std::unordered_set{ + ROCPROFILER_CALLBACK_TRACING_HSA_API, + ROCPROFILER_CALLBACK_TRACING_HIP_API, + ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API, + ROCPROFILER_CALLBACK_TRACING_MARKER_API}; + + auto cb_name_info = rocprofiler_tool_callback_name_info_t{}; + // + // callback for each kind operation + // + static auto tracing_kind_operation_cb = + [](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) { + auto* name_info_v = 
static_cast(data_v); + + if(supported.count(kindv) > 0) + { + const char* name = nullptr; + ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name( + kindv, operation, &name, nullptr), + "query callback failed"); + if(name) name_info_v->operation_names[kindv][operation] = name; + } + + return 0; + }; + + // + // callback for each kind (i.e. domain) + // + static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) { + // store the callback kind name + auto* name_info_v = static_cast(data); + const char* name = nullptr; + ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr), + "query callback failed"); + + if(name) name_info_v->kind_names[kind] = name; + + if(supported.count(kind) > 0) + { + ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations( + kind, tracing_kind_operation_cb, static_cast(data)), + "query callback failed"); + } + + return 0; + }; + + ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb, + static_cast(&cb_name_info)), + "iterate_callback failed"); + + return cb_name_info; +} diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp index 8011123973..813bdf1468 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/helper.hpp @@ -122,6 +122,18 @@ struct rocprofiler_tool_buffer_name_info_t rocprofiler_tool_buffer_kind_operation_names_t operation_names = {}; }; +using rocprofiler_tool_callback_kind_names_t = + std::unordered_map; +using rocprofiler_tool_callback_kind_operation_names_t = + std::unordered_map>; + +struct rocprofiler_tool_callback_name_info_t +{ + rocprofiler_tool_callback_kind_names_t kind_names = {}; + rocprofiler_tool_callback_kind_operation_names_t operation_names = {}; +}; + // std::vector // GetCounterNames(); @@ -147,3 +159,6 @@ 
populate_kernel_properties_data(rocprofiler_tool_kernel_properties_t* kernel_pro rocprofiler_tool_buffer_name_info_t get_buffer_id_names(); + +rocprofiler_tool_callback_name_info_t +get_callback_id_names(); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp index 9dfaa7a9bc..9139b30ba8 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -31,6 +31,7 @@ #include "lib/common/logging.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" +#include "rocprofiler-sdk/marker/api_id.h" #include #include @@ -61,7 +62,22 @@ auto& get_hsa_api_file() { static auto _v = tool::output_file{"hsa_api_trace", - tool::csv::hsa_csv_encoder{}, + tool::csv::api_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + return _v; +} + +auto& +get_hip_api_file() +{ + static auto _v = tool::output_file{"hip_api_trace", + tool::csv::api_csv_encoder{}, {"Domain", "Function", "Process_Id", @@ -133,32 +149,50 @@ get_memory_copy_trace_file() return _v; } -rocprofiler_buffer_id_t& -get_hsa_api_trace_buffer() +struct marker_entry { - static rocprofiler_buffer_id_t hsa_api_buf = {}; - return hsa_api_buf; + uint64_t cid = 0; + pid_t pid = getpid(); + pid_t tid = rocprofiler::common::get_tid(); + rocprofiler_user_data_t data = {}; + std::string message = {}; +}; + +auto& +get_marker_api_file() +{ + static auto _v = tool::output_file{"marker_api_trace", + tool::csv::marker_csv_encoder{}, + {"Domain", + "Function", + "Process_Id", + "Thread_Id", + "Correlation_Id", + "Start_Timestamp", + "End_Timestamp"}}; + return _v; } -rocprofiler_buffer_id_t& -get_kernel_trace_buffer() +struct buffer_ids { - static rocprofiler_buffer_id_t kernel_trace_buf = {}; - return kernel_trace_buf; -} + rocprofiler_buffer_id_t 
hsa_api_trace = {}; + rocprofiler_buffer_id_t hip_api_trace = {}; + rocprofiler_buffer_id_t kernel_trace = {}; + rocprofiler_buffer_id_t memory_copy_trace = {}; + rocprofiler_buffer_id_t counter_collection = {}; -rocprofiler_buffer_id_t& -get_counter_collection_buffer() -{ - static rocprofiler_buffer_id_t counter_collection_buf = {}; - return counter_collection_buf; -} + auto as_array() const + { + return std::array{ + hsa_api_trace, hip_api_trace, kernel_trace, memory_copy_trace, counter_collection}; + } +}; -rocprofiler_buffer_id_t& -get_memory_copy_trace_buffer() +buffer_ids& +get_buffers() { - static rocprofiler_buffer_id_t memory_copy_buf = {}; - return memory_copy_buf; + static auto _v = buffer_ids{}; + return _v; } using rocprofiler_kernel_symbol_data_t = @@ -182,7 +216,8 @@ struct kernel_symbol_data : rocprofiler_kernel_symbol_data_t using kernel_symbol_data_map_t = std::unordered_map; auto kernel_data = common::Synchronized{}; -auto name_info = get_buffer_id_names(); +auto buffered_name_info = get_buffer_id_names(); +auto callback_name_info = get_callback_id_names(); auto& get_client_ctx() @@ -194,21 +229,182 @@ get_client_ctx() void flush() { - for(auto itr : {get_memory_copy_trace_buffer(), - get_kernel_trace_buffer(), - get_counter_collection_buffer(), - get_hsa_api_trace_buffer()}) + for(auto itr : get_buffers().as_array()) { if(itr.handle > 0) ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush"); } } void -rocprofiler_tracing_callback(rocprofiler_callback_tracing_record_t record, - rocprofiler_user_data_t* user_data, - void* data) +cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* cb_data) { - throw std::runtime_error{"not implemented"}; + auto* ctx = static_cast(cb_data); + + if(ctx && record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER && + record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerPause) + { + 
ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing context"); + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && + record.operation == ROCPROFILER_MARKER_API_ID_roctxProfilerResume) + { + ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming context"); + } + + auto ts = rocprofiler_timestamp_t{}; + rocprofiler_get_timestamp(&ts); + + const auto* kind_name = callback_name_info.kind_names.at(record.kind); + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + user_data->value = ts; + } + else + { + const auto* op_name = + callback_name_info.operation_names.at(record.kind).at(record.operation); + auto ss = std::stringstream{}; + tool::csv::marker_csv_encoder::write_row(ss, + kind_name, + op_name, + getpid(), + rocprofiler::common::get_tid(), + record.correlation_id.internal, + user_data->value, + ts); + get_marker_api_file() << ss.str(); + } + } +} + +void +callback_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* data) +{ + static thread_local auto stacked_range = std::vector{}; + static auto global_range = + common::Synchronized>{}; + + if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API) + { + auto* marker_data = + static_cast(record.payload); + + auto ts = rocprofiler_timestamp_t{}; + rocprofiler_get_timestamp(&ts); + + const auto* kind_name = callback_name_info.kind_names.at(record.kind); + if(record.operation == ROCPROFILER_MARKER_API_ID_roctxMarkA) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + { + auto ss = std::stringstream{}; + tool::csv::marker_csv_encoder::write_row(ss, + kind_name, + marker_data->args.roctxMarkA.message, + getpid(), + rocprofiler::common::get_tid(), + record.correlation_id.internal, + ts, + ts); + get_marker_api_file() << ss.str(); + } + } + else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangePushA) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + { + if(marker_data->args.roctxRangePushA.message) + { + 
auto& val = stacked_range.emplace_back(); + val.message = marker_data->args.roctxRangePushA.message; + val.data.value = ts; + val.cid = record.correlation_id.internal; + } + } + } + else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangePop) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + LOG_IF(FATAL, stacked_range.empty()) + << "roctxRangePop invoked more times than roctxRangePush on thread " + << rocprofiler::common::get_tid(); + + auto val = stacked_range.back(); + stacked_range.pop_back(); + + auto ss = std::stringstream{}; + tool::csv::marker_csv_encoder::write_row( + ss, kind_name, val.message, val.pid, val.tid, val.cid, val.data.value, ts); + get_marker_api_file() << ss.str(); + } + } + else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangeStartA) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && + marker_data->args.roctxRangeStartA.message) + { + auto _id = marker_data->retval.uint64_t_retval; + auto _entry = marker_entry{}; + _entry.cid = record.correlation_id.internal; + _entry.data.value = ts; + _entry.message = marker_data->args.roctxRangeStartA.message; + + global_range.wlock( + [_id, &_entry](auto& map) { map.emplace(_id, std::move(_entry)); }); + } + } + else if(record.operation == ROCPROFILER_MARKER_API_ID_roctxRangeStop) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + auto _id = marker_data->args.roctxRangeStop.id; + auto&& _entry = global_range.rlock( + [](const auto& map, auto _key) { return map.at(_key); }, _id); + + auto ss = std::stringstream{}; + tool::csv::marker_csv_encoder::write_row(ss, + kind_name, + _entry.message, + _entry.pid, + 0, + _entry.cid, + _entry.data.value, + ts); + get_marker_api_file() << ss.str(); + + global_range.wlock([](auto& map, auto _key) { return map.erase(_key); }, _id); + } + } + else + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + user_data->value = ts; + } + else + { + const auto* op_name = + 
callback_name_info.operation_names.at(record.kind).at(record.operation); + auto ss = std::stringstream{}; + tool::csv::marker_csv_encoder::write_row(ss, + kind_name, + op_name, + getpid(), + rocprofiler::common::get_tid(), + record.correlation_id.internal, + user_data->value, + ts); + get_marker_api_file() << ss.str(); + } + } + } (void) record; (void) user_data; @@ -248,12 +444,12 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record, } void -buffered_callback(rocprofiler_context_id_t /*context*/, - rocprofiler_buffer_id_t /*buffer_id*/, - rocprofiler_record_header_t** headers, - size_t num_headers, - void* /*user_data*/, - uint64_t /*drop_count*/) +buffered_tracing_callback(rocprofiler_context_id_t /*context*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* /*user_data*/, + uint64_t /*drop_count*/) { static auto _sync = std::mutex{}; auto _lk = std::lock_guard{_sync}; @@ -279,14 +475,15 @@ buffered_callback(rocprofiler_context_id_t /*context*/, std::string kernel_name = kernel_data.rlock( [](const kernel_symbol_data_map_t& kdata, rocprofiler_buffer_tracing_kernel_dispatch_record_t* record_v) { - return kdata.at(record_v->kernel_id).formatted_kernel_name; + auto _name = kdata.at(record_v->kernel_id).formatted_kernel_name; + return _name; }, record); auto kernel_trace_ss = std::stringstream{}; tool::csv::kernel_trace_csv_encoder::write_row( kernel_trace_ss, - name_info.kind_names.at(record->kind), + buffered_name_info.kind_names.at(record->kind), record->agent_id.handle, record->queue_id.handle, record->kernel_id, @@ -311,10 +508,10 @@ buffered_callback(rocprofiler_context_id_t /*context*/, static_cast(header->payload); auto hsa_trace_ss = std::stringstream{}; - tool::csv::hsa_csv_encoder::write_row( + tool::csv::api_csv_encoder::write_row( hsa_trace_ss, - name_info.kind_names.at(record->kind), - name_info.operation_names.at(record->kind).at(record->operation), + 
buffered_name_info.kind_names.at(record->kind), + buffered_name_info.operation_names.at(record->kind).at(record->operation), getpid(), record->thread_id, record->correlation_id.internal, @@ -331,8 +528,8 @@ buffered_callback(rocprofiler_context_id_t /*context*/, auto memory_copy_trace_ss = std::stringstream{}; tool::csv::memory_copy_csv_encoder::write_row( memory_copy_trace_ss, - name_info.kind_names.at(record->kind), - name_info.operation_names.at(record->kind).at(record->operation), + buffered_name_info.kind_names.at(record->kind), + buffered_name_info.operation_names.at(record->kind).at(record->operation), record->src_agent_id.handle, record->dst_agent_id.handle, record->correlation_id.internal, @@ -341,6 +538,25 @@ buffered_callback(rocprofiler_context_id_t /*context*/, get_memory_copy_trace_file() << memory_copy_trace_ss.str(); } + else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_API || + header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API) + { + auto* record = + static_cast(header->payload); + + auto hip_trace_ss = std::stringstream{}; + tool::csv::api_csv_encoder::write_row( + hip_trace_ss, + buffered_name_info.kind_names.at(record->kind), + buffered_name_info.operation_names.at(record->kind).at(record->operation), + getpid(), + record->thread_id, + record->correlation_id.internal, + record->start_timestamp, + record->end_timestamp); + + get_hip_api_file() << hip_trace_ss.str(); + } else { LOG(FATAL) << fmt::format( @@ -520,14 +736,60 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed"); + auto code_obj_ctx = rocprofiler_context_id_t{}; + ROCPROFILER_CALL(rocprofiler_create_context(&code_obj_ctx), "failed to create context"); + ROCPROFILER_CALL( - rocprofiler_configure_callback_tracing_service(get_client_ctx(), + rocprofiler_configure_callback_tracing_service(code_obj_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0, 
code_object_tracing_callback, nullptr), "code object tracing configure failed"); + ROCPROFILER_CALL(rocprofiler_start_context(code_obj_ctx), "start context failed"); + + if(tool::get_config().marker_api_trace) + { + auto operations = std::vector{}; + rocprofiler_iterate_callback_tracing_kind_operations( + ROCPROFILER_CALLBACK_TRACING_MARKER_API, + [](rocprofiler_callback_tracing_kind_t, uint32_t operation, void* data) { + auto* _ops = static_cast*>(data); + if(operation != ROCPROFILER_MARKER_API_ID_roctxProfilerPause && + operation != ROCPROFILER_MARKER_API_ID_roctxProfilerResume) + _ops->emplace_back(operation); + return 0; + }, + &operations); + + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(get_client_ctx(), + ROCPROFILER_CALLBACK_TRACING_MARKER_API, + operations.data(), + operations.size(), + callback_tracing_callback, + nullptr), + "callback tracing service failed to configure"); + + auto pause_resume_ctx = rocprofiler_context_id_t{}; + ROCPROFILER_CALL(rocprofiler_create_context(&pause_resume_ctx), "failed to create context"); + + auto pause_resume_ops = + std::array{ROCPROFILER_MARKER_API_ID_roctxProfilerPause, + ROCPROFILER_MARKER_API_ID_roctxProfilerResume}; + + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(pause_resume_ctx, + ROCPROFILER_CALLBACK_TRACING_MARKER_API, + pause_resume_ops.data(), + pause_resume_ops.size(), + cntrl_tracing_callback, + static_cast(&get_client_ctx())), + "callback tracing service failed to configure"); + + ROCPROFILER_CALL(rocprofiler_start_context(pause_resume_ctx), "start context failed"); + } if(tool::get_config().kernel_trace) { @@ -535,9 +797,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) 4096, 2048, ROCPROFILER_BUFFER_POLICY_LOSSLESS, - buffered_callback, + buffered_tracing_callback, tool_data, - &get_kernel_trace_buffer()), + &get_buffers().kernel_trace), "buffer creation"); ROCPROFILER_CALL( @@ -545,7 +807,7 @@ tool_init(rocprofiler_client_finalize_t 
fini_func, void* tool_data) ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, - get_kernel_trace_buffer()), + get_buffers().kernel_trace), "buffer tracing service for kernel dispatch configure"); } @@ -555,9 +817,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) 4096, 2048, ROCPROFILER_BUFFER_POLICY_LOSSLESS, - buffered_callback, + buffered_tracing_callback, nullptr, - &get_memory_copy_trace_buffer()), + &get_buffers().memory_copy_trace), "create memory copy buffer"); ROCPROFILER_CALL( @@ -565,7 +827,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0, - get_memory_copy_trace_buffer()), + get_buffers().memory_copy_trace), "buffer tracing service for memory copy configure"); } @@ -575,9 +837,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) 4096, 2048, ROCPROFILER_BUFFER_POLICY_LOSSLESS, - buffered_callback, + buffered_tracing_callback, tool_data, - &get_hsa_api_trace_buffer()), + &get_buffers().hsa_api_trace), "buffer creation"); ROCPROFILER_CALL( @@ -585,8 +847,42 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) ROCPROFILER_BUFFER_TRACING_HSA_API, nullptr, 0, - get_hsa_api_trace_buffer()), - "buffer tracing service for memory copy configure"); + get_buffers().hsa_api_trace), + "buffer tracing service for hsa api configure"); + } + + if(tool::get_config().hip_api_trace || tool::get_config().hip_compiler_api_trace) + { + ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), + 4096, + 2048, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + buffered_tracing_callback, + tool_data, + &get_buffers().hip_api_trace), + "buffer creation"); + + if(tool::get_config().hip_api_trace) + { + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service(get_client_ctx(), + ROCPROFILER_BUFFER_TRACING_HIP_API, + nullptr, + 0, + get_buffers().hip_api_trace), + "buffer tracing service for hip api configure"); + } + + 
if(tool::get_config().hip_compiler_api_trace) + { + ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + get_client_ctx(), + ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API, + nullptr, + 0, + get_buffers().hip_api_trace), + "buffer tracing service for hip compiler api configure"); + } } if(tool::get_config().counter_collection) @@ -595,14 +891,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) 4096, 2048, ROCPROFILER_BUFFER_POLICY_LOSSLESS, - buffered_callback, + buffered_tracing_callback, nullptr, - &get_counter_collection_buffer()), + &get_buffers().counter_collection), "buffer creation failed"); ROCPROFILER_CALL( rocprofiler_configure_buffered_dispatch_profile_counting_service( - get_client_ctx(), get_counter_collection_buffer(), dispatch_callback, nullptr), + get_client_ctx(), get_buffers().counter_collection, dispatch_callback, nullptr), "Could not setup buffered service"); } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt index 17b9d25d7a..a6f6003dac 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/CMakeLists.txt @@ -36,6 +36,7 @@ target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_SOURCES} ${ROCPROFILER_LIB_HEADERS}) add_subdirectory(hsa) +add_subdirectory(hip) add_subdirectory(context) add_subdirectory(counters) add_subdirectory(aql) @@ -88,7 +89,6 @@ set_target_properties( PROPERTIES OUTPUT_NAME rocprofiler-sdk SOVERSION ${PROJECT_VERSION_MAJOR} VERSION ${PROJECT_VERSION} - SKIP_BUILD_RPATH OFF BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" DEFINE_SYMBOL rocprofiler_EXPORTS) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp index 88da2571d3..5d9348f643 100644 --- 
a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -21,10 +21,12 @@ // SOFTWARE. #include +#include #include #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/context/domain.hpp" +#include "lib/rocprofiler-sdk/hip/hip.hpp" #include "lib/rocprofiler-sdk/hsa/async_copy.hpp" #include "lib/rocprofiler-sdk/hsa/hsa.hpp" #include "lib/rocprofiler-sdk/marker/marker.hpp" @@ -62,6 +64,7 @@ struct buffer_tracing_kind_string; ROCPROFILER_BUFFER_TRACING_KIND_STRING(NONE) ROCPROFILER_BUFFER_TRACING_KIND_STRING(HSA_API) ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_API) +ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_COMPILER_API) ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_API) ROCPROFILER_BUFFER_TRACING_KIND_STRING(MEMORY_COPY) ROCPROFILER_BUFFER_TRACING_KIND_STRING(KERNEL_DISPATCH) @@ -154,6 +157,10 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_ val = rocprofiler::hsa::async_copy::name_by_id(operation); else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_API) val = rocprofiler::marker::name_by_id(operation); + else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_API) + val = rocprofiler::hip::name_by_id(operation); + else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API) + val = rocprofiler::hip::name_by_id(operation); else return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; @@ -196,6 +203,10 @@ rocprofiler_iterate_buffer_tracing_kind_operations( ops = rocprofiler::hsa::async_copy::get_ids(); else if(kind == ROCPROFILER_BUFFER_TRACING_MARKER_API) ops = rocprofiler::marker::get_ids(); + else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_API) + ops = rocprofiler::hip::get_ids(); + else if(kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API) + ops = rocprofiler::hip::get_ids(); else return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp 
b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp index d505dd8537..dc979a338e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -21,10 +21,12 @@ // SOFTWARE. #include +#include #include #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/context/domain.hpp" +#include "lib/rocprofiler-sdk/hip/hip.hpp" #include "lib/rocprofiler-sdk/hsa/hsa.hpp" #include "lib/rocprofiler-sdk/marker/marker.hpp" #include "lib/rocprofiler-sdk/registration.hpp" @@ -60,6 +62,7 @@ struct callback_tracing_kind_string; ROCPROFILER_CALLBACK_TRACING_KIND_STRING(NONE) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HSA_API) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_API) +ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_COMPILER_API) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_API) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(CODE_OBJECT) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(KERNEL_DISPATCH) @@ -140,6 +143,10 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac val = rocprofiler::hsa::name_by_id(operation); else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API) val = rocprofiler::marker::name_by_id(operation); + else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_API) + val = rocprofiler::hip::name_by_id(operation); + else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) + val = rocprofiler::hip::name_by_id(operation); else return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; @@ -181,6 +188,10 @@ rocprofiler_iterate_callback_tracing_kind_operations( ops = rocprofiler::hsa::get_ids(); else if(kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API) ops = rocprofiler::marker::get_ids(); + else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_API) + ops = rocprofiler::hip::get_ids(); + else if(kind == ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) + ops = rocprofiler::hip::get_ids(); else return 
ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; @@ -216,6 +227,15 @@ rocprofiler_iterate_callback_tracing_kind_operation_args( user_data); return ROCPROFILER_STATUS_SUCCESS; } + else if(record.kind == ROCPROFILER_CALLBACK_TRACING_HIP_API) + { + rocprofiler::hip::iterate_args( + record.operation, + *static_cast(record.payload), + callback, + user_data); + return ROCPROFILER_STATUS_SUCCESS; + } return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp index 415c8d2c72..628cb61dcf 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp @@ -113,7 +113,7 @@ get_active_contexts_impl() auto*& get_correlation_id_map() { - using data_type = std::vector>; + using data_type = common::container::stable_vector>; static auto*& _v = common::static_object>::construct(); return _v; } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp index 00a32050a3..bd234fdf66 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp @@ -143,7 +143,7 @@ rocprofiler_iterate_agent_supported_counters(rocprofiler_agent_t rocprofiler_available_counters_cb_t cb, void* user_data) { - const auto& metrics = rocprofiler::counters::getMetricsForAgent(std::string(agent.name)); + auto metrics = rocprofiler::counters::getMetricsForAgent(agent.name); std::vector ids; ids.reserve(metrics.size()); for(const auto& metric : metrics) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.cpp index a0c83751c9..680a3d53b3 100644 --- 
a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.cpp @@ -218,17 +218,16 @@ getMetricMap() return map; } -const std::vector& +std::vector getMetricsForAgent(const std::string& agent) { - static const std::vector empty; - const auto& map = *CHECK_NOTNULL(getMetricMap()); + const auto& map = *CHECK_NOTNULL(getMetricMap()); if(const auto* metric_ptr = rocprofiler::common::get_val(map, agent)) { return *metric_ptr; } - return empty; + return std::vector{}; } bool diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.hpp index e0ac0b82b8..7b034a2d82 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/metrics.hpp @@ -107,7 +107,7 @@ getMetricMap(); * Get the metrics that apply to a specific agent. Supplied parameter * is the GFXIP of the agent. 
*/ -const std::vector& +std::vector getMetricsForAgent(const std::string&); /** diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/CMakeLists.txt new file mode 100644 index 0000000000..f835c24346 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/CMakeLists.txt @@ -0,0 +1,7 @@ +set(ROCPROFILER_LIB_HIP_SOURCES hip.cpp) +set(ROCPROFILER_LIB_HIP_HEADERS defines.hpp hip.hpp types.hpp utils.hpp) + +target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HIP_SOURCES} + ${ROCPROFILER_LIB_HIP_HEADERS}) + +add_subdirectory(details) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/defines.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/defines.hpp new file mode 100644 index 0000000000..6201b93850 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/defines.hpp @@ -0,0 +1,285 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#define IMPL_DETAIL_EXPAND(X) X +#define IMPL_DETAIL_FOR_EACH_NARG(...) \ + IMPL_DETAIL_FOR_EACH_NARG_(__VA_ARGS__, IMPL_DETAIL_FOR_EACH_RSEQ_N()) +#define IMPL_DETAIL_FOR_EACH_NARG_(...) IMPL_DETAIL_EXPAND(IMPL_DETAIL_FOR_EACH_ARG_N(__VA_ARGS__)) +#define IMPL_DETAIL_FOR_EACH_ARG_N( \ + _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, N, ...) \ + N +#define IMPL_DETAIL_FOR_EACH_RSEQ_N() 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +#define IMPL_DETAIL_CONCATENATE(X, Y) X##Y +#define IMPL_DETAIL_FOR_EACH_(N, MACRO, PREFIX, ...) \ + IMPL_DETAIL_EXPAND(IMPL_DETAIL_CONCATENATE(MACRO, N)(PREFIX, __VA_ARGS__)) +#define IMPL_DETAIL_FOR_EACH(MACRO, PREFIX, ...) \ + IMPL_DETAIL_FOR_EACH_(IMPL_DETAIL_FOR_EACH_NARG(__VA_ARGS__), MACRO, PREFIX, __VA_ARGS__) + +#define ADDR_MEMBER_0(...) 
+#define ADDR_MEMBER_1(PREFIX, FIELD) static_cast(&PREFIX.FIELD) +#define ADDR_MEMBER_2(PREFIX, A, B) ADDR_MEMBER_1(PREFIX, A), ADDR_MEMBER_1(PREFIX, B) +#define ADDR_MEMBER_3(PREFIX, A, B, C) ADDR_MEMBER_2(PREFIX, A, B), ADDR_MEMBER_1(PREFIX, C) +#define ADDR_MEMBER_4(PREFIX, A, B, C, D) ADDR_MEMBER_3(PREFIX, A, B, C), ADDR_MEMBER_1(PREFIX, D) +#define ADDR_MEMBER_5(PREFIX, A, B, C, D, E) \ + ADDR_MEMBER_4(PREFIX, A, B, C, D), ADDR_MEMBER_1(PREFIX, E) +#define ADDR_MEMBER_6(PREFIX, A, B, C, D, E, F) \ + ADDR_MEMBER_5(PREFIX, A, B, C, D, E), ADDR_MEMBER_1(PREFIX, F) +#define ADDR_MEMBER_7(PREFIX, A, B, C, D, E, F, G) \ + ADDR_MEMBER_6(PREFIX, A, B, C, D, E, F), ADDR_MEMBER_1(PREFIX, G) +#define ADDR_MEMBER_8(PREFIX, A, B, C, D, E, F, G, H) \ + ADDR_MEMBER_7(PREFIX, A, B, C, D, E, F, G), ADDR_MEMBER_1(PREFIX, H) +#define ADDR_MEMBER_9(PREFIX, A, B, C, D, E, F, G, H, I) \ + ADDR_MEMBER_8(PREFIX, A, B, C, D, E, F, G, H), ADDR_MEMBER_1(PREFIX, I) +#define ADDR_MEMBER_10(PREFIX, A, B, C, D, E, F, G, H, I, J) \ + ADDR_MEMBER_9(PREFIX, A, B, C, D, E, F, G, H, I), ADDR_MEMBER_1(PREFIX, J) +#define ADDR_MEMBER_11(PREFIX, A, B, C, D, E, F, G, H, I, J, K) \ + ADDR_MEMBER_10(PREFIX, A, B, C, D, E, F, G, H, I, J), ADDR_MEMBER_1(PREFIX, K) +#define ADDR_MEMBER_12(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L) \ + ADDR_MEMBER_11(PREFIX, A, B, C, D, E, F, G, H, I, J, K), ADDR_MEMBER_1(PREFIX, L) +#define ADDR_MEMBER_13(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M) \ + ADDR_MEMBER_12(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L), ADDR_MEMBER_1(PREFIX, M) +#define ADDR_MEMBER_14(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ + ADDR_MEMBER_13(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M), ADDR_MEMBER_1(PREFIX, N) +#define ADDR_MEMBER_15(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ + ADDR_MEMBER_14(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M, N), ADDR_MEMBER_1(PREFIX, O) + +#define NAMED_MEMBER_0(...) 
+#define NAMED_MEMBER_1(PREFIX, FIELD) std::make_pair(#FIELD, PREFIX.FIELD) +#define NAMED_MEMBER_2(PREFIX, A, B) NAMED_MEMBER_1(PREFIX, A), NAMED_MEMBER_1(PREFIX, B) +#define NAMED_MEMBER_3(PREFIX, A, B, C) NAMED_MEMBER_2(PREFIX, A, B), NAMED_MEMBER_1(PREFIX, C) +#define NAMED_MEMBER_4(PREFIX, A, B, C, D) \ + NAMED_MEMBER_3(PREFIX, A, B, C), NAMED_MEMBER_1(PREFIX, D) +#define NAMED_MEMBER_5(PREFIX, A, B, C, D, E) \ + NAMED_MEMBER_4(PREFIX, A, B, C, D), NAMED_MEMBER_1(PREFIX, E) +#define NAMED_MEMBER_6(PREFIX, A, B, C, D, E, F) \ + NAMED_MEMBER_5(PREFIX, A, B, C, D, E), NAMED_MEMBER_1(PREFIX, F) +#define NAMED_MEMBER_7(PREFIX, A, B, C, D, E, F, G) \ + NAMED_MEMBER_6(PREFIX, A, B, C, D, E, F), NAMED_MEMBER_1(PREFIX, G) +#define NAMED_MEMBER_8(PREFIX, A, B, C, D, E, F, G, H) \ + NAMED_MEMBER_7(PREFIX, A, B, C, D, E, F, G), NAMED_MEMBER_1(PREFIX, H) +#define NAMED_MEMBER_9(PREFIX, A, B, C, D, E, F, G, H, I) \ + NAMED_MEMBER_8(PREFIX, A, B, C, D, E, F, G, H), NAMED_MEMBER_1(PREFIX, I) +#define NAMED_MEMBER_10(PREFIX, A, B, C, D, E, F, G, H, I, J) \ + NAMED_MEMBER_9(PREFIX, A, B, C, D, E, F, G, H, I), NAMED_MEMBER_1(PREFIX, J) +#define NAMED_MEMBER_11(PREFIX, A, B, C, D, E, F, G, H, I, J, K) \ + NAMED_MEMBER_10(PREFIX, A, B, C, D, E, F, G, H, I, J), NAMED_MEMBER_1(PREFIX, K) +#define NAMED_MEMBER_12(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L) \ + NAMED_MEMBER_11(PREFIX, A, B, C, D, E, F, G, H, I, J, K), NAMED_MEMBER_1(PREFIX, L) +#define NAMED_MEMBER_13(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M) \ + NAMED_MEMBER_12(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L), NAMED_MEMBER_1(PREFIX, M) +#define NAMED_MEMBER_14(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ + NAMED_MEMBER_13(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M), NAMED_MEMBER_1(PREFIX, N) +#define NAMED_MEMBER_15(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ + NAMED_MEMBER_14(PREFIX, A, B, C, D, E, F, G, H, I, J, K, L, M, N), NAMED_MEMBER_1(PREFIX, O) + +#define GET_ADDR_MEMBER_FIELDS(VAR, ...) 
IMPL_DETAIL_FOR_EACH(ADDR_MEMBER_, VAR, __VA_ARGS__) +#define GET_NAMED_MEMBER_FIELDS(VAR, ...) IMPL_DETAIL_FOR_EACH(NAMED_MEMBER_, VAR, __VA_ARGS__) + +#define HIP_API_INFO_DEFINITION_0(HIP_TABLE, HIP_API_ID, HIP_FUNC, HIP_FUNC_PTR) \ + namespace rocprofiler \ + { \ + namespace hip \ + { \ + template <> \ + struct hip_api_info : hip_domain_info \ + { \ + static constexpr auto table_idx = HIP_TABLE; \ + static constexpr auto operation_idx = HIP_API_ID; \ + static constexpr auto name = #HIP_FUNC; \ + \ + using domain_type = hip_domain_info; \ + using this_type = hip_api_info; \ + using base_type = hip_api_impl; \ + \ + using domain_type::callback_domain_idx; \ + using domain_type::buffered_domain_idx; \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto offset() \ + { \ + return offsetof(hip_table_lookup::type, HIP_FUNC_PTR); \ + } \ + \ + static_assert(offsetof(hip_table_lookup::type, HIP_FUNC_PTR) == \ + (sizeof(size_t) + (operation_idx * sizeof(void*))), \ + "ABI error for " #HIP_FUNC); \ + \ + static auto& get_table() { return hip_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return hip_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& get_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to HIP table for " #HIP_FUNC " function"); \ + return _table->HIP_FUNC_PTR; \ + } \ + else \ + { \ + return _table.HIP_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_table_func() { return get_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return _data.HIP_FUNC; \ + } \ + \ + template \ + static auto get_functor(RetT (*)(Args...)) \ + { \ + if constexpr(std::is_void::value) \ + return [](Args... args) -> RetT { base_type::functor(args...); }; \ + else \ + return [](Args... 
args) -> RetT { return base_type::functor(args...); }; \ + } \ + \ + static auto get_functor() { return get_functor(get_table_func()); } \ + \ + static std::vector as_arg_addr(callback_data_type) { return std::vector{}; } \ + \ + static std::vector> as_arg_list(callback_data_type) \ + { \ + return {}; \ + } \ + }; \ + } \ + } + +#define HIP_API_INFO_DEFINITION_V(HIP_TABLE, HIP_API_ID, HIP_FUNC, HIP_FUNC_PTR, ...) \ + namespace rocprofiler \ + { \ + namespace hip \ + { \ + template <> \ + struct hip_api_info : hip_domain_info \ + { \ + static constexpr auto table_idx = HIP_TABLE; \ + static constexpr auto operation_idx = HIP_API_ID; \ + static constexpr auto name = #HIP_FUNC; \ + \ + using domain_type = hip_domain_info; \ + using this_type = hip_api_info; \ + using base_type = hip_api_impl; \ + \ + static constexpr auto callback_domain_idx = domain_type::callback_domain_idx; \ + static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx; \ + \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto offset() \ + { \ + return offsetof(hip_table_lookup::type, HIP_FUNC_PTR); \ + } \ + \ + static_assert(offsetof(hip_table_lookup::type, HIP_FUNC_PTR) == \ + (sizeof(size_t) + (operation_idx * sizeof(void*))), \ + "ABI error for " #HIP_FUNC); \ + \ + static auto& get_table() { return hip_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return hip_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& get_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to HIP table for " #HIP_FUNC " function"); \ + return _table->HIP_FUNC_PTR; \ + } \ + else \ + { \ + return _table.HIP_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_table_func() { return get_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return 
_data.HIP_FUNC; \ + } \ + \ + template \ + static auto get_functor(RetT (*)(Args...)) \ + { \ + if constexpr(std::is_same::value) \ + return [](Args... args) -> RetT { base_type::functor(args...); }; \ + else \ + return [](Args... args) -> RetT { return base_type::functor(args...); }; \ + } \ + \ + static auto get_functor() { return get_functor(get_table_func()); } \ + \ + static std::vector as_arg_addr(callback_data_type trace_data) \ + { \ + return std::vector{ \ + GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \ + } \ + \ + static auto as_arg_list(callback_data_type trace_data) \ + { \ + return utils::stringize( \ + GET_NAMED_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)); \ + } \ + }; \ + } \ + } + +#define HIP_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, TYPE, MEMBER) \ + namespace rocprofiler \ + { \ + namespace hip \ + { \ + template <> \ + struct hip_table_lookup \ + { \ + using type = TYPE; \ + auto& operator()(hip_api_table_t& _v) const { return _v.MEMBER; } \ + auto& operator()(hip_api_table_t* _v) const { return _v->MEMBER; } \ + auto& operator()(type& _v) const { return _v; } \ + auto& operator()(type* _v) const { return *_v; } \ + auto& operator()() const { return (*this)(get_table()); } \ + }; \ + } \ + } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/CMakeLists.txt new file mode 100644 index 0000000000..fdb5498561 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/CMakeLists.txt @@ -0,0 +1,8 @@ +# +# +# +set(ROCPROFILER_LIB_HIP_DETAILS_SOURCES) +set(ROCPROFILER_LIB_HIP_DETAILS_HEADERS ostream.hpp) + +target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_HIP_DETAILS_SOURCES} + ${ROCPROFILER_LIB_HIP_DETAILS_HEADERS}) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/ostream.hpp 
b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/ostream.hpp new file mode 100644 index 0000000000..e98c64270e --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/details/ostream.hpp @@ -0,0 +1,5659 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#pragma once
+
+// NOTE(review): the header names on the #include lines below are missing in this copy of the
+// patch — restore them from the upstream file (the code needs at least the HIP runtime
+// headers plus the standard headers providing std::isprint, strlen, std::setw and
+// std::string_view).
+#include
+
+#include
+// must be included after runtime api
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace rocprofiler
+{
+namespace hip
+{
+namespace detail
+{
+// Deepest struct nesting level whose fields are still printed; the struct printers below
+// treat -1 as "no limit".
+static int HIP_depth_max = 1;
+// Current nesting level; every struct printer increments it on entry and decrements on exit.
+static int HIP_depth_max_cnt = 0;
+// Field filter used by the struct printers: a field is printed only when this text occurs
+// inside "<struct>::<field>". Despite the name it is applied with std::string_view::find,
+// i.e. as a plain substring; the empty default matches every field.
+static std::string_view HIP_structs_regex = {};
+
+// Writes `v` to `out` as a double-quoted, escaped string literal.
+//
+//   out  destination stream; its format flags and fill character are left unchanged
+//   v    character data; a null pointer is treated as an empty string
+//   len  maximum number of characters to read; output also stops at the first '\0'
+//
+// Quote, backslash and the common control characters are written as two-character escapes
+// (\" \\ \b \f \n \r \t); any other non-printable byte is written as \xNN.
+inline static void
+print_escaped_string(std::ostream& out, const char* v, size_t len)
+{
+    out << '"';
+    for(size_t i = 0; v != nullptr && i < len && v[i] != '\0'; ++i)
+    {
+        switch(v[i])
+        {
+            case '\"': out << "\\\""; break;
+            case '\\': out << "\\\\"; break;
+            // emit a backslash plus the escape *letter*, never the raw control character
+            case '\b': out << "\\b"; break;
+            case '\f': out << "\\f"; break;
+            case '\n': out << "\\n"; break;
+            case '\r': out << "\\r"; break;
+            case '\t': out << "\\t"; break;
+            default:
+                if(std::isprint((unsigned char) v[i]) != 0)
+                    std::operator<<(out, v[i]);
+                else
+                {
+                    // std::hex and the '0' fill are sticky stream state: save both and
+                    // restore them so later output on `out` is not affected.
+                    const std::ios_base::fmtflags flags(out.flags());
+                    out << "\\x";
+                    const char fill = out.fill('0');
+                    out << std::setw(2) << std::hex << (unsigned int) (unsigned char) v[i];
+                    out.fill(fill);
+                    out.flags(flags);
+                }
+                break;
+        }
+    }
+    out << '"';
+}
+
+// Generic fallback: streams `v` with the std operator<< overloads made visible by the
+// using-declaration. The thread_local flag makes a re-entrant call on the same thread a
+// no-op, which stops unbounded recursion if `out << v` resolves back to this template.
+// NOTE(review): the template parameter list is missing in this copy of the patch (`T` is used
+// below) — restore it from the upstream file.
+template
+inline static std::ostream&
+operator<<(std::ostream& out, const T& v)
+{
+    using std:: operator<<;
+    static thread_local bool recursion = false;
+    if(recursion == false)
+    {
+        recursion = true;
+        out << v;
+        recursion = false;
+    }
+    return out;
+}
+
+// Prints an unsigned char as its numeric value instead of as a character.
+inline static std::ostream&
+operator<<(std::ostream& out, const unsigned char& v)
+{
+    out << (unsigned int) v;
+    return out;
+}
+
+// Streams a char after casting it to unsigned char; with the overload above in scope this
+// presumably prints the numeric value as well — confirm if character output is expected.
+inline static std::ostream&
+operator<<(std::ostream& out, const char& v)
+{
+    out << (unsigned char) v;
+    return out;
+}
+
+// Prints a fixed-size char array as an escaped string of at most N characters.
+// NOTE(review): the template parameter list is missing in this copy of the patch (`N` is used
+// below) — restore it from the upstream file.
+template
+inline static std::ostream&
+operator<<(std::ostream& out, const char (&v)[N])
+{
+    print_escaped_string(out, v, N);
+    return out;
+}
+
+// Prints a C string as an escaped, quoted string. A null pointer is printed as the bare
+// token nullptr (no quotes) instead of being handed to strlen.
+inline static std::ostream&
+operator<<(std::ostream& out, const char* v)
+{
+    if(v == nullptr)
+    {
+        std::operator<<(out, "nullptr");
+        return out;
+    }
+    print_escaped_string(out, v, strlen(v));
+    return out;
+}
+// End of basic ostream ops
+
+inline static std::ostream&
+operator<<(std::ostream& out, const __locale_struct& v)
+{
+
std::operator<<(out, '{');
+    // Count this struct as one nesting level; its fields are printed only while the level is
+    // within HIP_depth_max (-1 disables the limit), otherwise just "{}" is written.
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        // Each field is gated on HIP_structs_regex occurring as a substring of
+        // "<struct>::<field>" (std::string_view::find; an empty filter matches everything).
+        if(std::string_view("__locale_struct::__names").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "__names=");
+            ::rocprofiler::hip::detail::operator<<(out, v.__names);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("__locale_struct::__ctype_toupper").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "__ctype_toupper=");
+            ::rocprofiler::hip::detail::operator<<(out, v.__ctype_toupper);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("__locale_struct::__ctype_tolower").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "__ctype_tolower=");
+            ::rocprofiler::hip::detail::operator<<(out, v.__ctype_tolower);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("__locale_struct::__ctype_b").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "__ctype_b=");
+            ::rocprofiler::hip::detail::operator<<(out, v.__ctype_b);
+            std:: operator<<(out, ", ");
+        }
+        // Last field: no ", " separator is written after it.
+        if(std::string_view("__locale_struct::__locales").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "__locales=");
+            ::rocprofiler::hip::detail::operator<<(out, v.__locales);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+
+// Streams a hipDeviceArch_t as "{name=value, name=value, ...}".
+//
+// The nesting level (HIP_depth_max_cnt) is raised for the duration of the call; fields are
+// printed only while that level is within HIP_depth_max (-1 = unlimited). A field is printed
+// only when HIP_structs_regex occurs as a substring of "hipDeviceArch_t::<field>". Every
+// field except the last appends ", ", so filtering out the last field leaves a trailing
+// separator in the output. Returns `out`.
+inline static std::ostream&
+operator<<(std::ostream& out, const hipDeviceArch_t& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipDeviceArch_t::hasDynamicParallelism").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasDynamicParallelism=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasDynamicParallelism);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::has3dGrid").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "has3dGrid=");
+            ::rocprofiler::hip::detail::operator<<(out, v.has3dGrid);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasSurfaceFuncs").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasSurfaceFuncs=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasSurfaceFuncs);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasSyncThreadsExt").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasSyncThreadsExt=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasSyncThreadsExt);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasThreadFenceSystem").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasThreadFenceSystem=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasThreadFenceSystem);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasFunnelShift").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasFunnelShift=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasFunnelShift);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasWarpShuffle").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasWarpShuffle=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasWarpShuffle);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasWarpBallot").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasWarpBallot=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasWarpBallot);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasWarpVote").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasWarpVote=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasWarpVote);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasDoubles").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasDoubles=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasDoubles);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasSharedInt64Atomics").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasSharedInt64Atomics=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasSharedInt64Atomics);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasGlobalInt64Atomics").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasGlobalInt64Atomics=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasGlobalInt64Atomics);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasFloatAtomicAdd").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasFloatAtomicAdd=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasFloatAtomicAdd);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasSharedFloatAtomicExch").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasSharedFloatAtomicExch=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasSharedFloatAtomicExch);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasSharedInt32Atomics").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasSharedInt32Atomics=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasSharedInt32Atomics);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceArch_t::hasGlobalFloatAtomicExch").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasGlobalFloatAtomicExch=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasGlobalFloatAtomicExch);
+            std:: operator<<(out, ", ");
+        }
+        // Last field: no ", " separator is written after it.
+        if(std::string_view("hipDeviceArch_t::hasGlobalInt32Atomics").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hasGlobalInt32Atomics=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hasGlobalInt32Atomics);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+
+// Streams a hipUUID as "{bytes=...}". The single field is subject to the same nesting-depth
+// limit (HIP_depth_max / HIP_depth_max_cnt) and substring filter (HIP_structs_regex against
+// "hipUUID::bytes") as the other struct printers in this file. Returns `out`.
+inline static std::ostream&
+operator<<(std::ostream& out, const hipUUID& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipUUID::bytes").find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "bytes=");
+            ::rocprofiler::hip::detail::operator<<(out, v.bytes);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+
+// Streams a hipDeviceProp_tR0600 field by field. Note that the filter keys use the prefix
+// "hipDeviceProp_t::", not the R0600-suffixed type name of the parameter.
+inline static std::ostream&
+operator<<(std::ostream& out, const hipDeviceProp_tR0600& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipDeviceProp_t::asicRevision").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "asicRevision=");
+            ::rocprofiler::hip::detail::operator<<(out, v.asicRevision);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceProp_t::isLargeBar").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "isLargeBar=");
+            ::rocprofiler::hip::detail::operator<<(out, v.isLargeBar);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std::operator<<(out, "cooperativeMultiDeviceUnmatchedSharedMem=");
+            ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedSharedMem);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std::
operator<<(out, "cooperativeMultiDeviceUnmatchedBlockDim="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedBlockDim); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "cooperativeMultiDeviceUnmatchedGridDim="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedGridDim); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "cooperativeMultiDeviceUnmatchedFunc="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedFunc); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::hdpRegFlushCntl").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "hdpRegFlushCntl="); + ::rocprofiler::hip::detail::operator<<(out, v.hdpRegFlushCntl); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::hdpMemFlushCntl").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "hdpMemFlushCntl="); + ::rocprofiler::hip::detail::operator<<(out, v.hdpMemFlushCntl); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::arch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "arch="); + ::rocprofiler::hip::detail::operator<<(out, v.arch); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::clockInstructionRate").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "clockInstructionRate="); + ::rocprofiler::hip::detail::operator<<(out, v.clockInstructionRate); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSharedMemoryPerMultiProcessor") + .find(HIP_structs_regex) != std::string_view::npos) + { + 
std:: operator<<(out, "maxSharedMemoryPerMultiProcessor="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSharedMemoryPerMultiProcessor); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::gcnArchName").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "gcnArchName="); + ::rocprofiler::hip::detail::operator<<(out, v.gcnArchName); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::hipReserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "hipReserved="); + ::rocprofiler::hip::detail::operator<<(out, v.hipReserved); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::reserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reserved="); + ::rocprofiler::hip::detail::operator<<(out, 0); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::unifiedFunctionPointers").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "unifiedFunctionPointers="); + ::rocprofiler::hip::detail::operator<<(out, v.unifiedFunctionPointers); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::clusterLaunch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "clusterLaunch="); + ::rocprofiler::hip::detail::operator<<(out, v.clusterLaunch); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::ipcEventSupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "ipcEventSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.ipcEventSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::deferredMappingHipArraySupported") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "deferredMappingHipArraySupported="); + ::rocprofiler::hip::detail::operator<<(out, v.deferredMappingHipArraySupported); + std:: 
operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::memoryPoolSupportedHandleTypes") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "memoryPoolSupportedHandleTypes="); + ::rocprofiler::hip::detail::operator<<(out, v.memoryPoolSupportedHandleTypes); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::gpuDirectRDMAWritesOrdering") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "gpuDirectRDMAWritesOrdering="); + ::rocprofiler::hip::detail::operator<<(out, v.gpuDirectRDMAWritesOrdering); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::gpuDirectRDMAFlushWritesOptions") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "gpuDirectRDMAFlushWritesOptions="); + ::rocprofiler::hip::detail::operator<<(out, v.gpuDirectRDMAFlushWritesOptions); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::gpuDirectRDMASupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "gpuDirectRDMASupported="); + ::rocprofiler::hip::detail::operator<<(out, v.gpuDirectRDMASupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::memoryPoolsSupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memoryPoolsSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.memoryPoolsSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::timelineSemaphoreInteropSupported") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "timelineSemaphoreInteropSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.timelineSemaphoreInteropSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::hostRegisterReadOnlySupported") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, 
"hostRegisterReadOnlySupported="); + ::rocprofiler::hip::detail::operator<<(out, v.hostRegisterReadOnlySupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::sparseHipArraySupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "sparseHipArraySupported="); + ::rocprofiler::hip::detail::operator<<(out, v.sparseHipArraySupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::hostRegisterSupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "hostRegisterSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.hostRegisterSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::reservedSharedMemPerBlock").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reservedSharedMemPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.reservedSharedMemPerBlock); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::accessPolicyMaxWindowSize").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "accessPolicyMaxWindowSize="); + ::rocprofiler::hip::detail::operator<<(out, v.accessPolicyMaxWindowSize); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxBlocksPerMultiProcessor") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "maxBlocksPerMultiProcessor="); + ::rocprofiler::hip::detail::operator<<(out, v.maxBlocksPerMultiProcessor); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::directManagedMemAccessFromHost") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "directManagedMemAccessFromHost="); + ::rocprofiler::hip::detail::operator<<(out, v.directManagedMemAccessFromHost); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::pageableMemoryAccessUsesHostPageTables") + 
.find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "pageableMemoryAccessUsesHostPageTables="); + ::rocprofiler::hip::detail::operator<<(out, v.pageableMemoryAccessUsesHostPageTables); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::sharedMemPerBlockOptin").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "sharedMemPerBlockOptin="); + ::rocprofiler::hip::detail::operator<<(out, v.sharedMemPerBlockOptin); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::cooperativeMultiDeviceLaunch") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "cooperativeMultiDeviceLaunch="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceLaunch); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::cooperativeLaunch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "cooperativeLaunch="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeLaunch); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::canUseHostPointerForRegisteredMem") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "canUseHostPointerForRegisteredMem="); + ::rocprofiler::hip::detail::operator<<(out, v.canUseHostPointerForRegisteredMem); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::computePreemptionSupported") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "computePreemptionSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.computePreemptionSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::concurrentManagedAccess").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "concurrentManagedAccess="); + ::rocprofiler::hip::detail::operator<<(out, v.concurrentManagedAccess); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipDeviceProp_t::pageableMemoryAccess").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pageableMemoryAccess="); + ::rocprofiler::hip::detail::operator<<(out, v.pageableMemoryAccess); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::singleToDoublePrecisionPerfRatio") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "singleToDoublePrecisionPerfRatio="); + ::rocprofiler::hip::detail::operator<<(out, v.singleToDoublePrecisionPerfRatio); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::hostNativeAtomicSupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "hostNativeAtomicSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.hostNativeAtomicSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::multiGpuBoardGroupID").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "multiGpuBoardGroupID="); + ::rocprofiler::hip::detail::operator<<(out, v.multiGpuBoardGroupID); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::isMultiGpuBoard").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "isMultiGpuBoard="); + ::rocprofiler::hip::detail::operator<<(out, v.isMultiGpuBoard); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::managedMemory").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "managedMemory="); + ::rocprofiler::hip::detail::operator<<(out, v.managedMemory); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::regsPerMultiprocessor").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "regsPerMultiprocessor="); + ::rocprofiler::hip::detail::operator<<(out, v.regsPerMultiprocessor); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipDeviceProp_t::sharedMemPerMultiprocessor") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "sharedMemPerMultiprocessor="); + ::rocprofiler::hip::detail::operator<<(out, v.sharedMemPerMultiprocessor); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::localL1CacheSupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "localL1CacheSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.localL1CacheSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::globalL1CacheSupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "globalL1CacheSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.globalL1CacheSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::streamPrioritiesSupported").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "streamPrioritiesSupported="); + ::rocprofiler::hip::detail::operator<<(out, v.streamPrioritiesSupported); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxThreadsPerMultiProcessor") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "maxThreadsPerMultiProcessor="); + ::rocprofiler::hip::detail::operator<<(out, v.maxThreadsPerMultiProcessor); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::persistingL2CacheMaxSize").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "persistingL2CacheMaxSize="); + ::rocprofiler::hip::detail::operator<<(out, v.persistingL2CacheMaxSize); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::l2CacheSize").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "l2CacheSize="); + ::rocprofiler::hip::detail::operator<<(out, v.l2CacheSize); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipDeviceProp_t::memoryBusWidth").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memoryBusWidth="); + ::rocprofiler::hip::detail::operator<<(out, v.memoryBusWidth); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::memoryClockRate").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memoryClockRate="); + ::rocprofiler::hip::detail::operator<<(out, v.memoryClockRate); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::unifiedAddressing").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "unifiedAddressing="); + ::rocprofiler::hip::detail::operator<<(out, v.unifiedAddressing); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::asyncEngineCount").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "asyncEngineCount="); + ::rocprofiler::hip::detail::operator<<(out, v.asyncEngineCount); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::tccDriver").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "tccDriver="); + ::rocprofiler::hip::detail::operator<<(out, v.tccDriver); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::pciDomainID").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pciDomainID="); + ::rocprofiler::hip::detail::operator<<(out, v.pciDomainID); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::pciDeviceID").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pciDeviceID="); + ::rocprofiler::hip::detail::operator<<(out, v.pciDeviceID); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::pciBusID").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pciBusID="); + ::rocprofiler::hip::detail::operator<<(out, v.pciBusID); + 
std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::ECCEnabled").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "ECCEnabled="); + ::rocprofiler::hip::detail::operator<<(out, v.ECCEnabled); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::concurrentKernels").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "concurrentKernels="); + ::rocprofiler::hip::detail::operator<<(out, v.concurrentKernels); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::surfaceAlignment").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "surfaceAlignment="); + ::rocprofiler::hip::detail::operator<<(out, v.surfaceAlignment); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSurfaceCubemapLayered").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxSurfaceCubemapLayered="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSurfaceCubemapLayered); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSurfaceCubemap").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxSurfaceCubemap="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSurfaceCubemap); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSurface2DLayered").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxSurface2DLayered="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSurface2DLayered); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSurface1DLayered").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxSurface1DLayered="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSurface1DLayered); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSurface3D").find(HIP_structs_regex) != + 
std::string_view::npos) + { + std:: operator<<(out, "maxSurface3D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSurface3D); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSurface2D").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxSurface2D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSurface2D); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxSurface1D").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxSurface1D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSurface1D); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTextureCubemapLayered").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTextureCubemapLayered="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTextureCubemapLayered); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture2DLayered").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture2DLayered="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture2DLayered); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture1DLayered").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture1DLayered="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture1DLayered); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTextureCubemap").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTextureCubemap="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTextureCubemap); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture3DAlt").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture3DAlt="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture3DAlt); 
+ std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture3D").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture3D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture3D); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture2DGather").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture2DGather="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture2DGather); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture2DLinear").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture2DLinear="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture2DLinear); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture2DMipmap").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture2DMipmap="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture2DMipmap); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture2D").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture2D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture2D); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture1DLinear").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture1DLinear="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture1DLinear); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture1DMipmap").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture1DMipmap="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture1DMipmap); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxTexture1D").find(HIP_structs_regex) != + std::string_view::npos) + 
{ + std:: operator<<(out, "maxTexture1D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture1D); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::computeMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "computeMode="); + ::rocprofiler::hip::detail::operator<<(out, v.computeMode); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::canMapHostMemory").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "canMapHostMemory="); + ::rocprofiler::hip::detail::operator<<(out, v.canMapHostMemory); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::integrated").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "integrated="); + ::rocprofiler::hip::detail::operator<<(out, v.integrated); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::kernelExecTimeoutEnabled").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "kernelExecTimeoutEnabled="); + ::rocprofiler::hip::detail::operator<<(out, v.kernelExecTimeoutEnabled); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::multiProcessorCount").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "multiProcessorCount="); + ::rocprofiler::hip::detail::operator<<(out, v.multiProcessorCount); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::deviceOverlap").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "deviceOverlap="); + ::rocprofiler::hip::detail::operator<<(out, v.deviceOverlap); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::texturePitchAlignment").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "texturePitchAlignment="); + ::rocprofiler::hip::detail::operator<<(out, v.texturePitchAlignment); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipDeviceProp_t::textureAlignment").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "textureAlignment="); + ::rocprofiler::hip::detail::operator<<(out, v.textureAlignment); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::minor").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "minor="); + ::rocprofiler::hip::detail::operator<<(out, v.minor); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::major").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "major="); + ::rocprofiler::hip::detail::operator<<(out, v.major); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::totalConstMem").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "totalConstMem="); + ::rocprofiler::hip::detail::operator<<(out, v.totalConstMem); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::clockRate").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "clockRate="); + ::rocprofiler::hip::detail::operator<<(out, v.clockRate); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxGridSize").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxGridSize="); + ::rocprofiler::hip::detail::operator<<(out, v.maxGridSize); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxThreadsDim").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxThreadsDim="); + ::rocprofiler::hip::detail::operator<<(out, v.maxThreadsDim); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::maxThreadsPerBlock").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxThreadsPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.maxThreadsPerBlock); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipDeviceProp_t::memPitch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memPitch="); + ::rocprofiler::hip::detail::operator<<(out, v.memPitch); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::warpSize").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "warpSize="); + ::rocprofiler::hip::detail::operator<<(out, v.warpSize); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::regsPerBlock").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "regsPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.regsPerBlock); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::sharedMemPerBlock").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "sharedMemPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.sharedMemPerBlock); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::totalGlobalMem").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "totalGlobalMem="); + ::rocprofiler::hip::detail::operator<<(out, v.totalGlobalMem); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::luidDeviceNodeMask").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "luidDeviceNodeMask="); + ::rocprofiler::hip::detail::operator<<(out, v.luidDeviceNodeMask); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::luid").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "luid="); + ::rocprofiler::hip::detail::operator<<(out, v.luid); + std:: operator<<(out, ", "); + } + if(std::string_view("hipDeviceProp_t::uuid").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "uuid="); + ::rocprofiler::hip::detail::operator<<(out, v.uuid); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipDeviceProp_t::name").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "name="); + ::rocprofiler::hip::detail::operator<<(out, v.name); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipDeviceProp_tR0000& v) +{ + using namespace ::rocprofiler::hip::detail; + std::operator<<(out, '{'); + HIP_depth_max_cnt++; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view{"hipDeviceProp_t::pageableMemoryAccessUsesHostPageTables"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "pageableMemoryAccessUsesHostPageTables="); + ::rocprofiler::hip::detail::operator<<(out, v.pageableMemoryAccessUsesHostPageTables); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::pageableMemoryAccess"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pageableMemoryAccess="); + ::rocprofiler::hip::detail::operator<<(out, v.pageableMemoryAccess); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::concurrentManagedAccess"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "concurrentManagedAccess="); + ::rocprofiler::hip::detail::operator<<(out, v.concurrentManagedAccess); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::directManagedMemAccessFromHost"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "directManagedMemAccessFromHost="); + ::rocprofiler::hip::detail::operator<<(out, v.directManagedMemAccessFromHost); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::managedMemory"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "managedMemory="); + ::rocprofiler::hip::detail::operator<<(out, v.managedMemory); + std:: operator<<(out, ", "); + } + 
if(std::string_view{"hipDeviceProp_t::asicRevision"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "asicRevision="); + ::rocprofiler::hip::detail::operator<<(out, v.asicRevision); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::isLargeBar"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "isLargeBar="); + ::rocprofiler::hip::detail::operator<<(out, v.isLargeBar); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::cooperativeMultiDeviceUnmatchedSharedMem"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std::operator<<(out, "cooperativeMultiDeviceUnmatchedSharedMem="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedSharedMem); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::cooperativeMultiDeviceUnmatchedBlockDim"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "cooperativeMultiDeviceUnmatchedBlockDim="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedBlockDim); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::cooperativeMultiDeviceUnmatchedGridDim"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "cooperativeMultiDeviceUnmatchedGridDim="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedGridDim); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::cooperativeMultiDeviceUnmatchedFunc"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "cooperativeMultiDeviceUnmatchedFunc="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceUnmatchedFunc); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::tccDriver"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "tccDriver="); + 
::rocprofiler::hip::detail::operator<<(out, v.tccDriver); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::ECCEnabled"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "ECCEnabled="); + ::rocprofiler::hip::detail::operator<<(out, v.ECCEnabled); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::kernelExecTimeoutEnabled"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "kernelExecTimeoutEnabled="); + ::rocprofiler::hip::detail::operator<<(out, v.kernelExecTimeoutEnabled); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::texturePitchAlignment"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "texturePitchAlignment="); + ::rocprofiler::hip::detail::operator<<(out, v.texturePitchAlignment); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::textureAlignment"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "textureAlignment="); + ::rocprofiler::hip::detail::operator<<(out, v.textureAlignment); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::memPitch"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memPitch="); + ::rocprofiler::hip::detail::operator<<(out, v.memPitch); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::hdpRegFlushCntl"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "hdpRegFlushCntl="); + ::rocprofiler::hip::detail::operator<<(out, v.hdpRegFlushCntl); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::hdpMemFlushCntl"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "hdpMemFlushCntl="); + ::rocprofiler::hip::detail::operator<<(out, v.hdpMemFlushCntl); + std:: operator<<(out, ", "); + } + 
if(std::string_view{"hipDeviceProp_t::maxTexture3D"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture3D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture3D); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxTexture2D"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture2D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture2D); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxTexture1D"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture1D="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture1D); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxTexture1DLinear"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxTexture1DLinear="); + ::rocprofiler::hip::detail::operator<<(out, v.maxTexture1DLinear); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::cooperativeMultiDeviceLaunch"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "cooperativeMultiDeviceLaunch="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeMultiDeviceLaunch); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::cooperativeLaunch"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "cooperativeLaunch="); + ::rocprofiler::hip::detail::operator<<(out, v.cooperativeLaunch); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::integrated"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "integrated="); + ::rocprofiler::hip::detail::operator<<(out, v.integrated); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::gcnArchName"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "gcnArchName="); + 
::rocprofiler::hip::detail::operator<<(out, v.gcnArchName); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::gcnArch"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "gcnArch="); + ::rocprofiler::hip::detail::operator<<(out, v.gcnArch); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::canMapHostMemory"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "canMapHostMemory="); + ::rocprofiler::hip::detail::operator<<(out, v.canMapHostMemory); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::isMultiGpuBoard"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "isMultiGpuBoard="); + ::rocprofiler::hip::detail::operator<<(out, v.isMultiGpuBoard); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxSharedMemoryPerMultiProcessor"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "maxSharedMemoryPerMultiProcessor="); + ::rocprofiler::hip::detail::operator<<(out, v.maxSharedMemoryPerMultiProcessor); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::pciDeviceID"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pciDeviceID="); + ::rocprofiler::hip::detail::operator<<(out, v.pciDeviceID); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::pciBusID"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pciBusID="); + ::rocprofiler::hip::detail::operator<<(out, v.pciBusID); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::pciDomainID"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pciDomainID="); + ::rocprofiler::hip::detail::operator<<(out, v.pciDomainID); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::concurrentKernels"}.find(HIP_structs_regex) != + 
std::string_view::npos) + { + std:: operator<<(out, "concurrentKernels="); + ::rocprofiler::hip::detail::operator<<(out, v.concurrentKernels); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::arch"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "arch="); + ::rocprofiler::hip::detail::operator<<(out, v.arch); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::clockInstructionRate"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "clockInstructionRate="); + ::rocprofiler::hip::detail::operator<<(out, v.clockInstructionRate); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::computeMode"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "computeMode="); + ::rocprofiler::hip::detail::operator<<(out, v.computeMode); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxThreadsPerMultiProcessor"}.find( + HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "maxThreadsPerMultiProcessor="); + ::rocprofiler::hip::detail::operator<<(out, v.maxThreadsPerMultiProcessor); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::l2CacheSize"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "l2CacheSize="); + ::rocprofiler::hip::detail::operator<<(out, v.l2CacheSize); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::multiProcessorCount"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "multiProcessorCount="); + ::rocprofiler::hip::detail::operator<<(out, v.multiProcessorCount); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::minor"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "minor="); + ::rocprofiler::hip::detail::operator<<(out, v.minor); + std:: operator<<(out, ", "); + } + 
if(std::string_view{"hipDeviceProp_t::major"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "major="); + ::rocprofiler::hip::detail::operator<<(out, v.major); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::totalConstMem"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "totalConstMem="); + ::rocprofiler::hip::detail::operator<<(out, v.totalConstMem); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::memoryBusWidth"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memoryBusWidth="); + ::rocprofiler::hip::detail::operator<<(out, v.memoryBusWidth); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::memoryClockRate"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memoryClockRate="); + ::rocprofiler::hip::detail::operator<<(out, v.memoryClockRate); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::clockRate"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "clockRate="); + ::rocprofiler::hip::detail::operator<<(out, v.clockRate); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxGridSize"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxGridSize="); + ::rocprofiler::hip::detail::operator<<(out, v.maxGridSize); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxThreadsDim"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxThreadsDim="); + ::rocprofiler::hip::detail::operator<<(out, v.maxThreadsDim); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::maxThreadsPerBlock"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxThreadsPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.maxThreadsPerBlock); + std:: 
operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::warpSize"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "warpSize="); + ::rocprofiler::hip::detail::operator<<(out, v.warpSize); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::regsPerBlock"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "regsPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.regsPerBlock); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::sharedMemPerBlock"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "sharedMemPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.sharedMemPerBlock); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::totalGlobalMem"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "totalGlobalMem="); + ::rocprofiler::hip::detail::operator<<(out, v.totalGlobalMem); + std:: operator<<(out, ", "); + } + if(std::string_view{"hipDeviceProp_t::name"}.find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "name="); + ::rocprofiler::hip::detail::operator<<(out, v.name); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipPointerAttribute_t& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipPointerAttribute_t::allocationFlags").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "allocationFlags="); + ::rocprofiler::hip::detail::operator<<(out, v.allocationFlags); + std:: operator<<(out, ", "); + } + if(std::string_view("hipPointerAttribute_t::isManaged").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "isManaged="); + ::rocprofiler::hip::detail::operator<<(out, 
v.isManaged); + std:: operator<<(out, ", "); + } + if(std::string_view("hipPointerAttribute_t::device").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "device="); + ::rocprofiler::hip::detail::operator<<(out, v.device); + std:: operator<<(out, ", "); + } + if(std::string_view("hipPointerAttribute_t::type").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "type="); + ::rocprofiler::hip::detail::operator<<(out, v.type); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipChannelFormatDesc& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipChannelFormatDesc::f").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "f="); + ::rocprofiler::hip::detail::operator<<(out, v.f); + std:: operator<<(out, ", "); + } + if(std::string_view("hipChannelFormatDesc::w").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("hipChannelFormatDesc::z").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("hipChannelFormatDesc::y").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("hipChannelFormatDesc::x").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& 
+operator<<(std::ostream& out, const HIP_ARRAY_DESCRIPTOR& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("HIP_ARRAY_DESCRIPTOR::NumChannels").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "NumChannels="); + ::rocprofiler::hip::detail::operator<<(out, v.NumChannels); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY_DESCRIPTOR::Format").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Format="); + ::rocprofiler::hip::detail::operator<<(out, v.Format); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY_DESCRIPTOR::Height").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Height="); + ::rocprofiler::hip::detail::operator<<(out, v.Height); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY_DESCRIPTOR::Width").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Width="); + ::rocprofiler::hip::detail::operator<<(out, v.Width); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const HIP_ARRAY3D_DESCRIPTOR& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("HIP_ARRAY3D_DESCRIPTOR::Flags").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Flags="); + ::rocprofiler::hip::detail::operator<<(out, v.Flags); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY3D_DESCRIPTOR::NumChannels").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "NumChannels="); + ::rocprofiler::hip::detail::operator<<(out, v.NumChannels); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY3D_DESCRIPTOR::Format").find(HIP_structs_regex) != + 
std::string_view::npos) + { + std:: operator<<(out, "Format="); + ::rocprofiler::hip::detail::operator<<(out, v.Format); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY3D_DESCRIPTOR::Depth").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Depth="); + ::rocprofiler::hip::detail::operator<<(out, v.Depth); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY3D_DESCRIPTOR::Height").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Height="); + ::rocprofiler::hip::detail::operator<<(out, v.Height); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_ARRAY3D_DESCRIPTOR::Width").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Width="); + ::rocprofiler::hip::detail::operator<<(out, v.Width); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hip_Memcpy2D& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hip_Memcpy2D::Height").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Height="); + ::rocprofiler::hip::detail::operator<<(out, v.Height); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::WidthInBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "WidthInBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.WidthInBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::dstPitch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstPitch="); + ::rocprofiler::hip::detail::operator<<(out, v.dstPitch); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::dstArray").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstArray="); + 
::rocprofiler::hip::detail::operator<<(out, v.dstArray); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::dstDevice").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstDevice="); + ::rocprofiler::hip::detail::operator<<(out, v.dstDevice); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::dstMemoryType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstMemoryType="); + ::rocprofiler::hip::detail::operator<<(out, v.dstMemoryType); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::dstY").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "dstY="); + ::rocprofiler::hip::detail::operator<<(out, v.dstY); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::dstXInBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstXInBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.dstXInBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::srcPitch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcPitch="); + ::rocprofiler::hip::detail::operator<<(out, v.srcPitch); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::srcArray").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcArray="); + ::rocprofiler::hip::detail::operator<<(out, v.srcArray); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::srcDevice").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcDevice="); + ::rocprofiler::hip::detail::operator<<(out, v.srcDevice); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::srcMemoryType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcMemoryType="); + ::rocprofiler::hip::detail::operator<<(out, v.srcMemoryType); + std:: 
operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::srcY").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "srcY="); + ::rocprofiler::hip::detail::operator<<(out, v.srcY); + std:: operator<<(out, ", "); + } + if(std::string_view("hip_Memcpy2D::srcXInBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcXInBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.srcXInBytes); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMipmappedArray& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipMipmappedArray::num_channels").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "num_channels="); + ::rocprofiler::hip::detail::operator<<(out, v.num_channels); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::format").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "format="); + ::rocprofiler::hip::detail::operator<<(out, v.format); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::flags").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "flags="); + ::rocprofiler::hip::detail::operator<<(out, v.flags); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::max_mipmap_level").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "max_mipmap_level="); + ::rocprofiler::hip::detail::operator<<(out, v.max_mipmap_level); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::min_mipmap_level").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "min_mipmap_level="); + ::rocprofiler::hip::detail::operator<<(out, v.min_mipmap_level); + std:: operator<<(out, ", 
"); + } + if(std::string_view("hipMipmappedArray::depth").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "depth="); + ::rocprofiler::hip::detail::operator<<(out, v.depth); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::height").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "height="); + ::rocprofiler::hip::detail::operator<<(out, v.height); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::width").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "width="); + ::rocprofiler::hip::detail::operator<<(out, v.width); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::type").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "type="); + ::rocprofiler::hip::detail::operator<<(out, v.type); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMipmappedArray::desc").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "desc="); + ::rocprofiler::hip::detail::operator<<(out, v.desc); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const HIP_TEXTURE_DESC& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("HIP_TEXTURE_DESC::reserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reserved="); + ::rocprofiler::hip::detail::operator<<(out, 0); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::borderColor").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "borderColor="); + ::rocprofiler::hip::detail::operator<<(out, v.borderColor); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::maxMipmapLevelClamp").find(HIP_structs_regex) != + 
std::string_view::npos) + { + std:: operator<<(out, "maxMipmapLevelClamp="); + ::rocprofiler::hip::detail::operator<<(out, v.maxMipmapLevelClamp); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::minMipmapLevelClamp").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "minMipmapLevelClamp="); + ::rocprofiler::hip::detail::operator<<(out, v.minMipmapLevelClamp); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::mipmapLevelBias").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "mipmapLevelBias="); + ::rocprofiler::hip::detail::operator<<(out, v.mipmapLevelBias); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::mipmapFilterMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "mipmapFilterMode="); + ::rocprofiler::hip::detail::operator<<(out, v.mipmapFilterMode); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::maxAnisotropy").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxAnisotropy="); + ::rocprofiler::hip::detail::operator<<(out, v.maxAnisotropy); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::flags").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "flags="); + ::rocprofiler::hip::detail::operator<<(out, v.flags); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::filterMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "filterMode="); + ::rocprofiler::hip::detail::operator<<(out, v.filterMode); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_TEXTURE_DESC::addressMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "addressMode="); + ::rocprofiler::hip::detail::operator<<(out, v.addressMode); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + 
return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipResourceDesc& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipResourceDesc::resType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "resType="); + ::rocprofiler::hip::detail::operator<<(out, v.resType); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const HIP_RESOURCE_DESC& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("HIP_RESOURCE_DESC::flags").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "flags="); + ::rocprofiler::hip::detail::operator<<(out, v.flags); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_DESC::resType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "resType="); + ::rocprofiler::hip::detail::operator<<(out, v.resType); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipResourceViewDesc& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipResourceViewDesc::lastLayer").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "lastLayer="); + ::rocprofiler::hip::detail::operator<<(out, v.lastLayer); + std:: operator<<(out, ", "); + } + if(std::string_view("hipResourceViewDesc::firstLayer").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "firstLayer="); + ::rocprofiler::hip::detail::operator<<(out, v.firstLayer); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipResourceViewDesc::lastMipmapLevel").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "lastMipmapLevel="); + ::rocprofiler::hip::detail::operator<<(out, v.lastMipmapLevel); + std:: operator<<(out, ", "); + } + if(std::string_view("hipResourceViewDesc::firstMipmapLevel").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "firstMipmapLevel="); + ::rocprofiler::hip::detail::operator<<(out, v.firstMipmapLevel); + std:: operator<<(out, ", "); + } + if(std::string_view("hipResourceViewDesc::depth").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "depth="); + ::rocprofiler::hip::detail::operator<<(out, v.depth); + std:: operator<<(out, ", "); + } + if(std::string_view("hipResourceViewDesc::height").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "height="); + ::rocprofiler::hip::detail::operator<<(out, v.height); + std:: operator<<(out, ", "); + } + if(std::string_view("hipResourceViewDesc::width").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "width="); + ::rocprofiler::hip::detail::operator<<(out, v.width); + std:: operator<<(out, ", "); + } + if(std::string_view("hipResourceViewDesc::format").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "format="); + ::rocprofiler::hip::detail::operator<<(out, v.format); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const HIP_RESOURCE_VIEW_DESC& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("HIP_RESOURCE_VIEW_DESC::reserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reserved="); + ::rocprofiler::hip::detail::operator<<(out, 0); + std:: operator<<(out, ", "); + } + 
if(std::string_view("HIP_RESOURCE_VIEW_DESC::lastLayer").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "lastLayer="); + ::rocprofiler::hip::detail::operator<<(out, v.lastLayer); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_VIEW_DESC::firstLayer").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "firstLayer="); + ::rocprofiler::hip::detail::operator<<(out, v.firstLayer); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_VIEW_DESC::lastMipmapLevel").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "lastMipmapLevel="); + ::rocprofiler::hip::detail::operator<<(out, v.lastMipmapLevel); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_VIEW_DESC::firstMipmapLevel").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "firstMipmapLevel="); + ::rocprofiler::hip::detail::operator<<(out, v.firstMipmapLevel); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_VIEW_DESC::depth").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "depth="); + ::rocprofiler::hip::detail::operator<<(out, v.depth); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_VIEW_DESC::height").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "height="); + ::rocprofiler::hip::detail::operator<<(out, v.height); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_VIEW_DESC::width").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "width="); + ::rocprofiler::hip::detail::operator<<(out, v.width); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_RESOURCE_VIEW_DESC::format").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "format="); + ::rocprofiler::hip::detail::operator<<(out, v.format); + } + }; + HIP_depth_max_cnt--; + 
std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipPitchedPtr& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipPitchedPtr::ysize").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "ysize="); + ::rocprofiler::hip::detail::operator<<(out, v.ysize); + std:: operator<<(out, ", "); + } + if(std::string_view("hipPitchedPtr::xsize").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "xsize="); + ::rocprofiler::hip::detail::operator<<(out, v.xsize); + std:: operator<<(out, ", "); + } + if(std::string_view("hipPitchedPtr::pitch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "pitch="); + ::rocprofiler::hip::detail::operator<<(out, v.pitch); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipExtent& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipExtent::depth").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "depth="); + ::rocprofiler::hip::detail::operator<<(out, v.depth); + std:: operator<<(out, ", "); + } + if(std::string_view("hipExtent::height").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "height="); + ::rocprofiler::hip::detail::operator<<(out, v.height); + std:: operator<<(out, ", "); + } + if(std::string_view("hipExtent::width").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "width="); + ::rocprofiler::hip::detail::operator<<(out, v.width); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipPos& v) +{ + 
std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipPos::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("hipPos::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("hipPos::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemcpy3DParms& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipMemcpy3DParms::kind").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "kind="); + ::rocprofiler::hip::detail::operator<<(out, v.kind); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemcpy3DParms::extent").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "extent="); + ::rocprofiler::hip::detail::operator<<(out, v.extent); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemcpy3DParms::dstPtr").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstPtr="); + ::rocprofiler::hip::detail::operator<<(out, v.dstPtr); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemcpy3DParms::dstPos").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstPos="); + ::rocprofiler::hip::detail::operator<<(out, v.dstPos); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipMemcpy3DParms::dstArray").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstArray="); + ::rocprofiler::hip::detail::operator<<(out, v.dstArray); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemcpy3DParms::srcPtr").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcPtr="); + ::rocprofiler::hip::detail::operator<<(out, v.srcPtr); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemcpy3DParms::srcPos").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcPos="); + ::rocprofiler::hip::detail::operator<<(out, v.srcPos); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemcpy3DParms::srcArray").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcArray="); + ::rocprofiler::hip::detail::operator<<(out, v.srcArray); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const HIP_MEMCPY3D& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("HIP_MEMCPY3D::Depth").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Depth="); + ::rocprofiler::hip::detail::operator<<(out, v.Depth); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::Height").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "Height="); + ::rocprofiler::hip::detail::operator<<(out, v.Height); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::WidthInBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "WidthInBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.WidthInBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstHeight").find(HIP_structs_regex) != + 
std::string_view::npos) + { + std:: operator<<(out, "dstHeight="); + ::rocprofiler::hip::detail::operator<<(out, v.dstHeight); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstPitch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstPitch="); + ::rocprofiler::hip::detail::operator<<(out, v.dstPitch); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstArray").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstArray="); + ::rocprofiler::hip::detail::operator<<(out, v.dstArray); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstDevice").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstDevice="); + ::rocprofiler::hip::detail::operator<<(out, v.dstDevice); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstMemoryType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstMemoryType="); + ::rocprofiler::hip::detail::operator<<(out, v.dstMemoryType); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstLOD").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstLOD="); + ::rocprofiler::hip::detail::operator<<(out, v.dstLOD); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstZ").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "dstZ="); + ::rocprofiler::hip::detail::operator<<(out, v.dstZ); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstY").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "dstY="); + ::rocprofiler::hip::detail::operator<<(out, v.dstY); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::dstXInBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "dstXInBytes="); + ::rocprofiler::hip::detail::operator<<(out, 
v.dstXInBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcHeight").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcHeight="); + ::rocprofiler::hip::detail::operator<<(out, v.srcHeight); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcPitch").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcPitch="); + ::rocprofiler::hip::detail::operator<<(out, v.srcPitch); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcArray").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcArray="); + ::rocprofiler::hip::detail::operator<<(out, v.srcArray); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcDevice").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcDevice="); + ::rocprofiler::hip::detail::operator<<(out, v.srcDevice); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcMemoryType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcMemoryType="); + ::rocprofiler::hip::detail::operator<<(out, v.srcMemoryType); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcLOD").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcLOD="); + ::rocprofiler::hip::detail::operator<<(out, v.srcLOD); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcZ").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "srcZ="); + ::rocprofiler::hip::detail::operator<<(out, v.srcZ); + std:: operator<<(out, ", "); + } + if(std::string_view("HIP_MEMCPY3D::srcY").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "srcY="); + ::rocprofiler::hip::detail::operator<<(out, v.srcY); + std:: operator<<(out, ", "); + } + 
if(std::string_view("HIP_MEMCPY3D::srcXInBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "srcXInBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.srcXInBytes); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uchar1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uchar1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uchar2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uchar2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("uchar2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uchar3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uchar3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("uchar3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: 
operator<<(out, ", "); + } + if(std::string_view("uchar3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uchar4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uchar4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("uchar4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("uchar4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("uchar4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const char1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("char1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const char2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= 
HIP_depth_max) + { + if(std::string_view("char2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("char2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const char3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("char3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("char3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("char3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const char4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("char4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("char4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + 
if(std::string_view("char4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("char4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ushort1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ushort1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ushort2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ushort2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ushort2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ushort3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ushort3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + 
if(std::string_view("ushort3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ushort3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ushort4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ushort4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("ushort4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("ushort4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ushort4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const short1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("short1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + 
+inline static std::ostream& +operator<<(std::ostream& out, const short2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("short2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("short2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const short3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("short3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("short3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("short3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const short4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("short4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("short4::z").find(HIP_structs_regex) != 
std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("short4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("short4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uint1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uint1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uint2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uint2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("uint2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uint3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uint3::z").find(HIP_structs_regex) != std::string_view::npos) 
+ { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("uint3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("uint3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const uint4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("uint4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("uint4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("uint4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("uint4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const int1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("int1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + 
::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const int2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("int2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("int2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const int3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("int3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("int3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("int3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const int4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("int4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + 
::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("int4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("int4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("int4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulong1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulong1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulong2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulong2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ulong2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulong3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + 
if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulong3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("ulong3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ulong3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulong4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulong4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("ulong4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("ulong4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ulong4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= 
HIP_depth_max) + { + if(std::string_view("long1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("long2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("long2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("long3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("long3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("long3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + 
if(std::string_view("long4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("long4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("long4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("long4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulonglong1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulonglong2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ulonglong2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; 
+} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulonglong3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("ulonglong3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ulonglong3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("ulonglong4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("ulonglong4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("ulonglong4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("ulonglong4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline 
static std::ostream& +operator<<(std::ostream& out, const longlong1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("longlong1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const longlong2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("longlong2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("longlong2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const longlong3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("longlong3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("longlong3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("longlong3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} 
+ +inline static std::ostream& +operator<<(std::ostream& out, const longlong4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("longlong4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("longlong4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("longlong4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("longlong4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("float1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("float2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + 
if(std::string_view("float2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("float3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("float3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("float3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("float4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("float4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("float4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + 
if(std::string_view("float4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double1& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("double1::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double2& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("double2::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("double2::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("double3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("double3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + 
if(std::string_view("double3::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double4& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("double4::w").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "w="); + ::rocprofiler::hip::detail::operator<<(out, v.w); + std:: operator<<(out, ", "); + } + if(std::string_view("double4::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("double4::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("double4::x").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "x="); + ::rocprofiler::hip::detail::operator<<(out, v.x); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const textureReference& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("textureReference::format").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "format="); + ::rocprofiler::hip::detail::operator<<(out, v.format); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::numChannels").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "numChannels="); + ::rocprofiler::hip::detail::operator<<(out, v.numChannels); + 
std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::textureObject").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "textureObject="); + ::rocprofiler::hip::detail::operator<<(out, v.textureObject); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::maxMipmapLevelClamp").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxMipmapLevelClamp="); + ::rocprofiler::hip::detail::operator<<(out, v.maxMipmapLevelClamp); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::minMipmapLevelClamp").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "minMipmapLevelClamp="); + ::rocprofiler::hip::detail::operator<<(out, v.minMipmapLevelClamp); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::mipmapLevelBias").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "mipmapLevelBias="); + ::rocprofiler::hip::detail::operator<<(out, v.mipmapLevelBias); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::mipmapFilterMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "mipmapFilterMode="); + ::rocprofiler::hip::detail::operator<<(out, v.mipmapFilterMode); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::maxAnisotropy").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxAnisotropy="); + ::rocprofiler::hip::detail::operator<<(out, v.maxAnisotropy); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::sRGB").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "sRGB="); + ::rocprofiler::hip::detail::operator<<(out, v.sRGB); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::channelDesc").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, 
"channelDesc="); + ::rocprofiler::hip::detail::operator<<(out, v.channelDesc); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::filterMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "filterMode="); + ::rocprofiler::hip::detail::operator<<(out, v.filterMode); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::readMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "readMode="); + ::rocprofiler::hip::detail::operator<<(out, v.readMode); + std:: operator<<(out, ", "); + } + if(std::string_view("textureReference::normalized").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "normalized="); + ::rocprofiler::hip::detail::operator<<(out, v.normalized); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipTextureDesc& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipTextureDesc::maxMipmapLevelClamp").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxMipmapLevelClamp="); + ::rocprofiler::hip::detail::operator<<(out, v.maxMipmapLevelClamp); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::minMipmapLevelClamp").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "minMipmapLevelClamp="); + ::rocprofiler::hip::detail::operator<<(out, v.minMipmapLevelClamp); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::mipmapLevelBias").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "mipmapLevelBias="); + ::rocprofiler::hip::detail::operator<<(out, v.mipmapLevelBias); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::mipmapFilterMode").find(HIP_structs_regex) != + 
std::string_view::npos) + { + std:: operator<<(out, "mipmapFilterMode="); + ::rocprofiler::hip::detail::operator<<(out, v.mipmapFilterMode); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::maxAnisotropy").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxAnisotropy="); + ::rocprofiler::hip::detail::operator<<(out, v.maxAnisotropy); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::normalizedCoords").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "normalizedCoords="); + ::rocprofiler::hip::detail::operator<<(out, v.normalizedCoords); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::borderColor").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "borderColor="); + ::rocprofiler::hip::detail::operator<<(out, v.borderColor); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::sRGB").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "sRGB="); + ::rocprofiler::hip::detail::operator<<(out, v.sRGB); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::readMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "readMode="); + ::rocprofiler::hip::detail::operator<<(out, v.readMode); + std:: operator<<(out, ", "); + } + if(std::string_view("hipTextureDesc::filterMode").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "filterMode="); + ::rocprofiler::hip::detail::operator<<(out, v.filterMode); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const surfaceReference& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("surfaceReference::surfaceObject").find(HIP_structs_regex) != + 
std::string_view::npos) + { + std:: operator<<(out, "surfaceObject="); + ::rocprofiler::hip::detail::operator<<(out, v.surfaceObject); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipIpcMemHandle_t&) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipIpcMemHandle_t::reserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reserved="); + ::rocprofiler::hip::detail::operator<<(out, 0); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipIpcEventHandle_t&) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipIpcEventHandle_t::reserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reserved="); + ::rocprofiler::hip::detail::operator<<(out, 0); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipFuncAttributes& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipFuncAttributes::sharedSizeBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "sharedSizeBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.sharedSizeBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::ptxVersion").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "ptxVersion="); + ::rocprofiler::hip::detail::operator<<(out, v.ptxVersion); + std:: operator<<(out, ", "); + } + 
if(std::string_view("hipFuncAttributes::preferredShmemCarveout").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "preferredShmemCarveout="); + ::rocprofiler::hip::detail::operator<<(out, v.preferredShmemCarveout); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::numRegs").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "numRegs="); + ::rocprofiler::hip::detail::operator<<(out, v.numRegs); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::maxThreadsPerBlock").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "maxThreadsPerBlock="); + ::rocprofiler::hip::detail::operator<<(out, v.maxThreadsPerBlock); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::maxDynamicSharedSizeBytes") + .find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "maxDynamicSharedSizeBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.maxDynamicSharedSizeBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::localSizeBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "localSizeBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.localSizeBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::constSizeBytes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "constSizeBytes="); + ::rocprofiler::hip::detail::operator<<(out, v.constSizeBytes); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::cacheModeCA").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "cacheModeCA="); + ::rocprofiler::hip::detail::operator<<(out, v.cacheModeCA); + std:: operator<<(out, ", "); + } + if(std::string_view("hipFuncAttributes::binaryVersion").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, 
"binaryVersion="); + ::rocprofiler::hip::detail::operator<<(out, v.binaryVersion); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemLocation& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipMemLocation::id").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "id="); + ::rocprofiler::hip::detail::operator<<(out, v.id); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemLocation::type").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "type="); + ::rocprofiler::hip::detail::operator<<(out, v.type); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemAccessDesc& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipMemAccessDesc::flags").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "flags="); + ::rocprofiler::hip::detail::operator<<(out, v.flags); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemAccessDesc::location").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "location="); + ::rocprofiler::hip::detail::operator<<(out, v.location); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemPoolProps& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipMemPoolProps::reserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reserved="); + ::rocprofiler::hip::detail::operator<<(out, 0); + 
std:: operator<<(out, ", "); + } + if(std::string_view("hipMemPoolProps::location").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "location="); + ::rocprofiler::hip::detail::operator<<(out, v.location); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemPoolProps::handleTypes").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "handleTypes="); + ::rocprofiler::hip::detail::operator<<(out, v.handleTypes); + std:: operator<<(out, ", "); + } + if(std::string_view("hipMemPoolProps::allocType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "allocType="); + ::rocprofiler::hip::detail::operator<<(out, v.allocType); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemPoolPtrExportData&) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("hipMemPoolPtrExportData::reserved").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "reserved="); + ::rocprofiler::hip::detail::operator<<(out, 0); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const dim3& v) +{ + std::operator<<(out, '{'); + ++HIP_depth_max_cnt; + if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max) + { + if(std::string_view("dim3::z").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "z="); + ::rocprofiler::hip::detail::operator<<(out, v.z); + std:: operator<<(out, ", "); + } + if(std::string_view("dim3::y").find(HIP_structs_regex) != std::string_view::npos) + { + std:: operator<<(out, "y="); + ::rocprofiler::hip::detail::operator<<(out, v.y); + std:: operator<<(out, ", "); + } + if(std::string_view("dim3::x").find(HIP_structs_regex) != 
std::string_view::npos)
+        {
+            std:: operator<<(out, "x=");
+            ::rocprofiler::hip::detail::operator<<(out, v.x);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipLaunchParams as "{stream=..., sharedMem=..., blockDim=..., gridDim=...}" and
+ * returns `out`.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max (restored before returning). Each field is
+ * additionally gated by a plain std::string_view::find(HIP_structs_regex) on its qualified
+ * name (substring search, not a regex match). ", " is written inside the gate of every
+ * field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipLaunchParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipLaunchParams::stream").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "stream=");
+            ::rocprofiler::hip::detail::operator<<(out, v.stream);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipLaunchParams::sharedMem").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "sharedMem=");
+            ::rocprofiler::hip::detail::operator<<(out, v.sharedMem);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipLaunchParams::blockDim").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "blockDim=");
+            ::rocprofiler::hip::detail::operator<<(out, v.blockDim);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipLaunchParams::gridDim").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "gridDim=");
+            ::rocprofiler::hip::detail::operator<<(out, v.gridDim);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipFunctionLaunchParams in the order hStream, sharedMemBytes, blockDimZ/Y/X,
+ * gridDimZ/Y/X, function; same depth and name gating as the hipLaunchParams printer above.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipFunctionLaunchParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipFunctionLaunchParams::hStream").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hStream=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hStream);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::sharedMemBytes").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std::
operator<<(out, "sharedMemBytes=");
+            ::rocprofiler::hip::detail::operator<<(out, v.sharedMemBytes);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::blockDimZ").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "blockDimZ=");
+            ::rocprofiler::hip::detail::operator<<(out, v.blockDimZ);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::blockDimY").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "blockDimY=");
+            ::rocprofiler::hip::detail::operator<<(out, v.blockDimY);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::blockDimX").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "blockDimX=");
+            ::rocprofiler::hip::detail::operator<<(out, v.blockDimX);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::gridDimZ").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "gridDimZ=");
+            ::rocprofiler::hip::detail::operator<<(out, v.gridDimZ);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::gridDimY").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "gridDimY=");
+            ::rocprofiler::hip::detail::operator<<(out, v.gridDimY);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::gridDimX").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "gridDimX=");
+            ::rocprofiler::hip::detail::operator<<(out, v.gridDimX);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipFunctionLaunchParams::function").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "function=");
+            ::rocprofiler::hip::detail::operator<<(out, v.function);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipExternalMemoryHandleDesc as
+ * "{reserved=0, flags=..., size=..., handle.fd=..., type=...}" and returns `out`.
+ * `reserved` is printed as a literal 0; only the `fd` member of `handle` is printed.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const
hipExternalMemoryHandleDesc& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalMemoryHandleDesc::reserved").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "reserved=");
+            ::rocprofiler::hip::detail::operator<<(out, 0);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryHandleDesc::flags").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "flags=");
+            ::rocprofiler::hip::detail::operator<<(out, v.flags);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryHandleDesc::size").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "size=");
+            ::rocprofiler::hip::detail::operator<<(out, v.size);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryHandleDesc_st::union ::handle.fd")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "handle.fd=");
+            ::rocprofiler::hip::detail::operator<<(out, v.handle.fd);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryHandleDesc::type").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "type=");
+            ::rocprofiler::hip::detail::operator<<(out, v.type);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipExternalMemoryBufferDesc as "{reserved=0, flags=..., size=..., offset=...}"
+ * and returns `out`; `reserved` is printed as a literal 0.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExternalMemoryBufferDesc& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalMemoryBufferDesc::reserved").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "reserved=");
+            ::rocprofiler::hip::detail::operator<<(out, 0);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryBufferDesc::flags").find(HIP_structs_regex) !=
+
std::string_view::npos)
+        {
+            std:: operator<<(out, "flags=");
+            ::rocprofiler::hip::detail::operator<<(out, v.flags);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryBufferDesc::size").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "size=");
+            ::rocprofiler::hip::detail::operator<<(out, v.size);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryBufferDesc::offset").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "offset=");
+            ::rocprofiler::hip::detail::operator<<(out, v.offset);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipExternalMemoryMipmappedArrayDesc as
+ * "{numLevels=..., flags=..., extent=..., formatDesc=..., offset=...}" and returns `out`.
+ * Compiled only when HIP_VERSION_MAJOR >= 6.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+#if HIP_VERSION_MAJOR >= 6
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExternalMemoryMipmappedArrayDesc& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalMemoryMipmappedArrayDesc::numLevels")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "numLevels=");
+            ::rocprofiler::hip::detail::operator<<(out, v.numLevels);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryMipmappedArrayDesc::flags").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "flags=");
+            ::rocprofiler::hip::detail::operator<<(out, v.flags);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryMipmappedArrayDesc::extent")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "extent=");
+            ::rocprofiler::hip::detail::operator<<(out, v.extent);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalMemoryMipmappedArrayDesc::formatDesc")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "formatDesc=");
+            ::rocprofiler::hip::detail::operator<<(out, v.formatDesc);
+            std:: operator<<(out, ", ");
+        }
+
if(std::string_view("hipExternalMemoryMipmappedArrayDesc::offset")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "offset=");
+            ::rocprofiler::hip::detail::operator<<(out, v.offset);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+#endif
+/*
+ * Streams a hipExternalSemaphoreHandleDesc as
+ * "{reserved=0, flags=..., handle.fd=..., type=...}" and returns `out`.
+ * `reserved` is printed as a literal 0; only the `fd` member of `handle` is printed.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExternalSemaphoreHandleDesc& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalSemaphoreHandleDesc::reserved").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "reserved=");
+            ::rocprofiler::hip::detail::operator<<(out, 0);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreHandleDesc::flags").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "flags=");
+            ::rocprofiler::hip::detail::operator<<(out, v.flags);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreHandleDesc_st::union ::handle.fd")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "handle.fd=");
+            ::rocprofiler::hip::detail::operator<<(out, v.handle.fd);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreHandleDesc::type").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "type=");
+            ::rocprofiler::hip::detail::operator<<(out, v.type);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipExternalSemaphoreSignalParams as "{reserved=0, flags=...}"; `reserved` is a
+ * literal 0. Same depth and name gating as the hipExternalSemaphoreHandleDesc printer above.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExternalSemaphoreSignalParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalSemaphoreSignalParams::reserved").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "reserved=");
+
::rocprofiler::hip::detail::operator<<(out, 0);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreSignalParams::flags").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "flags=");
+            ::rocprofiler::hip::detail::operator<<(out, v.flags);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipExternalSemaphoreWaitParams as "{reserved=0, flags=...}" and returns `out`;
+ * `reserved` is printed as a literal 0.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExternalSemaphoreWaitParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalSemaphoreWaitParams::reserved").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "reserved=");
+            ::rocprofiler::hip::detail::operator<<(out, 0);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreWaitParams::flags").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "flags=");
+            ::rocprofiler::hip::detail::operator<<(out, v.flags);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipHostNodeParams as "{fn=...}"; only the `fn` member is printed. Same depth
+ * and name gating as above.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipHostNodeParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipHostNodeParams::fn").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "fn=");
+            ::rocprofiler::hip::detail::operator<<(out, v.fn);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipKernelNodeParams as "{sharedMemBytes=..., gridDim=..., blockDim=...}"; only
+ * these three members are printed. Same depth and name gating as above.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipKernelNodeParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipKernelNodeParams::sharedMemBytes").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "sharedMemBytes=");
+
::rocprofiler::hip::detail::operator<<(out, v.sharedMemBytes);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipKernelNodeParams::gridDim").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "gridDim=");
+            ::rocprofiler::hip::detail::operator<<(out, v.gridDim);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipKernelNodeParams::blockDim").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "blockDim=");
+            ::rocprofiler::hip::detail::operator<<(out, v.blockDim);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipMemsetParams as
+ * "{width=..., value=..., pitch=..., height=..., elementSize=...}" and returns `out`.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipMemsetParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipMemsetParams::width").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "width=");
+            ::rocprofiler::hip::detail::operator<<(out, v.width);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemsetParams::value").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "value=");
+            ::rocprofiler::hip::detail::operator<<(out, v.value);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemsetParams::pitch").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "pitch=");
+            ::rocprofiler::hip::detail::operator<<(out, v.pitch);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemsetParams::height").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "height=");
+            ::rocprofiler::hip::detail::operator<<(out, v.height);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemsetParams::elementSize").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "elementSize=");
+            ::rocprofiler::hip::detail::operator<<(out, v.elementSize);
+        }
+    };
+
HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipMemAllocNodeParams as
+ * "{bytesize=..., accessDescCount=..., accessDescs=..., poolProps=...}" and returns `out`.
+ * `accessDescs` is passed to the detail overload as-is; no element loop is done here.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipMemAllocNodeParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipMemAllocNodeParams::bytesize").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "bytesize=");
+            ::rocprofiler::hip::detail::operator<<(out, v.bytesize);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemAllocNodeParams::accessDescCount").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "accessDescCount=");
+            ::rocprofiler::hip::detail::operator<<(out, v.accessDescCount);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemAllocNodeParams::accessDescs").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "accessDescs=");
+            ::rocprofiler::hip::detail::operator<<(out, v.accessDescs);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemAllocNodeParams::poolProps").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "poolProps=");
+            ::rocprofiler::hip::detail::operator<<(out, v.poolProps);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipAccessPolicyWindow as
+ * "{num_bytes=..., missProp=..., hitRatio=..., hitProp=...}"; same depth and name gating as
+ * the hipMemAllocNodeParams printer above.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipAccessPolicyWindow& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipAccessPolicyWindow::num_bytes").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "num_bytes=");
+            ::rocprofiler::hip::detail::operator<<(out, v.num_bytes);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipAccessPolicyWindow::missProp").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "missProp=");
+
::rocprofiler::hip::detail::operator<<(out, v.missProp);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipAccessPolicyWindow::hitRatio").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hitRatio=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hitRatio);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipAccessPolicyWindow::hitProp").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "hitProp=");
+            ::rocprofiler::hip::detail::operator<<(out, v.hitProp);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+
+// Streams a hipKernelNodeAttrValue as "{cooperative=..., accessPolicyWindow=...}" and
+// returns `out`.
+// Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+// counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+// std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+// regex match). ", " is written inside the gate of every field but the last.
+// NOTE(review): both members are read unconditionally; if hipKernelNodeAttrValue is a union
+// (its definition is not visible here) only one of them is meaningful -- confirm.
+inline static std::ostream&
+operator<<(std::ostream& out, const hipKernelNodeAttrValue& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipKernelNodeAttrValue::cooperative").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "cooperative=");
+            ::rocprofiler::hip::detail::operator<<(out, v.cooperative);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipKernelNodeAttrValue::accessPolicyWindow").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "accessPolicyWindow=");
+            ::rocprofiler::hip::detail::operator<<(out, v.accessPolicyWindow);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+
+// NOTE: no stream operator is defined for HIP_MEMSET_NODE_PARAMS in this section. It existed
+// here only as commented-out code, which has been removed as dead code.
+
+// Streams a hipMemAllocationProp as "{location=..., requestedHandleType=..., type=...}" and
+// returns `out`; same depth and name gating as the hipKernelNodeAttrValue printer above.
+inline static std::ostream&
+operator<<(std::ostream& out, const hipMemAllocationProp& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipMemAllocationProp::location").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "location=");
+            ::rocprofiler::hip::detail::operator<<(out, v.location);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipMemAllocationProp::requestedHandleType").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "requestedHandleType=");
+            ::rocprofiler::hip::detail::operator<<(out, v.requestedHandleType);
+            std:: operator<<(out, ", ");
+        }
+
if(std::string_view("hipMemAllocationProp::type").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "type=");
+            ::rocprofiler::hip::detail::operator<<(out, v.type);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipExternalSemaphoreSignalNodeParams as
+ * "{numExtSems=..., paramsArray=..., extSemArray=...}" and returns `out`. The two array
+ * members are passed to the detail overload as-is; no element loop is done here.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExternalSemaphoreSignalNodeParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalSemaphoreSignalNodeParams::numExtSems")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "numExtSems=");
+            ::rocprofiler::hip::detail::operator<<(out, v.numExtSems);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreSignalNodeParams::paramsArray")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "paramsArray=");
+            ::rocprofiler::hip::detail::operator<<(out, v.paramsArray);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreSignalNodeParams::extSemArray")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "extSemArray=");
+            ::rocprofiler::hip::detail::operator<<(out, v.extSemArray);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipExternalSemaphoreWaitNodeParams as
+ * "{numExtSems=..., paramsArray=..., extSemArray=...}"; same depth and name gating as the
+ * signal-node printer above.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExternalSemaphoreWaitNodeParams& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipExternalSemaphoreWaitNodeParams::numExtSems")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "numExtSems=");
+            ::rocprofiler::hip::detail::operator<<(out, v.numExtSems);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreWaitNodeParams::paramsArray")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std::
operator<<(out, "paramsArray=");
+            ::rocprofiler::hip::detail::operator<<(out, v.paramsArray);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipExternalSemaphoreWaitNodeParams::extSemArray")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "extSemArray=");
+            ::rocprofiler::hip::detail::operator<<(out, v.extSemArray);
+        }
+    };
+    HIP_depth_max_cnt--;
+    std::operator<<(out, '}');
+    return out;
+}
+/*
+ * Streams a hipArrayMapInfo in the order reserved, flags, deviceBitMask, offset,
+ * memHandle.memHandle, memHandleType, memOperationType, subresourceType, resourceType, and
+ * returns `out`. `reserved` is printed as a literal 0; of `memHandle` only the `memHandle`
+ * member is printed.
+ * Braces are always written; the fields only while HIP_depth_max == -1 or the entry-bumped
+ * counter HIP_depth_max_cnt is <= HIP_depth_max. Each field is additionally gated by a plain
+ * std::string_view::find(HIP_structs_regex) on its qualified name (substring search, not a
+ * regex match). ", " is written inside the gate of every field but the last.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const hipArrayMapInfo& v)
+{
+    std::operator<<(out, '{');
+    ++HIP_depth_max_cnt;
+    if(HIP_depth_max == -1 || HIP_depth_max_cnt <= HIP_depth_max)
+    {
+        if(std::string_view("hipArrayMapInfo::reserved").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "reserved=");
+            ::rocprofiler::hip::detail::operator<<(out, 0);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipArrayMapInfo::flags").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "flags=");
+            ::rocprofiler::hip::detail::operator<<(out, v.flags);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipArrayMapInfo::deviceBitMask").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "deviceBitMask=");
+            ::rocprofiler::hip::detail::operator<<(out, v.deviceBitMask);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipArrayMapInfo::offset").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+            std:: operator<<(out, "offset=");
+            ::rocprofiler::hip::detail::operator<<(out, v.offset);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipArrayMapInfo::union ::memHandle.memHandle")
+               .find(HIP_structs_regex) != std::string_view::npos)
+        {
+            std:: operator<<(out, "memHandle.memHandle=");
+            ::rocprofiler::hip::detail::operator<<(out, v.memHandle.memHandle);
+            std:: operator<<(out, ", ");
+        }
+        if(std::string_view("hipArrayMapInfo::memHandleType").find(HIP_structs_regex) !=
+           std::string_view::npos)
+        {
+ std:: operator<<(out, "memHandleType="); + ::rocprofiler::hip::detail::operator<<(out, v.memHandleType); + std:: operator<<(out, ", "); + } + if(std::string_view("hipArrayMapInfo::memOperationType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "memOperationType="); + ::rocprofiler::hip::detail::operator<<(out, v.memOperationType); + std:: operator<<(out, ", "); + } + if(std::string_view("hipArrayMapInfo::subresourceType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "subresourceType="); + ::rocprofiler::hip::detail::operator<<(out, v.subresourceType); + std:: operator<<(out, ", "); + } + if(std::string_view("hipArrayMapInfo::resourceType").find(HIP_structs_regex) != + std::string_view::npos) + { + std:: operator<<(out, "resourceType="); + ::rocprofiler::hip::detail::operator<<(out, v.resourceType); + } + }; + HIP_depth_max_cnt--; + std::operator<<(out, '}'); + return out; +} +// end ostream ops for HIP +} // namespace detail +} // namespace hip +} // namespace rocprofiler + +inline static std::ostream& +operator<<(std::ostream& out, const __locale_struct& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipDeviceArch_t& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipUUID& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipDeviceProp_t& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipPointerAttribute_t& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipChannelFormatDesc& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + 
return out;
+}
+/*
+ * Forwarding overloads: every operator<< in this segment (HIP_ARRAY_DESCRIPTOR through
+ * hipPos) calls the fully qualified ::rocprofiler::hip::detail::operator<<(out, v) for the
+ * same type and returns `out`. They are defined after the rocprofiler::hip::detail
+ * namespaces are closed, and do no formatting of their own.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const HIP_ARRAY_DESCRIPTOR& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const HIP_ARRAY3D_DESCRIPTOR& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const hip_Memcpy2D& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const hipMipmappedArray& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const HIP_TEXTURE_DESC& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const hipResourceDesc& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const HIP_RESOURCE_DESC& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const hipResourceViewDesc& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const HIP_RESOURCE_VIEW_DESC& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const hipPitchedPtr& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const hipExtent& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const hipPos& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const
hipMemcpy3DParms& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+/*
+ * Forwarding overloads: every operator<< in this segment (hipMemcpy3DParms, HIP_MEMCPY3D,
+ * uchar1-4, char1-4, ushort1-4) calls the fully qualified
+ * ::rocprofiler::hip::detail::operator<<(out, v) for the same type and returns `out`,
+ * without doing any formatting of its own.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const HIP_MEMCPY3D& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uchar1& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uchar2& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uchar3& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uchar4& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const char1& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const char2& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const char3& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const char4& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const ushort1& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const ushort2& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const ushort3& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const ushort4& v)
+{
+
::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+/*
+ * Forwarding overloads: every operator<< in this segment (short1-4, uint1-4, int1-4,
+ * ulong1) calls the fully qualified ::rocprofiler::hip::detail::operator<<(out, v) for the
+ * same type and returns `out`, without doing any formatting of its own.
+ */
+inline static std::ostream&
+operator<<(std::ostream& out, const short1& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const short2& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const short3& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const short4& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uint1& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uint2& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uint3& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const uint4& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const int1& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const int2& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const int3& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const int4& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline static std::ostream&
+operator<<(std::ostream& out, const ulong1& v)
+{
+    ::rocprofiler::hip::detail::operator<<(out, v);
+    return out;
+}
+
+inline
static std::ostream& +operator<<(std::ostream& out, const ulong2& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulong3& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulong4& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long1& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long2& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long3& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const long4& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong1& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong2& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong3& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const ulonglong4& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const longlong1& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const longlong2& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& 
+operator<<(std::ostream& out, const longlong3& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const longlong4& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float1& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float2& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float3& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const float4& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double1& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double2& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double3& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const double4& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const textureReference& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipTextureDesc& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const surfaceReference& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& 
+operator<<(std::ostream& out, const hipIpcMemHandle_t& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipIpcEventHandle_t& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipFuncAttributes& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemLocation& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemAccessDesc& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemPoolProps& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemPoolPtrExportData& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const dim3& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipLaunchParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipFunctionLaunchParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipExternalMemoryHandleDesc& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipExternalMemoryBufferDesc& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const 
hipExternalMemoryMipmappedArrayDesc& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipExternalSemaphoreHandleDesc& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipExternalSemaphoreSignalParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipExternalSemaphoreWaitParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipHostNodeParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipKernelNodeParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemsetParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemAllocNodeParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipAccessPolicyWindow& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipKernelNodeAttrValue& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +// inline static std::ostream& +// operator<<(std::ostream& out, const HIP_MEMSET_NODE_PARAMS& v) +// { +// ::rocprofiler::hip::detail::operator<<(out, v); +// return out; +// } + +inline static std::ostream& +operator<<(std::ostream& out, const hipMemAllocationProp& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& 
+operator<<(std::ostream& out, const hipExternalSemaphoreSignalNodeParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipExternalSemaphoreWaitNodeParams& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} + +inline static std::ostream& +operator<<(std::ostream& out, const hipArrayMapInfo& v) +{ + ::rocprofiler::hip::detail::operator<<(out, v); + return out; +} diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.cpp new file mode 100644 index 0000000000..098b44496a --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.cpp @@ -0,0 +1,684 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "lib/rocprofiler-sdk/hip/hip.hpp" +#include "lib/common/defines.hpp" +#include "lib/common/utility.hpp" +#include "lib/rocprofiler-sdk/buffer.hpp" +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/hip/details/ostream.hpp" +#include "lib/rocprofiler-sdk/hip/types.hpp" +#include "lib/rocprofiler-sdk/hip/utils.hpp" + +#include +#include +#include + +#include +#include +#include +// must be included after runtime api +#include + +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace hip +{ +namespace +{ +struct null_type +{}; + +template +auto +get_default_retval() +{ + if constexpr(std::is_pointer::value) + { + Tp v = nullptr; + return v; + } + else if constexpr(std::is_same::value) + return hipErrorUnknown; + else if constexpr(std::is_same::value) + return hipChannelFormatDesc{}; + else if constexpr(std::is_same::value) + return -1; + else if constexpr(std::is_void::value) + return null_type{}; + else + static_assert(std::is_empty::value, "Error! unsupported return type"); +} + +template +void +set_data_retval(DataT& _data, Tp _val) +{ + if constexpr(std::is_same::value) + { + (void) _data; + (void) _val; + } + else if constexpr(std::is_same::value) + { + _data.hipError_t_retval = _val; + } + else if constexpr(std::is_same::value) + { + _data.hipChannelFormatDesc_retval = _val; + } + else if constexpr(std::is_same::value) + { + _data.const_charp_retval = _val; + } + else if constexpr(std::is_same::value) + { + _data.voidpp_retval = _val; + } + else if constexpr(std::is_same::value) + { + _data.int_retval = _val; + } + else + { + static_assert(std::is_empty::value, "Error! 
unsupported return type"); + } +} +} // namespace + +hip_api_table_t& +get_table() +{ + static auto _compiler = hip_compiler_api_table_t{}; + static auto _runtime = hip_runtime_api_table_t{}; + static auto _v = []() { + _compiler.size = sizeof(_compiler); + _runtime.size = sizeof(_runtime); + auto _val = hip_api_table_t{&_compiler, &_runtime}; + return _val; + }(); + return _v; +} + +template +template +auto +hip_api_impl::set_data_args(DataArgsT& _data_args, Args... args) +{ + _data_args = DataArgsT{args...}; +} + +template +template +auto +hip_api_impl::exec(FuncT&& _func, Args&&... args) +{ + using return_type = std::decay_t>; + + if(_func) + { + if constexpr(std::is_void::value) + { + _func(std::forward(args)...); + return null_type{}; + } + else + { + return _func(std::forward(args)...); + } + } + + using info_type = hip_api_info; + LOG(ERROR) << "nullptr to next hip function for " << info_type::name << " (" + << info_type::operation_idx << ")"; + + return get_default_retval(); +} + +namespace +{ +using correlation_service = context::correlation_tracing_service; +using buffer_hip_api_record_t = rocprofiler_buffer_tracing_hip_api_record_t; + +struct callback_context_data +{ + const context::context* ctx = nullptr; + rocprofiler_callback_tracing_record_t record = {}; + rocprofiler_user_data_t user_data = {.value = 0}; +}; + +struct buffered_context_data +{ + const context::context* ctx = nullptr; + rocprofiler_user_data_t external_correlation = {}; +}; + +constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0}; + +void +populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx, + rocprofiler_buffer_tracing_kind_t buffered_domain_idx, + int operation_idx, + std::vector& callback_contexts, + std::vector& buffered_contexts) +{ + auto active_contexts = context::context_array_t{}; + auto thr_id = common::get_tid(); + for(const auto* itr : context::get_active_contexts(active_contexts)) + { + if(!itr) continue; + + // if(itr->pc_sampler) 
has_pc_sampling = true; + + if(itr->callback_tracer) + { + // if the given domain + op is not enabled, skip this context + if(itr->callback_tracer->domains(callback_domain_idx, operation_idx)) + callback_contexts.emplace_back( + callback_context_data{itr, rocprofiler_callback_tracing_record_t{}}); + } + + if(itr->buffered_tracer) + { + // if the given domain + op is not enabled, skip this context + if(itr->buffered_tracer->domains(buffered_domain_idx, operation_idx)) + buffered_contexts.emplace_back(buffered_context_data{ + itr, itr->correlation_tracer.external_correlator.get(thr_id)}); + } + } +} +} // namespace + +template +template +auto +hip_api_impl::functor(Args&&... args) +{ + using info_type = hip_api_info; + using callback_api_data_t = typename hip_domain_info::callback_data_type; + + auto thr_id = common::get_tid(); + auto callback_contexts = std::vector{}; + auto buffered_contexts = std::vector{}; + auto has_pc_sampling = false; + + populate_contexts(info_type::callback_domain_idx, + info_type::buffered_domain_idx, + info_type::operation_idx, + callback_contexts, + buffered_contexts); + + if(callback_contexts.empty() && buffered_contexts.empty()) + { + auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + if constexpr(!std::is_same::value) + return _ret; + else + return 0; + } + + auto ref_count = (has_pc_sampling) ? 
4 : 2; + auto buffer_record = common::init_public_api_struct(buffer_hip_api_record_t{}); + auto tracer_data = callback_api_data_t{.size = sizeof(callback_api_data_t)}; + auto* corr_id = correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; + + // construct the buffered info before the callback so the callbacks are as closely wrapped + // around the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.kind = info_type::buffered_domain_idx; + // external correlation will be updated right before record is placed in buffer + buffer_record.correlation_id = + rocprofiler_correlation_id_t{internal_corr_id, empty_user_data}; + buffer_record.operation = info_type::operation_idx; + buffer_record.thread_id = thr_id; + } + + tracer_data.size = sizeof(callback_api_data_t); + set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward(args)...); + + // invoke the callbacks + if(!callback_contexts.empty()) + { + set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward(args)...); + + for(auto& itr : callback_contexts) + { + auto& ctx = itr.ctx; + auto& record = itr.record; + auto& user_data = itr.user_data; + + auto extern_corr_id_v = ctx->correlation_tracer.external_correlator.get(thr_id); + + auto corr_id_v = rocprofiler_correlation_id_t{internal_corr_id, extern_corr_id_v}; + record = + rocprofiler_callback_tracing_record_t{rocprofiler_context_id_t{ctx->context_idx}, + thr_id, + corr_id_v, + info_type::callback_domain_idx, + info_type::operation_idx, + ROCPROFILER_CALLBACK_PHASE_ENTER, + static_cast(&tracer_data)}; + + auto& callback_info = + ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx); + callback_info.callback(record, &user_data, callback_info.data); + + // enter callback may update the external correlation id field + record.correlation_id.external = + ctx->correlation_tracer.external_correlator.get(thr_id); + } + } + + // record the start timestamp as close 
to the function call as possible + if(!buffered_contexts.empty()) + { + for(auto& itr : buffered_contexts) + { + itr.external_correlation = itr.ctx->correlation_tracer.external_correlator.get(thr_id); + } + + buffer_record.start_timestamp = common::timestamp_ns(); + } + + // decrement the reference count before invoking + corr_id->ref_count.fetch_sub(1); + + auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + + // record the end timestamp as close to the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.end_timestamp = common::timestamp_ns(); + } + + if(!callback_contexts.empty()) + { + set_data_retval(tracer_data.retval, _ret); + + for(auto& itr : callback_contexts) + { + auto& ctx = itr.ctx; + auto& record = itr.record; + auto& user_data = itr.user_data; + + record.phase = ROCPROFILER_CALLBACK_PHASE_EXIT; + record.payload = static_cast(&tracer_data); + + auto& callback_info = + ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx); + callback_info.callback(record, &user_data, callback_info.data); + } + } + + if(!buffered_contexts.empty()) + { + for(auto& itr : buffered_contexts) + { + assert(itr.ctx->buffered_tracer); + auto buffer_id = + itr.ctx->buffered_tracer->buffer_data.at(info_type::buffered_domain_idx); + auto buffer_v = buffer::get_buffer(buffer_id); + if(buffer_v && buffer_v->context_id == itr.ctx->context_idx && + buffer_v->buffer_id == buffer_id.handle) + { + // make copy of record + auto record_v = buffer_record; + // update the record with the correlation + record_v.correlation_id.external = itr.external_correlation; + + buffer_v->emplace( + ROCPROFILER_BUFFER_CATEGORY_TRACING, info_type::buffered_domain_idx, record_v); + } + } + } + + // decrement the reference count after usage in the callback/buffers + corr_id->ref_count.fetch_sub(1); + + context::pop_latest_correlation_id(corr_id); + + if constexpr(!std::is_same::value) + return _ret; + else + return 0; +} +} // namespace hip +} 
// namespace rocprofiler + +#define ROCPROFILER_LIB_ROCPROFILER_HIP_HIP_CPP_IMPL 1 + +// template specializations +#include "hip.def.cpp" + +namespace rocprofiler +{ +namespace hip +{ +namespace +{ +template +struct api_id_bounds; + +template <> +struct api_id_bounds +{ + static constexpr auto none = ROCPROFILER_HIP_API_ID_NONE; + static constexpr auto last = ROCPROFILER_HIP_API_ID_LAST; +}; + +template <> +struct api_id_bounds +{ + static constexpr auto none = ROCPROFILER_HIP_COMPILER_API_ID_NONE; + static constexpr auto last = ROCPROFILER_HIP_COMPILER_API_ID_LAST; +}; + +template +const char* +name_by_id(const uint32_t id, std::index_sequence) +{ + if(OpIdx == id) return hip_api_info::name; + + if constexpr(sizeof...(OpIdxTail) > 0) + return name_by_id(id, std::index_sequence{}); + else + return nullptr; +} + +template +uint32_t +id_by_name(const char* name, std::index_sequence) +{ + if(std::string_view{hip_api_info::name} == std::string_view{name}) + return hip_api_info::operation_idx; + + if constexpr(sizeof...(OpIdxTail) > 0) + return id_by_name(name, std::index_sequence{}); + else + return api_id_bounds::none; +} + +template +void +get_ids(std::vector& _id_list, std::index_sequence) +{ + auto _idx = hip_api_info::operation_idx; + if(_idx < api_id_bounds::last) _id_list.emplace_back(_idx); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_ids(_id_list, std::index_sequence{}); +} + +template +void +get_names(std::vector& _name_list, std::index_sequence) +{ + auto&& _name = hip_api_info::name; + if(_name != nullptr && strnlen(_name, 1) > 0) _name_list.emplace_back(_name); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_names(_name_list, std::index_sequence{}); +} + +template +void +iterate_args(const uint32_t id, + const rocprofiler_callback_tracing_hip_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t func, + void* user_data, + std::index_sequence) +{ + if(OpIdx == id) + { + using info_type = hip_api_info; + auto&& arg_list = 
info_type::as_arg_list(data); + auto&& arg_addr = info_type::as_arg_addr(data); + for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i) + { + auto ret = func(info_type::callback_domain_idx, // kind + id, // operation + i, // arg_number + arg_list.at(i).first.c_str(), // arg_name + arg_list.at(i).second.c_str(), // arg_value_str + arg_addr.at(i), // arg_value_addr + user_data); + if(ret != 0) break; + } + } + if constexpr(sizeof...(OpIdxTail) > 0) + iterate_args(id, data, func, user_data, std::index_sequence{}); +} + +bool +should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, + rocprofiler_buffer_tracing_kind_t _buffered_domain, + int _operation) +{ + // we loop over all the *registered* contexts and see if any of them, at any point in time, + // might require callback or buffered API tracing + for(const auto& itr : context::get_registered_contexts()) + { + if(!itr) continue; + + // if there is a callback tracer enabled for the given domain and op, we need to wrap + if(itr->callback_tracer && itr->callback_tracer->domains(_callback_domain) && + itr->callback_tracer->domains(_callback_domain, _operation)) + return true; + + // if there is a buffered tracer enabled for the given domain and op, we need to wrap + if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) && + itr->buffered_tracer->domains(_buffered_domain, _operation)) + return true; + } + return false; +} + +template +void +copy_table(Tp* _orig, std::integral_constant) +{ + using table_type = typename hip_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = hip_api_info{}; + + LOG(INFO) << "copying table entry for " << _info.name; + + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= _orig->size) return; + + // 1. get the sub-table containing the function pointer in original table + // 2. 
get reference to function pointer in sub-table in original table + auto& _table = _info.get_table(_orig); + auto& _func = _info.get_table_func(_table); + // 3. get the sub-table containing the function pointer in saved table + // 4. get reference to function pointer in sub-table in saved table + // 5. save the original function in the saved table + auto& _saved = _info.get_table(get_table()); + auto& _ofunc = _info.get_table_func(_saved); + _ofunc = _func; + } + + (void) _orig; +} + +template +void +update_table(Tp* _orig, std::integral_constant) +{ + using table_type = typename hip_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = hip_api_info{}; + + LOG(INFO) << "updating table entry for " << _info.name; + + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= _orig->size) return; + + // check to see if there are any contexts which enable this operation in the HIP API domain + if(!should_wrap_functor( + _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + return; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + // 3. update function pointer with wrapper + auto& _table = _info.get_table(_orig); + auto& _func = _info.get_table_func(_table); + _func = _info.get_functor(_func); + } + + (void) _orig; +} + +template +void +copy_table(Tp* _orig, std::index_sequence) +{ + copy_table(_orig, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + copy_table(_orig, std::index_sequence{}); +} + +template +void +update_table(Tp* _orig, std::index_sequence) +{ + update_table(_orig, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + update_table(_orig, std::index_sequence{}); +} +} // namespace + +// check out the assembly here... 
this compiles to a switch statement +template +const char* +name_by_id(uint32_t id) +{ + return name_by_id(id, std::make_index_sequence::last>{}); +} + +template +uint32_t +id_by_name(const char* name) +{ + return id_by_name(name, std::make_index_sequence::last>{}); +} + +template +std::vector +get_ids() +{ + constexpr auto last_api_id = api_id_bounds::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_ids(_data, std::make_index_sequence{}); + return _data; +} + +template +std::vector +get_names() +{ + constexpr auto last_api_id = api_id_bounds::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_names(_data, std::make_index_sequence{}); + return _data; +} + +void +iterate_args(uint32_t id, + const rocprofiler_callback_tracing_hip_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + void* user_data) +{ + if(callback) + iterate_args( + id, data, callback, user_data, std::make_index_sequence{}); +} + +// void +// iterate_args(uint32_t id, +// const rocprofiler_callback_tracing_hip_compiler_api_data_t& data, +// rocprofiler_callback_tracing_operation_args_cb_t callback, +// void* user_data) +// { +// if(callback) +// iterate_args( +// id, data, callback, user_data, +// std::make_index_sequence{}); +// } + +#define INSTANTIATE_HIP_TABLE_FUNC(TABLE) \ + template const char* name_by_id(uint32_t); \ + template uint32_t id_by_name
(const char*); \ + template std::vector get_ids
(); \ + template std::vector get_names
(); + +INSTANTIATE_HIP_TABLE_FUNC(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi) +INSTANTIATE_HIP_TABLE_FUNC(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi) + +void +copy_table(hip_compiler_api_table_t* _orig) +{ + if(_orig) + copy_table( + _orig, std::make_index_sequence{}); +} + +void +copy_table(hip_runtime_api_table_t* _orig) +{ + if(_orig) + copy_table( + _orig, std::make_index_sequence{}); +} + +void +update_table(hip_compiler_api_table_t* _orig) +{ + if(_orig) + update_table( + _orig, std::make_index_sequence{}); +} + +void +update_table(hip_runtime_api_table_t* _orig) +{ + if(_orig) + update_table( + _orig, std::make_index_sequence{}); +} +} // namespace hip +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.def.cpp new file mode 100644 index 0000000000..25e83d5031 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.def.cpp @@ -0,0 +1,503 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#if defined(ROCPROFILER_LIB_ROCPROFILER_HIP_HIP_CPP_IMPL) && \ + ROCPROFILER_LIB_ROCPROFILER_HIP_HIP_CPP_IMPL == 1 + +# include + +# include "defines.hpp" +# include "hip.hpp" + +namespace rocprofiler +{ +namespace hip +{ +template <> +struct hip_domain_info +{ + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_API; + using args_type = rocprofiler_hip_api_args_t; + using retval_type = rocprofiler_hip_api_retval_t; + using callback_data_type = rocprofiler_callback_tracing_hip_api_data_t; +}; + +template <> +struct hip_domain_info +{ + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API; + using args_type = rocprofiler_hip_compiler_api_args_t; + using retval_type = rocprofiler_hip_compiler_api_retval_t; + using callback_data_type = rocprofiler_callback_tracing_hip_compiler_api_data_t; +}; +} // namespace hip +} // namespace rocprofiler + +// clang-format off +HIP_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, hip_compiler_api_table_t, compiler) +HIP_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, hip_runtime_api_table_t, runtime) + +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipPopCallConfiguration, __hipPopCallConfiguration, __hipPopCallConfiguration_fn, gridDim, blockDim, sharedMem, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipPushCallConfiguration, __hipPushCallConfiguration, 
__hipPushCallConfiguration_fn, gridDim, blockDim, sharedMem, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterFatBinary, __hipRegisterFatBinary, __hipRegisterFatBinary_fn, data) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterFunction, __hipRegisterFunction, __hipRegisterFunction_fn, modules, hostFunction, deviceFunction, deviceName, threadLimit, tid, bid, blockDim, gridDim, wSize) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterManagedVar, __hipRegisterManagedVar, __hipRegisterManagedVar_fn, hipModule, pointer, init_value, name, size, align) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterSurface, __hipRegisterSurface, __hipRegisterSurface_fn, modules, var, hostVar, deviceVar, type, ext) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterTexture, __hipRegisterTexture, __hipRegisterTexture_fn, modules, var, hostVar, deviceVar, type, norm, ext) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterVar, __hipRegisterVar, __hipRegisterVar_fn, modules, var, hostVar, deviceVar, ext, size, constant, global) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_CompilerApi, ROCPROFILER_HIP_COMPILER_API_ID___hipUnregisterFatBinary, __hipUnregisterFatBinary, __hipUnregisterFatBinary_fn, modules) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipApiName, hipApiName, hipApiName_fn, id) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipArray3DCreate, hipArray3DCreate, hipArray3DCreate_fn, array, pAllocateArray) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, 
ROCPROFILER_HIP_API_ID_hipArray3DGetDescriptor, hipArray3DGetDescriptor, hipArray3DGetDescriptor_fn, pArrayDescriptor, array) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipArrayCreate, hipArrayCreate, hipArrayCreate_fn, pHandle, pAllocateArray) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipArrayDestroy, hipArrayDestroy, hipArrayDestroy_fn, array) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipArrayGetDescriptor, hipArrayGetDescriptor, hipArrayGetDescriptor_fn, pArrayDescriptor, array) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipArrayGetInfo, hipArrayGetInfo, hipArrayGetInfo_fn, desc, extent, flags, array) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipBindTexture, hipBindTexture, hipBindTexture_fn, offset, tex, devPtr, desc, size) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipBindTexture2D, hipBindTexture2D, hipBindTexture2D_fn, offset, tex, devPtr, desc, width, height, pitch) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipBindTextureToArray, hipBindTextureToArray, hipBindTextureToArray_fn, tex, array, desc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipBindTextureToMipmappedArray, hipBindTextureToMipmappedArray, hipBindTextureToMipmappedArray_fn, tex, mipmappedArray, desc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipChooseDevice, hipChooseDevice, hipChooseDevice_fn, device, prop) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipChooseDeviceR0000, hipChooseDeviceR0000, hipChooseDeviceR0000_fn, device, prop) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, 
ROCPROFILER_HIP_API_ID_hipConfigureCall, hipConfigureCall, hipConfigureCall_fn, gridDim, blockDim, sharedMem, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCreateSurfaceObject, hipCreateSurfaceObject, hipCreateSurfaceObject_fn, pSurfObject, pResDesc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCreateTextureObject, hipCreateTextureObject, hipCreateTextureObject_fn, pTexObject, pResDesc, pTexDesc, pResViewDesc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxCreate, hipCtxCreate, hipCtxCreate_fn, ctx, flags, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxDestroy, hipCtxDestroy, hipCtxDestroy_fn, ctx) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxDisablePeerAccess, hipCtxDisablePeerAccess, hipCtxDisablePeerAccess_fn, peerCtx) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxEnablePeerAccess, hipCtxEnablePeerAccess, hipCtxEnablePeerAccess_fn, peerCtx, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxGetApiVersion, hipCtxGetApiVersion, hipCtxGetApiVersion_fn, ctx, apiVersion) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxGetCacheConfig, hipCtxGetCacheConfig, hipCtxGetCacheConfig_fn, cacheConfig) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxGetCurrent, hipCtxGetCurrent, hipCtxGetCurrent_fn, ctx) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxGetDevice, hipCtxGetDevice, hipCtxGetDevice_fn, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxGetFlags, hipCtxGetFlags, hipCtxGetFlags_fn, flags) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxGetSharedMemConfig, hipCtxGetSharedMemConfig, hipCtxGetSharedMemConfig_fn, pConfig) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxPopCurrent, hipCtxPopCurrent, hipCtxPopCurrent_fn, ctx) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxPushCurrent, hipCtxPushCurrent, hipCtxPushCurrent_fn, ctx) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxSetCacheConfig, hipCtxSetCacheConfig, hipCtxSetCacheConfig_fn, cacheConfig) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxSetCurrent, hipCtxSetCurrent, hipCtxSetCurrent_fn, ctx) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxSetSharedMemConfig, hipCtxSetSharedMemConfig, hipCtxSetSharedMemConfig_fn, config) +HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCtxSynchronize, hipCtxSynchronize, hipCtxSynchronize_fn) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDestroyExternalMemory, hipDestroyExternalMemory, hipDestroyExternalMemory_fn, extMem) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDestroyExternalSemaphore, hipDestroyExternalSemaphore, hipDestroyExternalSemaphore_fn, extSem) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDestroySurfaceObject, hipDestroySurfaceObject, hipDestroySurfaceObject_fn, surfaceObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDestroyTextureObject, hipDestroyTextureObject, hipDestroyTextureObject_fn, textureObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceCanAccessPeer, 
hipDeviceCanAccessPeer, hipDeviceCanAccessPeer_fn, canAccessPeer, deviceId, peerDeviceId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceComputeCapability, hipDeviceComputeCapability, hipDeviceComputeCapability_fn, major, minor, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceDisablePeerAccess, hipDeviceDisablePeerAccess, hipDeviceDisablePeerAccess_fn, peerDeviceId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceEnablePeerAccess, hipDeviceEnablePeerAccess, hipDeviceEnablePeerAccess_fn, peerDeviceId, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGet, hipDeviceGet, hipDeviceGet_fn, device, ordinal) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetAttribute, hipDeviceGetAttribute, hipDeviceGetAttribute_fn, pi, attr, deviceId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetByPCIBusId, hipDeviceGetByPCIBusId, hipDeviceGetByPCIBusId_fn, device, pciBusId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetCacheConfig, hipDeviceGetCacheConfig, hipDeviceGetCacheConfig_fn, cacheConfig) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetDefaultMemPool, hipDeviceGetDefaultMemPool, hipDeviceGetDefaultMemPool_fn, mem_pool, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetGraphMemAttribute, hipDeviceGetGraphMemAttribute, hipDeviceGetGraphMemAttribute_fn, device, attr, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetLimit, hipDeviceGetLimit, hipDeviceGetLimit_fn, pValue, limit) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetMemPool, hipDeviceGetMemPool, hipDeviceGetMemPool_fn, mem_pool, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetName, hipDeviceGetName, hipDeviceGetName_fn, name, len, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetP2PAttribute, hipDeviceGetP2PAttribute, hipDeviceGetP2PAttribute_fn, value, attr, srcDevice, dstDevice) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetPCIBusId, hipDeviceGetPCIBusId, hipDeviceGetPCIBusId_fn, pciBusId, len, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetSharedMemConfig, hipDeviceGetSharedMemConfig, hipDeviceGetSharedMemConfig_fn, pConfig) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetStreamPriorityRange, hipDeviceGetStreamPriorityRange, hipDeviceGetStreamPriorityRange_fn, leastPriority, greatestPriority) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGetUuid, hipDeviceGetUuid, hipDeviceGetUuid_fn, uuid, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceGraphMemTrim, hipDeviceGraphMemTrim, hipDeviceGraphMemTrim_fn, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxGetState, hipDevicePrimaryCtxGetState, hipDevicePrimaryCtxGetState_fn, dev, flags, active) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxRelease, hipDevicePrimaryCtxRelease, hipDevicePrimaryCtxRelease_fn, dev) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxReset, 
hipDevicePrimaryCtxReset, hipDevicePrimaryCtxReset_fn, dev) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxRetain, hipDevicePrimaryCtxRetain, hipDevicePrimaryCtxRetain_fn, pctx, dev) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDevicePrimaryCtxSetFlags, hipDevicePrimaryCtxSetFlags, hipDevicePrimaryCtxSetFlags_fn, dev, flags) +HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceReset, hipDeviceReset, hipDeviceReset_fn) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceSetCacheConfig, hipDeviceSetCacheConfig, hipDeviceSetCacheConfig_fn, cacheConfig) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceSetGraphMemAttribute, hipDeviceSetGraphMemAttribute, hipDeviceSetGraphMemAttribute_fn, device, attr, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceSetLimit, hipDeviceSetLimit, hipDeviceSetLimit_fn, limit, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceSetMemPool, hipDeviceSetMemPool, hipDeviceSetMemPool_fn, device, mem_pool) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceSetSharedMemConfig, hipDeviceSetSharedMemConfig, hipDeviceSetSharedMemConfig_fn, config) +HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceSynchronize, hipDeviceSynchronize, hipDeviceSynchronize_fn) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDeviceTotalMem, hipDeviceTotalMem, hipDeviceTotalMem_fn, bytes, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDriverGetVersion, hipDriverGetVersion, hipDriverGetVersion_fn, driverVersion) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvGetErrorName, hipDrvGetErrorName, hipDrvGetErrorName_fn, hipError, errorString) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvGetErrorString, hipDrvGetErrorString, hipDrvGetErrorString_fn, hipError, errorString) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvGraphAddMemcpyNode, hipDrvGraphAddMemcpyNode, hipDrvGraphAddMemcpyNode_fn, phGraphNode, hGraph, dependencies, numDependencies, copyParams, ctx) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvMemcpy2DUnaligned, hipDrvMemcpy2DUnaligned, hipDrvMemcpy2DUnaligned_fn, pCopy) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvMemcpy3D, hipDrvMemcpy3D, hipDrvMemcpy3D_fn, pCopy) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvMemcpy3DAsync, hipDrvMemcpy3DAsync, hipDrvMemcpy3DAsync_fn, pCopy, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvPointerGetAttributes, hipDrvPointerGetAttributes, hipDrvPointerGetAttributes_fn, numAttributes, attributes, data, ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventCreate, hipEventCreate, hipEventCreate_fn, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventCreateWithFlags, hipEventCreateWithFlags, hipEventCreateWithFlags_fn, event, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventDestroy, hipEventDestroy, hipEventDestroy_fn, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventElapsedTime, hipEventElapsedTime, hipEventElapsedTime_fn, ms, start, stop) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventQuery, hipEventQuery, hipEventQuery_fn, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventRecord, hipEventRecord, hipEventRecord_fn, event, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventSynchronize, hipEventSynchronize, hipEventSynchronize_fn, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExtGetLinkTypeAndHopCount, hipExtGetLinkTypeAndHopCount, hipExtGetLinkTypeAndHopCount_fn, device1, device2, linktype, hopcount) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExtLaunchKernel, hipExtLaunchKernel, hipExtLaunchKernel_fn, function_address, numBlocks, dimBlocks, args, sharedMemBytes, stream, startEvent, stopEvent, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExtLaunchMultiKernelMultiDevice, hipExtLaunchMultiKernelMultiDevice, hipExtLaunchMultiKernelMultiDevice_fn, launchParamsList, numDevices, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExtMallocWithFlags, hipExtMallocWithFlags, hipExtMallocWithFlags_fn, ptr, sizeBytes, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExtStreamCreateWithCUMask, hipExtStreamCreateWithCUMask, hipExtStreamCreateWithCUMask_fn, stream, cuMaskSize, cuMask) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExtStreamGetCUMask, hipExtStreamGetCUMask, hipExtStreamGetCUMask_fn, stream, cuMaskSize, cuMask) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExternalMemoryGetMappedBuffer, hipExternalMemoryGetMappedBuffer, hipExternalMemoryGetMappedBuffer_fn, devPtr, extMem, bufferDesc) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFree, hipFree, hipFree_fn, ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFreeArray, hipFreeArray, hipFreeArray_fn, array) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFreeAsync, hipFreeAsync, hipFreeAsync_fn, dev_ptr, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFreeHost, hipFreeHost, hipFreeHost_fn, ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFreeMipmappedArray, hipFreeMipmappedArray, hipFreeMipmappedArray_fn, mipmappedArray) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFuncGetAttribute, hipFuncGetAttribute, hipFuncGetAttribute_fn, value, attrib, hfunc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFuncGetAttributes, hipFuncGetAttributes, hipFuncGetAttributes_fn, attr, func) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFuncSetAttribute, hipFuncSetAttribute, hipFuncSetAttribute_fn, func, attr, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFuncSetCacheConfig, hipFuncSetCacheConfig, hipFuncSetCacheConfig_fn, func, config) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig_fn, func, config) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGLGetDevices, hipGLGetDevices, hipGLGetDevices_fn, pHipDeviceCount, pHipDevices, hipDeviceCount, deviceList) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetChannelDesc, hipGetChannelDesc, hipGetChannelDesc_fn, desc, array) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetDevice, hipGetDevice, hipGetDevice_fn, deviceId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetDeviceCount, hipGetDeviceCount, hipGetDeviceCount_fn, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetDeviceFlags, hipGetDeviceFlags, hipGetDeviceFlags_fn, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetDevicePropertiesR0600, hipGetDevicePropertiesR0600, hipGetDevicePropertiesR0600_fn, prop, deviceId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetDevicePropertiesR0000, hipGetDevicePropertiesR0000, hipGetDevicePropertiesR0000_fn, prop, deviceId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetErrorName, hipGetErrorName, hipGetErrorName_fn, hip_error) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetErrorString, hipGetErrorString, hipGetErrorString_fn, hipError) +HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetLastError, hipGetLastError, hipGetLastError_fn) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetMipmappedArrayLevel, hipGetMipmappedArrayLevel, hipGetMipmappedArrayLevel_fn, levelArray, mipmappedArray, level) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetSymbolAddress, hipGetSymbolAddress, hipGetSymbolAddress_fn, devPtr, symbol) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetSymbolSize, hipGetSymbolSize, hipGetSymbolSize_fn, size, symbol) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetTextureAlignmentOffset, 
hipGetTextureAlignmentOffset, hipGetTextureAlignmentOffset_fn, offset, texref) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceDesc, hipGetTextureObjectResourceDesc, hipGetTextureObjectResourceDesc_fn, pResDesc, textureObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetTextureObjectResourceViewDesc, hipGetTextureObjectResourceViewDesc, hipGetTextureObjectResourceViewDesc_fn, pResViewDesc, textureObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetTextureObjectTextureDesc, hipGetTextureObjectTextureDesc, hipGetTextureObjectTextureDesc_fn, pTexDesc, textureObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetTextureReference, hipGetTextureReference, hipGetTextureReference_fn, texref, symbol) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddChildGraphNode, hipGraphAddChildGraphNode, hipGraphAddChildGraphNode_fn, pGraphNode, graph, pDependencies, numDependencies, childGraph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddDependencies, hipGraphAddDependencies, hipGraphAddDependencies_fn, graph, from, to, numDependencies) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddEmptyNode, hipGraphAddEmptyNode, hipGraphAddEmptyNode_fn, pGraphNode, graph, pDependencies, numDependencies) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddEventRecordNode, hipGraphAddEventRecordNode, hipGraphAddEventRecordNode_fn, pGraphNode, graph, pDependencies, numDependencies, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddEventWaitNode, hipGraphAddEventWaitNode, hipGraphAddEventWaitNode_fn, pGraphNode, 
graph, pDependencies, numDependencies, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddHostNode, hipGraphAddHostNode, hipGraphAddHostNode_fn, pGraphNode, graph, pDependencies, numDependencies, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddKernelNode, hipGraphAddKernelNode, hipGraphAddKernelNode_fn, pGraphNode, graph, pDependencies, numDependencies, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddMemAllocNode, hipGraphAddMemAllocNode, hipGraphAddMemAllocNode_fn, pGraphNode, graph, pDependencies, numDependencies, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddMemFreeNode, hipGraphAddMemFreeNode, hipGraphAddMemFreeNode_fn, pGraphNode, graph, pDependencies, numDependencies, dev_ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode, hipGraphAddMemcpyNode, hipGraphAddMemcpyNode_fn, pGraphNode, graph, pDependencies, numDependencies, pCopyParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNode1D, hipGraphAddMemcpyNode1D, hipGraphAddMemcpyNode1D_fn, pGraphNode, graph, pDependencies, numDependencies, dst, src, count, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol, hipGraphAddMemcpyNodeFromSymbol, hipGraphAddMemcpyNodeFromSymbol_fn, pGraphNode, graph, pDependencies, numDependencies, dst, symbol, count, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddMemcpyNodeToSymbol, hipGraphAddMemcpyNodeToSymbol, hipGraphAddMemcpyNodeToSymbol_fn, pGraphNode, graph, pDependencies, numDependencies, symbol, src, count, offset, kind) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphAddMemsetNode, hipGraphAddMemsetNode, hipGraphAddMemsetNode_fn, pGraphNode, graph, pDependencies, numDependencies, pMemsetParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphChildGraphNodeGetGraph, hipGraphChildGraphNodeGetGraph, hipGraphChildGraphNodeGetGraph_fn, node, pGraph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphClone, hipGraphClone, hipGraphClone_fn, pGraphClone, originalGraph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphCreate, hipGraphCreate, hipGraphCreate_fn, pGraph, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphDebugDotPrint, hipGraphDebugDotPrint, hipGraphDebugDotPrint_fn, graph, path, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphDestroy, hipGraphDestroy, hipGraphDestroy_fn, graph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphDestroyNode, hipGraphDestroyNode, hipGraphDestroyNode_fn, node) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeGetEvent, hipGraphEventRecordNodeGetEvent, hipGraphEventRecordNodeGetEvent_fn, node, event_out) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphEventRecordNodeSetEvent, hipGraphEventRecordNodeSetEvent, hipGraphEventRecordNodeSetEvent_fn, node, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeGetEvent, hipGraphEventWaitNodeGetEvent, hipGraphEventWaitNodeGetEvent_fn, node, event_out) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphEventWaitNodeSetEvent, 
hipGraphEventWaitNodeSetEvent, hipGraphEventWaitNodeSetEvent_fn, node, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecChildGraphNodeSetParams, hipGraphExecChildGraphNodeSetParams, hipGraphExecChildGraphNodeSetParams_fn, hGraphExec, node, childGraph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecDestroy, hipGraphExecDestroy, hipGraphExecDestroy_fn, graphExec) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecEventRecordNodeSetEvent, hipGraphExecEventRecordNodeSetEvent, hipGraphExecEventRecordNodeSetEvent_fn, hGraphExec, hNode, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecEventWaitNodeSetEvent, hipGraphExecEventWaitNodeSetEvent, hipGraphExecEventWaitNodeSetEvent_fn, hGraphExec, hNode, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecHostNodeSetParams, hipGraphExecHostNodeSetParams, hipGraphExecHostNodeSetParams_fn, hGraphExec, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecKernelNodeSetParams, hipGraphExecKernelNodeSetParams, hipGraphExecKernelNodeSetParams_fn, hGraphExec, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams, hipGraphExecMemcpyNodeSetParams, hipGraphExecMemcpyNodeSetParams_fn, hGraphExec, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D, hipGraphExecMemcpyNodeSetParams1D, hipGraphExecMemcpyNodeSetParams1D_fn, hGraphExec, node, dst, src, count, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol, 
hipGraphExecMemcpyNodeSetParamsFromSymbol, hipGraphExecMemcpyNodeSetParamsFromSymbol_fn, hGraphExec, node, dst, symbol, count, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol, hipGraphExecMemcpyNodeSetParamsToSymbol, hipGraphExecMemcpyNodeSetParamsToSymbol_fn, hGraphExec, node, symbol, src, count, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecMemsetNodeSetParams, hipGraphExecMemsetNodeSetParams, hipGraphExecMemsetNodeSetParams_fn, hGraphExec, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphExecUpdate, hipGraphExecUpdate, hipGraphExecUpdate_fn, hGraphExec, hGraph, hErrorNode_out, updateResult_out) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphGetEdges, hipGraphGetEdges, hipGraphGetEdges_fn, graph, from, to, numEdges) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphGetNodes, hipGraphGetNodes, hipGraphGetNodes_fn, graph, nodes, numNodes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphGetRootNodes, hipGraphGetRootNodes, hipGraphGetRootNodes_fn, graph, pRootNodes, pNumRootNodes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphHostNodeGetParams, hipGraphHostNodeGetParams, hipGraphHostNodeGetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphHostNodeSetParams, hipGraphHostNodeSetParams, hipGraphHostNodeSetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphInstantiate, hipGraphInstantiate, hipGraphInstantiate_fn, pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphInstantiateWithFlags, hipGraphInstantiateWithFlags, hipGraphInstantiateWithFlags_fn, pGraphExec, graph, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphKernelNodeCopyAttributes, hipGraphKernelNodeCopyAttributes, hipGraphKernelNodeCopyAttributes_fn, hSrc, hDst) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetAttribute, hipGraphKernelNodeGetAttribute, hipGraphKernelNodeGetAttribute_fn, hNode, attr, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphKernelNodeGetParams, hipGraphKernelNodeGetParams, hipGraphKernelNodeGetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetAttribute, hipGraphKernelNodeSetAttribute, hipGraphKernelNodeSetAttribute_fn, hNode, attr, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphKernelNodeSetParams, hipGraphKernelNodeSetParams, hipGraphKernelNodeSetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphLaunch, hipGraphLaunch, hipGraphLaunch_fn, graphExec, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemAllocNodeGetParams, hipGraphMemAllocNodeGetParams, hipGraphMemAllocNodeGetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemFreeNodeGetParams, hipGraphMemFreeNodeGetParams, hipGraphMemFreeNodeGetParams_fn, node, dev_ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeGetParams, hipGraphMemcpyNodeGetParams, hipGraphMemcpyNodeGetParams_fn, node, 
pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams, hipGraphMemcpyNodeSetParams, hipGraphMemcpyNodeSetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParams1D, hipGraphMemcpyNodeSetParams1D, hipGraphMemcpyNodeSetParams1D_fn, node, dst, src, count, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol, hipGraphMemcpyNodeSetParamsFromSymbol, hipGraphMemcpyNodeSetParamsFromSymbol_fn, node, dst, symbol, count, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol, hipGraphMemcpyNodeSetParamsToSymbol, hipGraphMemcpyNodeSetParamsToSymbol_fn, node, symbol, src, count, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeGetParams, hipGraphMemsetNodeGetParams, hipGraphMemsetNodeGetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphMemsetNodeSetParams, hipGraphMemsetNodeSetParams, hipGraphMemsetNodeSetParams_fn, node, pNodeParams) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphNodeFindInClone, hipGraphNodeFindInClone, hipGraphNodeFindInClone_fn, pNode, originalNode, clonedGraph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependencies, hipGraphNodeGetDependencies, hipGraphNodeGetDependencies_fn, node, pDependencies, pNumDependencies) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphNodeGetDependentNodes, hipGraphNodeGetDependentNodes, hipGraphNodeGetDependentNodes_fn, node, pDependentNodes, pNumDependentNodes) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphNodeGetEnabled, hipGraphNodeGetEnabled, hipGraphNodeGetEnabled_fn, hGraphExec, hNode, isEnabled) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphNodeGetType, hipGraphNodeGetType, hipGraphNodeGetType_fn, node, pType) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphNodeSetEnabled, hipGraphNodeSetEnabled, hipGraphNodeSetEnabled_fn, hGraphExec, hNode, isEnabled) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphReleaseUserObject, hipGraphReleaseUserObject, hipGraphReleaseUserObject_fn, graph, object, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphRemoveDependencies, hipGraphRemoveDependencies, hipGraphRemoveDependencies_fn, graph, from, to, numDependencies) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphRetainUserObject, hipGraphRetainUserObject, hipGraphRetainUserObject_fn, graph, object, count, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphUpload, hipGraphUpload, hipGraphUpload_fn, graphExec, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterBuffer, hipGraphicsGLRegisterBuffer, hipGraphicsGLRegisterBuffer_fn, resource, buffer, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphicsGLRegisterImage, hipGraphicsGLRegisterImage, hipGraphicsGLRegisterImage_fn, resource, image, target, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphicsMapResources, hipGraphicsMapResources, hipGraphicsMapResources_fn, count, resources, stream) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphicsResourceGetMappedPointer, hipGraphicsResourceGetMappedPointer, hipGraphicsResourceGetMappedPointer_fn, devPtr, size, resource) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphicsSubResourceGetMappedArray, hipGraphicsSubResourceGetMappedArray, hipGraphicsSubResourceGetMappedArray_fn, array, resource, arrayIndex, mipLevel) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphicsUnmapResources, hipGraphicsUnmapResources, hipGraphicsUnmapResources_fn, count, resources, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphicsUnregisterResource, hipGraphicsUnregisterResource, hipGraphicsUnregisterResource_fn, resource) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHostAlloc, hipHostAlloc, hipHostAlloc_fn, ptr, size, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHostFree, hipHostFree, hipHostFree_fn, ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHostGetDevicePointer, hipHostGetDevicePointer, hipHostGetDevicePointer_fn, devPtr, hstPtr, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHostGetFlags, hipHostGetFlags, hipHostGetFlags_fn, flagsPtr, hostPtr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHostMalloc, hipHostMalloc, hipHostMalloc_fn, ptr, size, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHostRegister, hipHostRegister, hipHostRegister_fn, hostPtr, sizeBytes, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHostUnregister, hipHostUnregister, 
hipHostUnregister_fn, hostPtr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipImportExternalMemory, hipImportExternalMemory, hipImportExternalMemory_fn, extMem_out, memHandleDesc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipImportExternalSemaphore, hipImportExternalSemaphore, hipImportExternalSemaphore_fn, extSem_out, semHandleDesc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipInit, hipInit, hipInit_fn, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipIpcCloseMemHandle, hipIpcCloseMemHandle, hipIpcCloseMemHandle_fn, devPtr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipIpcGetEventHandle, hipIpcGetEventHandle, hipIpcGetEventHandle_fn, handle, event) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipIpcGetMemHandle, hipIpcGetMemHandle, hipIpcGetMemHandle_fn, handle, devPtr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipIpcOpenEventHandle, hipIpcOpenEventHandle, hipIpcOpenEventHandle_fn, event, handle) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipIpcOpenMemHandle, hipIpcOpenMemHandle, hipIpcOpenMemHandle_fn, devPtr, handle, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipKernelNameRef, hipKernelNameRef, hipKernelNameRef_fn, f) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipKernelNameRefByPtr, hipKernelNameRefByPtr, hipKernelNameRefByPtr_fn, hostFunction, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipLaunchByPtr, hipLaunchByPtr, hipLaunchByPtr_fn, func) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, 
ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernel, hipLaunchCooperativeKernel, hipLaunchCooperativeKernel_fn, f, gridDim, blockDimX, kernelParams, sharedMemBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernelMultiDevice, hipLaunchCooperativeKernelMultiDevice, hipLaunchCooperativeKernelMultiDevice_fn, launchParamsList, numDevices, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipLaunchHostFunc, hipLaunchHostFunc, hipLaunchHostFunc_fn, stream, fn, userData) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipLaunchKernel, hipLaunchKernel, hipLaunchKernel_fn, function_address, numBlocks, dimBlocks, args, sharedMemBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMalloc, hipMalloc, hipMalloc_fn, ptr, size) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMalloc3D, hipMalloc3D, hipMalloc3D_fn, pitchedDevPtr, extent) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMalloc3DArray, hipMalloc3DArray, hipMalloc3DArray_fn, array, desc, extent, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMallocArray, hipMallocArray, hipMallocArray_fn, array, desc, width, height, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMallocAsync, hipMallocAsync, hipMallocAsync_fn, dev_ptr, size, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMallocFromPoolAsync, hipMallocFromPoolAsync, hipMallocFromPoolAsync_fn, dev_ptr, size, mem_pool, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMallocHost, hipMallocHost, hipMallocHost_fn, ptr, size) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMallocManaged, hipMallocManaged, hipMallocManaged_fn, dev_ptr, size, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMallocMipmappedArray, hipMallocMipmappedArray, hipMallocMipmappedArray_fn, mipmappedArray, desc, extent, numLevels, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMallocPitch, hipMallocPitch, hipMallocPitch_fn, ptr, pitch, width, height) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemAddressFree, hipMemAddressFree, hipMemAddressFree_fn, devPtr, size) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemAddressReserve, hipMemAddressReserve, hipMemAddressReserve_fn, ptr, size, alignment, addr, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemAdvise, hipMemAdvise, hipMemAdvise_fn, dev_ptr, count, advice, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemAllocHost, hipMemAllocHost, hipMemAllocHost_fn, ptr, size) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemAllocPitch, hipMemAllocPitch, hipMemAllocPitch_fn, dptr, pitch, widthInBytes, height, elementSizeBytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemCreate, hipMemCreate, hipMemCreate_fn, handle, size, prop, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemExportToShareableHandle, hipMemExportToShareableHandle, hipMemExportToShareableHandle_fn, shareableHandle, handle, handleType, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemGetAccess, hipMemGetAccess, hipMemGetAccess_fn, flags, location, 
ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemGetAddressRange, hipMemGetAddressRange, hipMemGetAddressRange_fn, pbase, psize, dptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemGetAllocationGranularity, hipMemGetAllocationGranularity, hipMemGetAllocationGranularity_fn, granularity, prop, option) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemGetAllocationPropertiesFromHandle, hipMemGetAllocationPropertiesFromHandle, hipMemGetAllocationPropertiesFromHandle_fn, prop, handle) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemGetInfo, hipMemGetInfo, hipMemGetInfo_fn, free, total) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemImportFromShareableHandle, hipMemImportFromShareableHandle, hipMemImportFromShareableHandle_fn, handle, osHandle, shHandleType) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemMap, hipMemMap, hipMemMap_fn, ptr, size, offset, handle, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemMapArrayAsync, hipMemMapArrayAsync, hipMemMapArrayAsync_fn, mapInfoList, count, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolCreate, hipMemPoolCreate, hipMemPoolCreate_fn, mem_pool, pool_props) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolDestroy, hipMemPoolDestroy, hipMemPoolDestroy_fn, mem_pool) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolExportPointer, hipMemPoolExportPointer, hipMemPoolExportPointer_fn, export_data, dev_ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, 
ROCPROFILER_HIP_API_ID_hipMemPoolExportToShareableHandle, hipMemPoolExportToShareableHandle, hipMemPoolExportToShareableHandle_fn, shared_handle, mem_pool, handle_type, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolGetAccess, hipMemPoolGetAccess, hipMemPoolGetAccess_fn, flags, mem_pool, location) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolGetAttribute, hipMemPoolGetAttribute, hipMemPoolGetAttribute_fn, mem_pool, attr, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolImportFromShareableHandle, hipMemPoolImportFromShareableHandle, hipMemPoolImportFromShareableHandle_fn, mem_pool, shared_handle, handle_type, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolImportPointer, hipMemPoolImportPointer, hipMemPoolImportPointer_fn, dev_ptr, mem_pool, export_data) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolSetAccess, hipMemPoolSetAccess, hipMemPoolSetAccess_fn, mem_pool, desc_list, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolSetAttribute, hipMemPoolSetAttribute, hipMemPoolSetAttribute_fn, mem_pool, attr, value) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPoolTrimTo, hipMemPoolTrimTo, hipMemPoolTrimTo_fn, mem_pool, min_bytes_to_hold) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPrefetchAsync, hipMemPrefetchAsync, hipMemPrefetchAsync_fn, dev_ptr, count, device, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemPtrGetInfo, hipMemPtrGetInfo, hipMemPtrGetInfo_fn, ptr, size) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, 
ROCPROFILER_HIP_API_ID_hipMemRangeGetAttribute, hipMemRangeGetAttribute, hipMemRangeGetAttribute_fn, data, data_size, attribute, dev_ptr, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemRangeGetAttributes, hipMemRangeGetAttributes, hipMemRangeGetAttributes_fn, data, data_sizes, attributes, num_attributes, dev_ptr, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemRelease, hipMemRelease, hipMemRelease_fn, handle) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemRetainAllocationHandle, hipMemRetainAllocationHandle, hipMemRetainAllocationHandle_fn, handle, addr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemSetAccess, hipMemSetAccess, hipMemSetAccess_fn, ptr, size, desc, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemUnmap, hipMemUnmap, hipMemUnmap_fn, ptr, size) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy, hipMemcpy, hipMemcpy_fn, dst, src, sizeBytes, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2D, hipMemcpy2D, hipMemcpy2D_fn, dst, dpitch, src, spitch, width, height, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DAsync, hipMemcpy2DAsync, hipMemcpy2DAsync_fn, dst, dpitch, src, spitch, width, height, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArray, hipMemcpy2DFromArray, hipMemcpy2DFromArray_fn, dst, dpitch, src, wOffset, hOffset, width, height, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpy2DFromArrayAsync_fn, dst, dpitch, src, wOffset, 
hOffset, width, height, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DToArray, hipMemcpy2DToArray, hipMemcpy2DToArray_fn, dst, wOffset, hOffset, src, spitch, width, height, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DToArrayAsync, hipMemcpy2DToArrayAsync, hipMemcpy2DToArrayAsync_fn, dst, wOffset, hOffset, src, spitch, width, height, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy3D, hipMemcpy3D, hipMemcpy3D_fn, p) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy3DAsync, hipMemcpy3DAsync, hipMemcpy3DAsync_fn, p, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyAsync, hipMemcpyAsync, hipMemcpyAsync_fn, dst, src, sizeBytes, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyAtoH, hipMemcpyAtoH, hipMemcpyAtoH_fn, dst, srcArray, srcOffset, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyDtoD, hipMemcpyDtoD, hipMemcpyDtoD_fn, dst, src, sizeBytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyDtoDAsync, hipMemcpyDtoDAsync, hipMemcpyDtoDAsync_fn, dst, src, sizeBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyDtoH, hipMemcpyDtoH, hipMemcpyDtoH_fn, dst, src, sizeBytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyDtoHAsync, hipMemcpyDtoHAsync, hipMemcpyDtoHAsync_fn, dst, src, sizeBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyFromArray, hipMemcpyFromArray, hipMemcpyFromArray_fn, dst, srcArray, wOffset, hOffset, 
count, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbol, hipMemcpyFromSymbol, hipMemcpyFromSymbol_fn, dst, symbol, sizeBytes, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbolAsync, hipMemcpyFromSymbolAsync, hipMemcpyFromSymbolAsync_fn, dst, symbol, sizeBytes, offset, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyHtoA, hipMemcpyHtoA, hipMemcpyHtoA_fn, dstArray, dstOffset, srcHost, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyHtoD, hipMemcpyHtoD, hipMemcpyHtoD_fn, dst, src, sizeBytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyHtoDAsync, hipMemcpyHtoDAsync, hipMemcpyHtoDAsync_fn, dst, src, sizeBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyParam2D, hipMemcpyParam2D, hipMemcpyParam2D_fn, pCopy) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyParam2DAsync, hipMemcpyParam2DAsync, hipMemcpyParam2DAsync_fn, pCopy, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyPeer, hipMemcpyPeer, hipMemcpyPeer_fn, dst, dstDeviceId, src, srcDeviceId, sizeBytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyPeerAsync, hipMemcpyPeerAsync, hipMemcpyPeerAsync_fn, dst, dstDeviceId, src, srcDevice, sizeBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyToArray, hipMemcpyToArray, hipMemcpyToArray_fn, dst, wOffset, hOffset, src, count, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyToSymbol, hipMemcpyToSymbol, 
hipMemcpyToSymbol_fn, symbol, src, sizeBytes, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyToSymbolAsync, hipMemcpyToSymbolAsync, hipMemcpyToSymbolAsync_fn, symbol, src, sizeBytes, offset, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyWithStream, hipMemcpyWithStream, hipMemcpyWithStream_fn, dst, src, sizeBytes, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset, hipMemset, hipMemset_fn, dst, value, sizeBytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset2D, hipMemset2D, hipMemset2D_fn, dst, pitch, value, width, height) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset2DAsync, hipMemset2DAsync, hipMemset2DAsync_fn, dst, pitch, value, width, height, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset3D, hipMemset3D, hipMemset3D_fn, pitchedDevPtr, value, extent) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset3DAsync, hipMemset3DAsync, hipMemset3DAsync_fn, pitchedDevPtr, value, extent, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetAsync, hipMemsetAsync, hipMemsetAsync_fn, dst, value, sizeBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetD16, hipMemsetD16, hipMemsetD16_fn, dest, value, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetD16Async, hipMemsetD16Async, hipMemsetD16Async_fn, dest, value, count, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetD32, hipMemsetD32, hipMemsetD32_fn, dest, value, count) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetD32Async, hipMemsetD32Async, hipMemsetD32Async_fn, dst, value, count, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetD8, hipMemsetD8, hipMemsetD8_fn, dest, value, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetD8Async, hipMemsetD8Async, hipMemsetD8Async_fn, dest, value, count, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMipmappedArrayCreate, hipMipmappedArrayCreate, hipMipmappedArrayCreate_fn, pHandle, pMipmappedArrayDesc, numMipmapLevels) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMipmappedArrayDestroy, hipMipmappedArrayDestroy, hipMipmappedArrayDestroy_fn, hMipmappedArray) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMipmappedArrayGetLevel, hipMipmappedArrayGetLevel, hipMipmappedArrayGetLevel_fn, pLevelArray, hMipMappedArray, level) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleGetFunction, hipModuleGetFunction, hipModuleGetFunction_fn, function, module, kname) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleGetGlobal, hipModuleGetGlobal, hipModuleGetGlobal_fn, dptr, bytes, hmod, name) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleGetTexRef, hipModuleGetTexRef, hipModuleGetTexRef_fn, texRef, hmod, name) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernel, hipModuleLaunchCooperativeKernel, hipModuleLaunchCooperativeKernel_fn, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream, kernelParams) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice, hipModuleLaunchCooperativeKernelMultiDevice, hipModuleLaunchCooperativeKernelMultiDevice_fn, launchParamsList, numDevices, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleLaunchKernel, hipModuleLaunchKernel, hipModuleLaunchKernel_fn, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, stream, kernelParams, extra) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleLoad, hipModuleLoad, hipModuleLoad_fn, module, fname) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleLoadData, hipModuleLoadData, hipModuleLoadData_fn, module, image) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleLoadDataEx, hipModuleLoadDataEx, hipModuleLoadDataEx_fn, module, image, numOptions, options, optionValues) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_fn, numBlocks, f, blockSize, dynSharedMemPerBlk) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_fn, numBlocks, f, blockSize, dynSharedMemPerBlk, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize, hipModuleOccupancyMaxPotentialBlockSize, hipModuleOccupancyMaxPotentialBlockSize_fn, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags, hipModuleOccupancyMaxPotentialBlockSizeWithFlags, hipModuleOccupancyMaxPotentialBlockSizeWithFlags_fn, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipModuleUnload, hipModuleUnload, hipModuleUnload_fn, module) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor, hipOccupancyMaxActiveBlocksPerMultiprocessor, hipOccupancyMaxActiveBlocksPerMultiprocessor_fn, numBlocks, f, blockSize, dynSharedMemPerBlk) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_fn, numBlocks, f, blockSize, dynSharedMemPerBlk, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipOccupancyMaxPotentialBlockSize, hipOccupancyMaxPotentialBlockSize, hipOccupancyMaxPotentialBlockSize_fn, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit) +HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipPeekAtLastError, hipPeekAtLastError, hipPeekAtLastError_fn) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipPointerGetAttribute, hipPointerGetAttribute, hipPointerGetAttribute_fn, data, attribute, ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipPointerGetAttributes, hipPointerGetAttributes, hipPointerGetAttributes_fn, attributes, ptr) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipPointerSetAttribute, hipPointerSetAttribute, 
hipPointerSetAttribute_fn, value, attribute, ptr) +HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipProfilerStart, hipProfilerStart, hipProfilerStart_fn) +HIP_API_INFO_DEFINITION_0(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipProfilerStop, hipProfilerStop, hipProfilerStop_fn) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipRuntimeGetVersion, hipRuntimeGetVersion, hipRuntimeGetVersion_fn, runtimeVersion) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipSetDevice, hipSetDevice, hipSetDevice_fn, deviceId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipSetDeviceFlags, hipSetDeviceFlags, hipSetDeviceFlags_fn, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipSetupArgument, hipSetupArgument, hipSetupArgument_fn, arg, size, offset) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipSignalExternalSemaphoresAsync, hipSignalExternalSemaphoresAsync, hipSignalExternalSemaphoresAsync_fn, extSemArray, paramsArray, numExtSems, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamAddCallback, hipStreamAddCallback, hipStreamAddCallback_fn, stream, callback, userData, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamAttachMemAsync, hipStreamAttachMemAsync, hipStreamAttachMemAsync_fn, stream, dev_ptr, length, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamBeginCapture, hipStreamBeginCapture, hipStreamBeginCapture_fn, stream, mode) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamCreate, hipStreamCreate, hipStreamCreate_fn, stream) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamCreateWithFlags, hipStreamCreateWithFlags, hipStreamCreateWithFlags_fn, stream, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamCreateWithPriority, hipStreamCreateWithPriority, hipStreamCreateWithPriority_fn, stream, flags, priority) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamDestroy, hipStreamDestroy, hipStreamDestroy_fn, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamEndCapture, hipStreamEndCapture, hipStreamEndCapture_fn, stream, pGraph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo, hipStreamGetCaptureInfo, hipStreamGetCaptureInfo_fn, stream, pCaptureStatus, pId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_v2, hipStreamGetCaptureInfo_v2, hipStreamGetCaptureInfo_v2_fn, stream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetDevice, hipStreamGetDevice, hipStreamGetDevice_fn, stream, device) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetFlags, hipStreamGetFlags, hipStreamGetFlags_fn, stream, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetPriority, hipStreamGetPriority, hipStreamGetPriority_fn, stream, priority) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamIsCapturing, hipStreamIsCapturing, hipStreamIsCapturing_fn, stream, pCaptureStatus) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamQuery, 
hipStreamQuery, hipStreamQuery_fn, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamSynchronize, hipStreamSynchronize, hipStreamSynchronize_fn, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamUpdateCaptureDependencies, hipStreamUpdateCaptureDependencies, hipStreamUpdateCaptureDependencies_fn, stream, dependencies, numDependencies, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamWaitEvent, hipStreamWaitEvent, hipStreamWaitEvent_fn, stream, event, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamWaitValue32, hipStreamWaitValue32, hipStreamWaitValue32_fn, stream, ptr, value, flags, mask) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamWaitValue64, hipStreamWaitValue64, hipStreamWaitValue64_fn, stream, ptr, value, flags, mask) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamWriteValue32, hipStreamWriteValue32, hipStreamWriteValue32_fn, stream, ptr, value, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamWriteValue64, hipStreamWriteValue64, hipStreamWriteValue64_fn, stream, ptr, value, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexObjectCreate, hipTexObjectCreate, hipTexObjectCreate_fn, pTexObject, pResDesc, pTexDesc, pResViewDesc) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexObjectDestroy, hipTexObjectDestroy, hipTexObjectDestroy_fn, texObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceDesc, hipTexObjectGetResourceDesc, hipTexObjectGetResourceDesc_fn, pResDesc, texObject) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexObjectGetResourceViewDesc, hipTexObjectGetResourceViewDesc, hipTexObjectGetResourceViewDesc_fn, pResViewDesc, texObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexObjectGetTextureDesc, hipTexObjectGetTextureDesc, hipTexObjectGetTextureDesc_fn, pTexDesc, texObject) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetAddress, hipTexRefGetAddress, hipTexRefGetAddress_fn, dev_ptr, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetAddressMode, hipTexRefGetAddressMode, hipTexRefGetAddressMode_fn, pam, texRef, dim) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetFilterMode, hipTexRefGetFilterMode, hipTexRefGetFilterMode_fn, pfm, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetFlags, hipTexRefGetFlags, hipTexRefGetFlags_fn, pFlags, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetFormat, hipTexRefGetFormat, hipTexRefGetFormat_fn, pFormat, pNumChannels, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetMaxAnisotropy, hipTexRefGetMaxAnisotropy, hipTexRefGetMaxAnisotropy_fn, pmaxAnsio, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetMipMappedArray, hipTexRefGetMipMappedArray, hipTexRefGetMipMappedArray_fn, pArray, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapFilterMode, hipTexRefGetMipmapFilterMode, hipTexRefGetMipmapFilterMode_fn, pfm, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, 
ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapLevelBias, hipTexRefGetMipmapLevelBias, hipTexRefGetMipmapLevelBias_fn, pbias, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefGetMipmapLevelClamp, hipTexRefGetMipmapLevelClamp, hipTexRefGetMipmapLevelClamp_fn, pminMipmapLevelClamp, pmaxMipmapLevelClamp, texRef) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetAddress, hipTexRefSetAddress, hipTexRefSetAddress_fn, ByteOffset, texRef, dptr, bytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetAddress2D, hipTexRefSetAddress2D, hipTexRefSetAddress2D_fn, texRef, desc, dptr, Pitch) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetAddressMode, hipTexRefSetAddressMode, hipTexRefSetAddressMode_fn, texRef, dim, am) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetArray, hipTexRefSetArray, hipTexRefSetArray_fn, tex, array, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetBorderColor, hipTexRefSetBorderColor, hipTexRefSetBorderColor_fn, texRef, pBorderColor) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetFilterMode, hipTexRefSetFilterMode, hipTexRefSetFilterMode_fn, texRef, fm) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetFlags, hipTexRefSetFlags, hipTexRefSetFlags_fn, texRef, Flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetFormat, hipTexRefSetFormat, hipTexRefSetFormat_fn, texRef, fmt, NumPackedComponents) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetMaxAnisotropy, hipTexRefSetMaxAnisotropy, hipTexRefSetMaxAnisotropy_fn, 
texRef, maxAniso) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapFilterMode, hipTexRefSetMipmapFilterMode, hipTexRefSetMipmapFilterMode_fn, texRef, fm) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapLevelBias, hipTexRefSetMipmapLevelBias, hipTexRefSetMipmapLevelBias_fn, texRef, bias) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetMipmapLevelClamp, hipTexRefSetMipmapLevelClamp, hipTexRefSetMipmapLevelClamp_fn, texRef, minMipMapLevelClamp, maxMipMapLevelClamp) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipTexRefSetMipmappedArray, hipTexRefSetMipmappedArray, hipTexRefSetMipmappedArray_fn, texRef, mipmappedArray, Flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipThreadExchangeStreamCaptureMode, hipThreadExchangeStreamCaptureMode, hipThreadExchangeStreamCaptureMode_fn, mode) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipUnbindTexture, hipUnbindTexture, hipUnbindTexture_fn, tex) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipUserObjectCreate, hipUserObjectCreate, hipUserObjectCreate_fn, object_out, ptr, destroy, initialRefcount, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipUserObjectRelease, hipUserObjectRelease, hipUserObjectRelease_fn, object, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipUserObjectRetain, hipUserObjectRetain, hipUserObjectRetain_fn, object, count) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipWaitExternalSemaphoresAsync, hipWaitExternalSemaphoresAsync, hipWaitExternalSemaphoresAsync_fn, extSemArray, paramsArray, numExtSems, 
stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipCreateChannelDesc, hipCreateChannelDesc, hipCreateChannelDesc_fn, x, y, z, w, f) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipExtModuleLaunchKernel, hipExtModuleLaunchKernel, hipExtModuleLaunchKernel_fn, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipHccModuleLaunchKernel, hipHccModuleLaunchKernel, hipHccModuleLaunchKernel_fn, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy_spt, hipMemcpy_spt, hipMemcpy_spt_fn, dst, src, sizeBytes, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyToSymbol_spt, hipMemcpyToSymbol_spt, hipMemcpyToSymbol_spt_fn, symbol, src, sizeBytes, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbol_spt, hipMemcpyFromSymbol_spt, hipMemcpyFromSymbol_spt_fn, dst, symbol, sizeBytes, offset, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2D_spt, hipMemcpy2D_spt, hipMemcpy2D_spt_fn, dst, dpitch, src, spitch, width, height, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArray_spt, hipMemcpy2DFromArray_spt, hipMemcpy2DFromArray_spt_fn, dst, dpitch, src, wOffset, hOffset, width, height, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy3D_spt, hipMemcpy3D_spt, 
hipMemcpy3D_spt_fn, p) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset_spt, hipMemset_spt, hipMemset_spt_fn, dst, value, sizeBytes) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemsetAsync_spt, hipMemsetAsync_spt, hipMemsetAsync_spt_fn, dst, value, sizeBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset2D_spt, hipMemset2D_spt, hipMemset2D_spt_fn, dst, pitch, value, width, height) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset2DAsync_spt, hipMemset2DAsync_spt, hipMemset2DAsync_spt_fn, dst, pitch, value, width, height, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset3DAsync_spt, hipMemset3DAsync_spt, hipMemset3DAsync_spt_fn, pitchedDevPtr, value, extent, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemset3D_spt, hipMemset3D_spt, hipMemset3D_spt_fn, pitchedDevPtr, value, extent) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyAsync_spt, hipMemcpyAsync_spt, hipMemcpyAsync_spt_fn, dst, src, sizeBytes, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy3DAsync_spt, hipMemcpy3DAsync_spt, hipMemcpy3DAsync_spt_fn, p, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DAsync_spt, hipMemcpy2DAsync_spt, hipMemcpy2DAsync_spt_fn, dst, dpitch, src, spitch, width, height, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyFromSymbolAsync_spt, hipMemcpyFromSymbolAsync_spt, hipMemcpyFromSymbolAsync_spt_fn, dst, symbol, sizeBytes, offset, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, 
ROCPROFILER_HIP_API_ID_hipMemcpyToSymbolAsync_spt, hipMemcpyToSymbolAsync_spt, hipMemcpyToSymbolAsync_spt_fn, symbol, src, sizeBytes, offset, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpyFromArray_spt, hipMemcpyFromArray_spt, hipMemcpyFromArray_spt_fn, dst, src, wOffsetSrc, hOffset, count, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DToArray_spt, hipMemcpy2DToArray_spt, hipMemcpy2DToArray_spt_fn, dst, wOffset, hOffset, src, spitch, width, height, kind) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DFromArrayAsync_spt, hipMemcpy2DFromArrayAsync_spt, hipMemcpy2DFromArrayAsync_spt_fn, dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipMemcpy2DToArrayAsync_spt, hipMemcpy2DToArrayAsync_spt, hipMemcpy2DToArrayAsync_spt_fn, dst, wOffset, hOffset, src, spitch, width, height, kind, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamQuery_spt, hipStreamQuery_spt, hipStreamQuery_spt_fn, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamSynchronize_spt, hipStreamSynchronize_spt, hipStreamSynchronize_spt_fn, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetPriority_spt, hipStreamGetPriority_spt, hipStreamGetPriority_spt_fn, stream, priority) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamWaitEvent_spt, hipStreamWaitEvent_spt, hipStreamWaitEvent_spt_fn, stream, event, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetFlags_spt, hipStreamGetFlags_spt, hipStreamGetFlags_spt_fn, stream, flags) 
+HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamAddCallback_spt, hipStreamAddCallback_spt, hipStreamAddCallback_spt_fn, stream, callback, userData, flags) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipEventRecord_spt, hipEventRecord_spt, hipEventRecord_spt_fn, event, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipLaunchCooperativeKernel_spt, hipLaunchCooperativeKernel_spt, hipLaunchCooperativeKernel_spt_fn, f, gridDim, blockDim, kernelParams, sharedMemBytes, hStream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipLaunchKernel_spt, hipLaunchKernel_spt, hipLaunchKernel_spt_fn, function_address, numBlocks, dimBlocks, args, sharedMemBytes, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGraphLaunch_spt, hipGraphLaunch_spt, hipGraphLaunch_spt_fn, graphExec, stream) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamBeginCapture_spt, hipStreamBeginCapture_spt, hipStreamBeginCapture_spt_fn, stream, mode) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamEndCapture_spt, hipStreamEndCapture_spt, hipStreamEndCapture_spt_fn, stream, pGraph) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamIsCapturing_spt, hipStreamIsCapturing_spt, hipStreamIsCapturing_spt_fn, stream, pCaptureStatus) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_spt, hipStreamGetCaptureInfo_spt, hipStreamGetCaptureInfo_spt_fn, stream, pCaptureStatus, pId) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipStreamGetCaptureInfo_v2_spt, hipStreamGetCaptureInfo_v2_spt, hipStreamGetCaptureInfo_v2_spt_fn, 
stream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipLaunchHostFunc_spt, hipLaunchHostFunc_spt, hipLaunchHostFunc_spt_fn, stream, fn, userData) +HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipGetStreamDeviceId, hipGetStreamDeviceId, hipGetStreamDeviceId_fn, stream) +// HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_API_TABLE_ID_RuntimeApi, ROCPROFILER_HIP_API_ID_hipDrvGraphAddMemsetNode, hipDrvGraphAddMemsetNode, hipDrvGraphAddMemsetNode_fn, phGraphNode, hGraph, dependencies, numDependencies, memsetParams, ctx) +// clang-format on + +#else +# error "Do not compile this file directly. It is included by lib/rocprofiler-sdk/hip/hip.cpp" +#endif diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.hpp new file mode 100644 index 0000000000..584305e0c7 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.hpp @@ -0,0 +1,112 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +#include + +#if HIP_VERSION_MAJOR < 6 +# include "lib/rocprofiler-sdk/hip/details/hip_api_trace.hpp" +#else +# include +#endif + +#include +#include + +namespace rocprofiler +{ +namespace hip +{ +using hip_compiler_api_table_t = HipCompilerDispatchTable; +using hip_runtime_api_table_t = HipDispatchTable; + +struct HipApiTable +{ + hip_compiler_api_table_t* compiler = nullptr; + hip_runtime_api_table_t* runtime = nullptr; +}; + +using hip_api_table_t = HipApiTable; + +hip_api_table_t& +get_table(); + +template +struct hip_table_lookup; + +template +struct hip_domain_info; + +template +struct hip_api_info; + +template +struct hip_api_impl : hip_domain_info +{ + template + static auto set_data_args(DataArgsT&, Args... args); + + template + static auto exec(FuncT&&, Args&&... args); + + template + static auto functor(Args&&... 
args); +}; + +template +const char* +name_by_id(uint32_t id); + +template +uint32_t +id_by_name(const char* name); + +void +iterate_args(uint32_t id, + const rocprofiler_callback_tracing_hip_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + void* user_data); + +template +std::vector +get_names(); + +template +std::vector +get_ids(); + +void +copy_table(hip_compiler_api_table_t* _orig); + +void +copy_table(hip_runtime_api_table_t* _orig); + +void +update_table(hip_compiler_api_table_t* _orig); + +void +update_table(hip_runtime_api_table_t* _orig); +} // namespace hip +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/types.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/types.hpp new file mode 100644 index 0000000000..c99eba8086 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/types.hpp @@ -0,0 +1,182 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include + +#include "lib/common/defines.hpp" + +// #ifndef ROCPROFILER_UNSAFE_NO_VERSION_CHECK +// # if defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0 +// # if HIP_API_TABLE_MAJOR_VERSION <= 0x01 +// namespace rocprofiler +// { +// namespace hip +// { +// static_assert(HIP_CORE_API_TABLE_MAJOR_VERSION == 0x01, +// "Change in the major version of HIP core API table"); +// static_assert(HIP_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x01, +// "Change in the major version of HIP amd-extended API table"); +// static_assert(HIP_FINALIZER_API_TABLE_MAJOR_VERSION == 0x01, +// "Change in the major version of HIP finalizer API table"); +// static_assert(HIP_IMAGE_API_TABLE_MAJOR_VERSION == 0x01, +// "Change in the major version of HIP image API table"); + +// static_assert(HIP_CORE_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP core API table"); +// static_assert(HIP_AMD_EXT_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP amd-extended API table"); +// static_assert(HIP_FINALIZER_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP finalizer API table"); +// static_assert(HIP_IMAGE_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP image API table"); + +// // this should always be updated to latest table size +// template +// struct table_size; + +// // latest version of hip runtime that has been updated for support by rocprofiler +// // and the current version of hip runtime during this compilation +// constexpr size_t latest_version = ROCPROFILER_COMPUTE_VERSION(1, 11, 0); +// constexpr size_t current_version = ROCPROFILER_HIP_RUNTIME_VERSION; + +// // aliases to the template specializations 
providing the table size info +// using current_table_size_t = table_size; +// using latest_table_size_t = table_size; + +// // specialization for v1.9 +// template <> +// struct table_size +// { +// static constexpr size_t finalizer_ext = 64; +// static constexpr size_t image_ext = 120; +// static constexpr size_t amd_ext = 456; +// static constexpr size_t core_api_ext = 1016; +// }; + +// // specialization for v1.10 - increased amd_ext by 10 functions +// template <> +// struct table_size +// : table_size +// { +// static constexpr size_t amd_ext = 552; +// }; + +// // version 1.11 is same as 1.10 +// template <> +// struct table_size +// : table_size +// {}; + +// // default static asserts to check against latest version +// // e.g. v1.12 might have the same table sizes as v1.11 so +// // we don't want to fail to compile if nothing has changed +// template +// struct table_size : latest_table_size_t +// {}; + +// // if you hit these static asserts, that means HIP added entries to the table but did not update +// the +// // step numbers +// static_assert(sizeof(FinalizerExtTable) == current_table_size_t::finalizer_ext, +// "HIP finalizer API table size changed or version not supported"); +// static_assert(sizeof(ImageExtTable) == current_table_size_t::image_ext, +// "HIP image-extended API table size changed or version not supported"); +// static_assert(sizeof(AmdExtTable) == current_table_size_t::amd_ext, +// "HIP amd-extended API table size changed or version not supported"); +// static_assert(sizeof(CoreApiTable) == current_table_size_t::core_api_ext, +// "HIP core API table size changed or version not supported"); +// } // namespace hip +// } // namespace rocprofiler +// # else +// namespace rocprofiler +// { +// namespace hip +// { +// static_assert(HIP_CORE_API_TABLE_MAJOR_VERSION == 0x02, +// "Change in the major version of HIP core API table"); +// static_assert(HIP_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x02, +// "Change in the major version of HIP 
amd-extended API table"); +// static_assert(HIP_FINALIZER_API_TABLE_MAJOR_VERSION == 0x02, +// "Change in the major version of HIP finalizer API table"); +// static_assert(HIP_IMAGE_API_TABLE_MAJOR_VERSION == 0x02, +// "Change in the major version of HIP image API table"); + +// static_assert(HIP_CORE_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP core API table"); +// static_assert(HIP_AMD_EXT_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP amd-extended API table"); +// static_assert(HIP_FINALIZER_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP finalizer API table"); +// static_assert(HIP_IMAGE_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP image API table"); +// static_assert(HIP_AQLPROFILE_API_TABLE_STEP_VERSION == 0x00, +// "Change in the step version of HIP aqlprofile API table"); + +// // this should always be updated to latest table size +// template +// struct table_size; + +// // latest version of hip runtime that has been updated for support by rocprofiler +// // and the current version of hip runtime during this compilation +// constexpr size_t latest_version = ROCPROFILER_COMPUTE_VERSION(1, 12, 0); +// constexpr size_t current_version = ROCPROFILER_HIP_RUNTIME_VERSION; + +// // aliases to the template specializations providing the table size info +// using current_table_size_t = table_size; +// using latest_table_size_t = table_size; + +// // specialization for v1.12 +// template <> +// struct table_size +// { +// static constexpr size_t finalizer_ext = 64; +// static constexpr size_t image_ext = 120; +// static constexpr size_t amd_ext = 552; +// static constexpr size_t core_api_ext = 1016; +// }; + +// // default static asserts to check against latest version +// // e.g. 
v1.12 might have the same table sizes as v1.11 so +// // we don't want to fail to compile if nothing has changed +// template +// struct table_size : latest_table_size_t +// {}; + +// // if you hit these static asserts, that means HIP added entries to the table but did not update +// the +// // step numbers +// static_assert(sizeof(FinalizerExtTable) == current_table_size_t::finalizer_ext, +// "HIP finalizer API table size changed or version not supported"); +// static_assert(sizeof(ImageExtTable) == current_table_size_t::image_ext, +// "HIP image-extended API table size changed or version not supported"); +// static_assert(sizeof(AmdExtTable) == current_table_size_t::amd_ext, +// "HIP amd-extended API table size changed or version not supported"); +// static_assert(sizeof(CoreApiTable) == current_table_size_t::core_api_ext, +// "HIP core API table size changed or version not supported"); +// } // namespace hip +// } // namespace rocprofiler +// # endif +// # endif +// #endif diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/utils.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/utils.hpp new file mode 100644 index 0000000000..0e2ae5f431 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/utils.hpp @@ -0,0 +1,103 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +#include "lib/common/mpl.hpp" +#include "lib/rocprofiler-sdk/hip/details/ostream.hpp" + +#include "fmt/core.h" +#include "fmt/ranges.h" + +#include +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace hip +{ +namespace utils +{ +inline static std::ostream& +operator<<(std::ostream& out, const hipDeviceProp_tR0000& v) +{ + return ::rocprofiler::hip::detail::operator<<(out, v); +} + +template +auto +stringize_impl(const Tp& _v) +{ + using nonpointer_type = typename std::remove_pointer_t; + + if constexpr(common::mpl::is_pair::value) + { + return std::make_pair(stringize_impl(_v.first), stringize_impl(_v.second)); + } + else if constexpr(std::is_constructible::value) + { + auto _ss = std::stringstream{}; + _ss << _v; + return _ss.str(); + } + else if constexpr(fmt::is_formattable::value && !std::is_pointer::value) + { + return fmt::format("{}", _v); + } + else if constexpr(std::is_pointer::value && !std::is_pointer::value && + common::mpl::is_type_complete_v && + !std::is_void::value) + { + if(_v) + { + return stringize_impl(*_v); + } + else + { + auto _ss = std::stringstream{}; + _ss << _v; + return _ss.str(); + } + } + else + { + auto _ss = std::stringstream{}; + _ss << _v; + return _ss.str(); + } +} + +template +auto +stringize(Args... 
args) +{ + return std::vector>{stringize_impl(args)...}; +} +} // namespace utils +} // namespace hip +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp index 28df0cfcb8..d1ce3e9651 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/async_copy.cpp @@ -181,14 +181,14 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg) constexpr auto nanosec = 1000000000UL; uint64_t sysclock_hz = 0; ROCP_HSA_TABLE_CALL(ERROR, - get_table().core_->hsa_system_get_info_fn( + get_core_table()->hsa_system_get_info_fn( HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz)); return (nanosec / sysclock_hz); }(); auto* _data = static_cast(arg); auto copy_time = hsa_amd_profiling_async_copy_time_t{}; - auto copy_time_status = get_table().amd_ext_->hsa_amd_profiling_get_async_copy_time_fn( + auto copy_time_status = get_amd_ext_table()->hsa_amd_profiling_get_async_copy_time_fn( _data->rocp_signal, &copy_time); // normalize @@ -260,16 +260,16 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg) std::tie(rocp_amd_signal->start_ts, rocp_amd_signal->end_ts); const hsa_signal_value_t new_value = - get_table().core_->hsa_signal_load_relaxed_fn(_data->orig_signal) - 1; + get_core_table()->hsa_signal_load_relaxed_fn(_data->orig_signal) - 1; LOG_IF(ERROR, signal_value != new_value) << "bad original signal value in " << __FUNCTION__; - get_table().core_->hsa_signal_store_screlease_fn(_data->orig_signal, signal_value); + get_core_table()->hsa_signal_store_screlease_fn(_data->orig_signal, signal_value); } if(signal_value == 0) { - ROCP_HSA_TABLE_CALL(ERROR, get_table().core_->hsa_signal_destroy_fn(_data->rocp_signal)); + ROCP_HSA_TABLE_CALL(ERROR, get_core_table()->hsa_signal_destroy_fn(_data->rocp_signal)); delete _data; get_active_signals()->fetch_sub(1); } @@ 
-426,12 +426,12 @@ async_copy_impl(Args... args) constexpr auto completion_signal_idx = arg_indices::completion_signal_idx; auto& _completion_signal = std::get(_tied_args); const hsa_signal_value_t _completion_signal_val = - get_table().core_->hsa_signal_load_scacquire_fn(_completion_signal); + get_core_table()->hsa_signal_load_scacquire_fn(_completion_signal); { const uint32_t num_consumers = 0; const hsa_agent_t* consumers = nullptr; - auto _status = get_table().core_->hsa_signal_create_fn( + auto _status = get_core_table()->hsa_signal_create_fn( _completion_signal_val, num_consumers, consumers, &_data->rocp_signal); if(_status != HSA_STATUS_SUCCESS) @@ -449,18 +449,17 @@ async_copy_impl(Args... args) } { - auto _status = - get_table().amd_ext_->hsa_amd_signal_async_handler_fn(_data->rocp_signal, - HSA_SIGNAL_CONDITION_LT, - _completion_signal_val, - async_copy_handler, - _data); + auto _status = get_amd_ext_table()->hsa_amd_signal_async_handler_fn(_data->rocp_signal, + HSA_SIGNAL_CONDITION_LT, + _completion_signal_val, + async_copy_handler, + _data); if(_status != HSA_STATUS_SUCCESS) { LOG(ERROR) << "hsa_amd_signal_async_handler returned non-zero error code " << _status; - ROCP_HSA_TABLE_CALL(ERROR, get_table().core_->hsa_signal_destroy_fn(_data->rocp_signal)) + ROCP_HSA_TABLE_CALL(ERROR, get_core_table()->hsa_signal_destroy_fn(_data->rocp_signal)) << ":: failed to destroy signal after async handler failed"; get_active_signals()->fetch_sub(1); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp index 5a7282ecd9..badc0efb4a 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/defines.hpp @@ -141,6 +141,11 @@ using this_type = hsa_api_info; \ using base_type = hsa_api_impl; \ \ + static constexpr auto offset() \ + { \ + return offsetof(hsa_table_lookup::type, HSA_FUNC_PTR); \ + 
} \ + \ static auto& get_table() { return hsa_table_lookup{}(); } \ \ template \ @@ -213,6 +218,11 @@ using this_type = hsa_api_info; \ using base_type = hsa_api_impl; \ \ + static constexpr auto offset() \ + { \ + return offsetof(hsa_table_lookup::type, HSA_FUNC_PTR); \ + } \ + \ static auto& get_table() { return hsa_table_lookup{}(); } \ \ template \ @@ -270,7 +280,7 @@ } \ } -#define HSA_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, MEMBER) \ +#define HSA_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, TYPE, MEMBER) \ namespace rocprofiler \ { \ namespace hsa \ @@ -278,6 +288,7 @@ template <> \ struct hsa_table_lookup \ { \ + using type = TYPE; \ auto& operator()(hsa_api_table_t& _v) const { return _v.MEMBER; } \ auto& operator()(hsa_api_table_t* _v) const { return _v->MEMBER; } \ auto& operator()() const { return (*this)(get_table()); } \ diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/ostream.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/ostream.hpp index fe8b9b3e8f..eb98bc8c64 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/ostream.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/ostream.hpp @@ -32,13 +32,12 @@ namespace rocprofiler { namespace hsa { +namespace detail +{ static int HSA_depth_max = 1; static thread_local int HSA_depth_max_cnt = 0; static std::string_view HSA_structs_regex = {}; -// begin ostream ops for HSA -// basic ostream ops -namespace detail -{ + template inline static std::ostream& operator<<(std::ostream& out, const T& v) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp index 8b94092739..545aa96cc6 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp @@ -22,6 +22,7 @@ #include "lib/rocprofiler-sdk/hsa/hsa.hpp" #include "lib/common/defines.hpp" +#include 
"lib/common/static_object.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" @@ -33,6 +34,7 @@ #include #include #include +#include #include @@ -81,36 +83,87 @@ set_data_retval(DataT& _data, Tp _val) static_assert(std::is_void::value, "Error! unsupported return type"); } } + +// helper to ensure that the version variable is initialized with the correct major version, minor +// version (sizeof), and step version +#define DEFINE_TABLE_VERSION_IMPL(VARIABLE, OBJECT, MAJOR_VERSION, STEP_VERSION) \ + constexpr auto VARIABLE = hsa_table_version_t{MAJOR_VERSION, sizeof(OBJECT), STEP_VERSION, 0}; + +// helper to ensure that the version variable is initialized with the correct major version, minor +// version (sizeof), and step version +#define DEFINE_TABLE_VERSION(ALIAS, NAME) \ + DEFINE_TABLE_VERSION_IMPL(hsa_##ALIAS##_table_version, \ + hsa_##ALIAS##_table_t, \ + HSA_##NAME##_TABLE_MAJOR_VERSION, \ + HSA_##NAME##_TABLE_STEP_VERSION) + +DEFINE_TABLE_VERSION(api, API) +DEFINE_TABLE_VERSION(core, CORE_API) +DEFINE_TABLE_VERSION(amd_ext, AMD_EXT_API) +DEFINE_TABLE_VERSION(fini_ext, FINALIZER_API) +DEFINE_TABLE_VERSION(img_ext, IMAGE_API) + +#undef DEFINE_TABLE_VERSION +#undef DEFINE_TABLE_VERSION_IMPL + +template +Tp*& +get_table_impl(hsa_table_version_t _version) +{ + auto*& val = common::static_object::construct(); + val->version = _version; + return val; +} } // namespace +hsa_table_version_t +get_table_version() +{ + return hsa_api_table_version; +} + +// helper to ensure that table type is paired with the correct table version +#define GET_TABLE_IMPL(ALIAS) get_table_impl(hsa_##ALIAS##_table_version); + +hsa_core_table_t* +get_core_table() +{ + static auto*& val = GET_TABLE_IMPL(core); + return val; +} + +hsa_amd_ext_table_t* +get_amd_ext_table() +{ + static auto*& val = GET_TABLE_IMPL(amd_ext); + return val; +} + +hsa_fini_ext_table_t* +get_fini_ext_table() +{ + static auto*& val = 
GET_TABLE_IMPL(fini_ext); + return val; +} + +hsa_img_ext_table_t* +get_img_ext_table() +{ + static auto*& val = GET_TABLE_IMPL(img_ext); + return val; +} + +#undef GET_TABLE_IMPL + hsa_api_table_t& get_table() { - static auto _core = CoreApiTable{}; - static auto _amd_ext = AmdExtTable{}; - static auto _img_ext = ImageExtTable{}; - static auto _fini_ext = FinalizerExtTable{}; - static auto _v = []() { - _core.version = { - HSA_CORE_API_TABLE_MAJOR_VERSION, sizeof(_core), HSA_CORE_API_TABLE_STEP_VERSION, 0}; - _amd_ext.version = {HSA_AMD_EXT_API_TABLE_MAJOR_VERSION, - sizeof(_amd_ext), - HSA_AMD_EXT_API_TABLE_STEP_VERSION, - 0}; - _img_ext.version = {HSA_IMAGE_API_TABLE_MAJOR_VERSION, - sizeof(_img_ext), - HSA_IMAGE_API_TABLE_STEP_VERSION, - 0}; - _fini_ext.version = {HSA_FINALIZER_API_TABLE_MAJOR_VERSION, - sizeof(_fini_ext), - HSA_FINALIZER_API_TABLE_STEP_VERSION, - 0}; - auto _version = ApiTableVersion{ - HSA_API_TABLE_MAJOR_VERSION, sizeof(HsaApiTable), HSA_API_TABLE_STEP_VERSION, 0}; - auto _val = hsa_api_table_t{_version, &_core, &_amd_ext, &_fini_ext, &_img_ext}; - return _val; - }(); - return _v; + static auto tbl = hsa_api_table_t{.version = hsa_api_table_version, + .core_ = get_core_table(), + .amd_ext_ = get_amd_ext_table(), + .finalizer_ext_ = get_fini_ext_table(), + .image_ext_ = get_img_ext_table()}; + return tbl; } template @@ -169,6 +222,61 @@ hsa_api_impl::exec(FuncT&& _func, Args&&... 
args) return return_type{HSA_STATUS_ERROR}; } +namespace +{ +using correlation_service = context::correlation_tracing_service; +using buffer_hsa_api_record_t = rocprofiler_buffer_tracing_hsa_api_record_t; +using callback_hsa_api_data_t = rocprofiler_callback_tracing_hsa_api_data_t; + +struct callback_context_data +{ + const context::context* ctx = nullptr; + rocprofiler_callback_tracing_record_t record = {}; + rocprofiler_user_data_t user_data = {.value = 0}; +}; + +struct buffered_context_data +{ + const context::context* ctx = nullptr; + rocprofiler_user_data_t external_correlation = {}; +}; + +constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0}; + +void +populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx, + rocprofiler_buffer_tracing_kind_t buffered_domain_idx, + int operation_idx, + std::vector& callback_contexts, + std::vector& buffered_contexts) +{ + auto active_contexts = context::context_array_t{}; + auto thr_id = common::get_tid(); + for(const auto* itr : context::get_active_contexts(active_contexts)) + { + if(!itr) continue; + + // if(itr->pc_sampler) has_pc_sampling = true; + + if(itr->callback_tracer) + { + // if the given domain + op is not enabled, skip this context + if(itr->callback_tracer->domains(callback_domain_idx, operation_idx)) + callback_contexts.emplace_back( + callback_context_data{itr, rocprofiler_callback_tracing_record_t{}}); + } + + if(itr->buffered_tracer) + { + // if the given domain + op is not enabled, skip this context + if(itr->buffered_tracer->domains(buffered_domain_idx, operation_idx)) + buffered_contexts.emplace_back(buffered_context_data{ + itr, itr->correlation_tracer.external_correlator.get(thr_id)}); + } + } +} +} // namespace + template template auto @@ -185,48 +293,16 @@ hsa_api_impl::functor(Args&&... 
args) return HSA_STATUS_SUCCESS; } - struct callback_context_data - { - const context::context* ctx = nullptr; - rocprofiler_callback_tracing_record_t record = {}; - rocprofiler_user_data_t user_data = {.value = 0}; - }; + auto thr_id = common::get_tid(); + auto callback_contexts = std::vector{}; + auto buffered_contexts = std::vector{}; + auto has_pc_sampling = false; - struct buffered_context_data - { - const context::context* ctx = nullptr; - rocprofiler_user_data_t external_correlation = {}; - }; - - static thread_local auto active_contexts = context::context_array_t{}; - auto thr_id = common::get_tid(); - auto callback_contexts = std::vector{}; - auto buffered_contexts = std::vector{}; - auto has_pc_sampling = false; - for(const auto* itr : context::get_active_contexts(active_contexts)) - { - if(!itr) continue; - - // if(itr->pc_sampler) has_pc_sampling = true; - - if(itr->callback_tracer) - { - // if the given domain + op is not enabled, skip this context - if(itr->callback_tracer->domains(info_type::callback_domain_idx, - info_type::operation_idx)) - callback_contexts.emplace_back( - callback_context_data{itr, rocprofiler_callback_tracing_record_t{}}); - } - - if(itr->buffered_tracer) - { - // if the given domain + op is not enabled, skip this context - if(itr->buffered_tracer->domains(info_type::buffered_domain_idx, - info_type::operation_idx)) - buffered_contexts.emplace_back(buffered_context_data{ - itr, itr->correlation_tracer.external_correlator.get(thr_id)}); - } - } + populate_contexts(info_type::callback_domain_idx, + info_type::buffered_domain_idx, + info_type::operation_idx, + callback_contexts, + buffered_contexts); if(callback_contexts.empty() && buffered_contexts.empty()) { @@ -237,16 +313,11 @@ hsa_api_impl::functor(Args&&... 
args) return HSA_STATUS_SUCCESS; } - using correlation_service = context::correlation_tracing_service; - using buffer_hsa_api_record_t = rocprofiler_buffer_tracing_hsa_api_record_t; - using callback_hsa_api_data_t = rocprofiler_callback_tracing_hsa_api_data_t; - - constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0}; - auto ref_count = (has_pc_sampling) ? 4 : 2; - auto buffer_record = common::init_public_api_struct(buffer_hsa_api_record_t{}); - auto tracer_data = common::init_public_api_struct(callback_hsa_api_data_t{}); - auto* corr_id = correlation_service::construct(ref_count); - auto internal_corr_id = corr_id->internal; + auto ref_count = (has_pc_sampling) ? 4 : 2; + auto buffer_record = common::init_public_api_struct(buffer_hsa_api_record_t{}); + auto tracer_data = common::init_public_api_struct(callback_hsa_api_data_t{}); + auto* corr_id = correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; // construct the buffered info before the callback so the callbacks are as closely wrapped // around the function call as possible @@ -495,11 +566,15 @@ update_table(hsa_api_table_t* _orig, std::index_sequence) return; // 1. get the sub-table containing the function pointer - // 2. get reference to function pointer in sub-table - // 3. update function pointer with functor + // 2. make sure the function pointer offset exists in passed table auto& _table = _info.get_table(_orig_v); - auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_info.offset() < _table->version.minor_id) + { + // 3. get reference to function pointer in sub-table + // 4. 
update function pointer with functor + auto& _func = _info.get_table_func(_table); + _func = _info.get_functor(_func); + } }; auto _contexts = context::get_registered_contexts(); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp index 5af4e7f4eb..6ab10634f2 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp @@ -26,9 +26,9 @@ #include -HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_API_TABLE_ID_CoreApi, core_) -HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_API_TABLE_ID_AmdExt, amd_ext_) -HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_API_TABLE_ID_ImageExt, image_ext_) +HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_API_TABLE_ID_CoreApi, ::CoreApiTable, core_) +HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_API_TABLE_ID_AmdExt, ::AmdExtTable, amd_ext_) +HSA_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_HSA_API_TABLE_ID_ImageExt, ::ImageExtTable, image_ext_) #if defined(ROCPROFILER_LIB_ROCPROFILER_HSA_HSA_CPP_IMPL) && \ ROCPROFILER_LIB_ROCPROFILER_HSA_HSA_CPP_IMPL == 1 diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp index 7656002e32..1b39b23e6b 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.hpp @@ -22,6 +22,7 @@ #pragma once +#include #include #include @@ -31,11 +32,31 @@ namespace rocprofiler { namespace hsa { -using hsa_api_table_t = HsaApiTable; +using hsa_api_table_t = ::HsaApiTable; +using hsa_table_version_t = ::ApiTableVersion; +using hsa_core_table_t = ::CoreApiTable; +using hsa_amd_ext_table_t = ::AmdExtTable; +using hsa_fini_ext_table_t = ::FinalizerExtTable; +using hsa_img_ext_table_t = ::ImageExtTable; hsa_api_table_t& get_table(); +hsa_table_version_t 
+get_table_version(); + +hsa_core_table_t* +get_core_table(); + +hsa_amd_ext_table_t* +get_amd_ext_table(); + +hsa_fini_ext_table_t* +get_fini_ext_table(); + +hsa_img_ext_table_t* +get_img_ext_table(); + template struct hsa_table_lookup; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp index 9e5bf71bb8..581cbfc625 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp @@ -25,6 +25,7 @@ #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/hsa/code_object.hpp" +#include "lib/rocprofiler-sdk/hsa/hsa.hpp" #include #include @@ -88,16 +89,16 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data) if(!ctxs.empty()) { // only do the following work if there are contexts that require this info - const auto* _rocp_agent = queue_info_session.queue.get_agent().get_rocp_agent(); - auto _hsa_agent = queue_info_session.queue.get_agent().get_hsa_agent(); - auto _queue_id = queue_info_session.queue.get_id(); + const auto* _rocp_agent = queue_info_session.rocp_agent; + auto _hsa_agent = queue_info_session.hsa_agent; + auto _queue_id = queue_info_session.queue_id; auto _signal = queue_info_session.interrupt_signal; auto _kern_id = queue_info_session.kernel_id; const auto& _extern_corr_ids = queue_info_session.extern_corr_ids; auto dispatch_time = hsa_amd_profiling_dispatch_time_t{}; auto dispatch_time_status = - queue_info_session.queue.ext_api().hsa_amd_profiling_get_dispatch_time_fn( + hsa::get_amd_ext_table()->hsa_amd_profiling_get_dispatch_time_fn( _hsa_agent, _signal, &dispatch_time); // if we encounter this in CI, it will cause test to fail @@ -185,17 +186,14 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data) // Delete signals and packets, signal we have completed. 
if(queue_info_session.interrupt_signal.handle != 0u) { - queue_info_session.queue.core_api().hsa_signal_destroy_fn( - queue_info_session.interrupt_signal); + hsa::get_core_table()->hsa_signal_destroy_fn(queue_info_session.interrupt_signal); } if(queue_info_session.kernel_pkt.ext_amd_aql_pm4.completion_signal.handle != 0u) { - queue_info_session.queue.core_api().hsa_signal_destroy_fn( + hsa::get_core_table()->hsa_signal_destroy_fn( queue_info_session.kernel_pkt.ext_amd_aql_pm4.completion_signal); } - queue_info_session.queue.async_complete(); - if(_corr_id) { LOG_IF(FATAL, _corr_id->ref_count.load() == 0) @@ -204,7 +202,9 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data) _corr_id->ref_count.fetch_sub(1); } + queue_info_session.queue.async_complete(); delete static_cast(data); + return false; } @@ -260,7 +260,7 @@ WriteInterceptor(const void* packets, LOG_IF(FATAL, data == nullptr) << "WriteInterceptor was not passed a pointer to the queue"; - static thread_local auto ctxs = context_array_t{}; + auto ctxs = context_array_t{}; context::get_active_contexts(ctxs, context_filter); auto& queue = *static_cast(data); @@ -395,6 +395,9 @@ WriteInterceptor(const void* packets, .interrupt_signal = interrupt_signal, .tid = thr_id, .kernel_id = kernel_id, + .queue_id = queue.get_id(), + .hsa_agent = queue.get_agent().get_hsa_agent(), + .rocp_agent = queue.get_agent().get_rocp_agent(), .correlation_id = corr_id, .kernel_pkt = kernel_pkt, .contexts = ctxs, @@ -405,15 +408,7 @@ WriteInterceptor(const void* packets, } } // namespace -Queue::~Queue() -{ - // Potentially replace with condition variable at some point - // but performance may not matter here. 
- while(_active_async_packets.load(std::memory_order_relaxed) > 0) - { - std::this_thread::sleep_for(std::chrono::milliseconds{1}); - } -} +Queue::~Queue() { sync(); } void Queue::signal_async_handler(const hsa_signal_t& signal, Queue::queue_info_session_t* data) const @@ -470,6 +465,28 @@ Queue::Queue(const AgentCache& agent, *queue = _intercept_queue; } +void +Queue::sync() const +{ + // Potentially replace with condition variable at some point + // but performance may not matter here. + constexpr auto max_wait_time = std::chrono::milliseconds{1000}; + constexpr auto query_interval = std::chrono::milliseconds{10}; + auto _orig_active = _active_async_packets.load(std::memory_order_relaxed); + auto _curr_active = _orig_active; + auto inactive = common::yield( + [this, &_curr_active]() { + return ((_curr_active = _active_async_packets.load(std::memory_order_relaxed)) == 0); + }, + max_wait_time, + query_interval); + + LOG_IF(WARNING, !inactive) + << "rocprofiler-sdk Queue (instance=" << this << ") abandoned waiting for " << _orig_active + << " async completion callbacks after " << max_wait_time.count() << " msecs. 
There were " + << _curr_active << " async completion callbacks which were not delivered at that time."; +} + void Queue::register_callback(ClientID id, queue_cb_t enqueue_cb, completed_cb_t complete_cb) { diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp index 95323a22e7..43c1b683dc 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.hpp @@ -22,6 +22,7 @@ #pragma once +#include #include #include @@ -113,6 +114,9 @@ public: hsa_signal_t interrupt_signal = {}; rocprofiler_thread_id_t tid = common::get_tid(); rocprofiler_kernel_id_t kernel_id = 0; + rocprofiler_queue_id_t queue_id = {}; + hsa_agent_t hsa_agent = {}; + const rocprofiler_agent_t* rocp_agent = nullptr; context::correlation_id* correlation_id = nullptr; rocprofiler_packet kernel_pkt = {}; context_array_t contexts = {}; @@ -169,6 +173,7 @@ public: // have comleted. void async_started() { _active_async_packets++; } void async_complete() { _active_async_packets--; } + void sync() const; void register_callback(ClientID id, queue_cb_t enqueue_cb, completed_cb_t complete_cb); void remove_callback(ClientID id); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp index b445d45d4e..4274833cb3 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp @@ -21,6 +21,7 @@ // THE SOFTWARE. 
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" +#include "lib/common/static_object.hpp" #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" @@ -215,11 +216,22 @@ QueueController::get_queue(const hsa_queue_t& _hsa_queue) const _hsa_queue); } +void +QueueController::iterate_queues(const queue_iterator_cb_t& cb) const +{ + _queues.rlock([&cb](const queue_map_t& _queues_v) { + for(const auto& itr : _queues_v) + { + if(itr.second) cb(itr.second.get()); + } + }); +} + QueueController& get_queue_controller() { - static QueueController controller; - return controller; + static auto*& controller = common::static_object::construct(); + return *(CHECK_NOTNULL(controller)); } void @@ -227,5 +239,11 @@ queue_controller_init(HsaApiTable* table) { get_queue_controller().init(*table->core_, *table->amd_ext_); } + +void +queue_controller_fini() +{ + get_queue_controller().iterate_queues([](const Queue* _queue) { _queue->sync(); }); +} } // namespace hsa } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp index 1c4a8c692c..dd008da3f2 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.hpp @@ -27,6 +27,7 @@ #include "lib/rocprofiler-sdk/hsa/queue.hpp" #include +#include #include #include #include @@ -39,6 +40,8 @@ namespace hsa class QueueController { public: + using queue_iterator_cb_t = std::function; + QueueController() = default; // Initializes the QueueInterceptor. This must be delayed until // HSA has been inited. 
@@ -65,6 +68,8 @@ public: const Queue* get_queue(const hsa_queue_t&) const; + void iterate_queues(const queue_iterator_cb_t&) const; + private: using agent_callback_tuple_t = std::tuple; @@ -85,5 +90,7 @@ get_queue_controller(); void queue_controller_init(HsaApiTable* table); +void +queue_controller_fini(); } // namespace hsa } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp index 6cd18cca84..28d0ffa60e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp @@ -24,11 +24,14 @@ #include -#include +#include "lib/common/mpl.hpp" #include "fmt/core.h" #include "fmt/ranges.h" +#include +#include + #include #include #include @@ -47,34 +50,41 @@ namespace hsa { namespace utils { -template -struct is_pair_impl -{ - static constexpr auto value = false; -}; - -template -struct is_pair_impl> -{ - static constexpr auto value = true; -}; - -template -struct is_pair : is_pair_impl>>> -{}; - template auto stringize_impl(const Tp& _v) { - if constexpr(is_pair::value) + using nonpointer_type = typename std::remove_pointer_t; + + if constexpr(common::mpl::is_pair::value) { return std::make_pair(stringize_impl(_v.first), stringize_impl(_v.second)); } + else if constexpr(std::is_constructible::value) + { + auto _ss = std::stringstream{}; + _ss << _v; + return _ss.str(); + } else if constexpr(fmt::is_formattable::value && !std::is_pointer::value) { return fmt::format("{}", _v); } + else if constexpr(std::is_pointer::value && !std::is_pointer::value && + common::mpl::is_type_complete_v && + !std::is_void::value) + { + if(_v) + { + return stringize_impl(*_v); + } + else + { + auto _ss = std::stringstream{}; + _ss << _v; + return _ss.str(); + } + } else { auto _ss = std::stringstream{}; @@ -116,6 +126,10 @@ struct handle_formatter : handle_formatter #if 
ROCPROFILER_HSA_RUNTIME_EXT_AMD_VERSION >= 10300 namespace fmt { +template <> +struct formatter : rocprofiler::hsa::utils::handle_formatter +{}; + template <> struct formatter : rocprofiler::hsa::utils::handle_formatter @@ -126,6 +140,32 @@ struct formatter : rocprofiler::hsa::utils::handle_formatter {}; +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(hsa_access_permission_t v, Ctx& ctx) const + { + auto label = [v]() -> std::string_view { + switch(v) + { + case HSA_ACCESS_PERMISSION_NONE: return "NONE"; + case HSA_ACCESS_PERMISSION_RO: return "READ_ONLY"; + case HSA_ACCESS_PERMISSION_WO: return "WRITE_ONLY"; + case HSA_ACCESS_PERMISSION_RW: return "READ_WRITE"; + } + return "NONE"; + }(); + return fmt::format_to(ctx.out(), "{}", label); + } +}; + template <> struct formatter { diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp index c5fa8f2429..65089c3520 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/intercept_table.cpp @@ -32,6 +32,7 @@ #include "lib/rocprofiler-sdk/registration.hpp" #include +#include #include #include @@ -50,8 +51,9 @@ using library_sequence_t = std::integer_sequence{}; + ROCPROFILER_HIP_RUNTIME_LIBRARY, + ROCPROFILER_MARKER_LIBRARY, + ROCPROFILER_HIP_COMPILER_LIBRARY>{}; // check that intercept_library_seq is up to date static_assert((1 << (intercept_library_seq.size())) == ROCPROFILER_LIBRARY_LAST, @@ -161,6 +163,16 @@ template void notify_runtime_api_registration(rocprofiler_runtime_library_t, uint64_t, uint64_t, std::tuple); + +template void notify_runtime_api_registration(rocprofiler_runtime_library_t, + uint64_t, + uint64_t, + std::tuple); + +template void notify_runtime_api_registration(rocprofiler_runtime_library_t, + uint64_t, + uint64_t, + 
std::tuple); } // namespace intercept_table } // namespace rocprofiler @@ -177,8 +189,6 @@ rocprofiler_at_runtime_api_registration(rocprofiler_intercept_library_cb_t callb if((libs & ROCPROFILER_LIBRARY) == ROCPROFILER_LIBRARY) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; - else if((libs & ROCPROFILER_HIP_LIBRARY) == ROCPROFILER_HIP_LIBRARY) - return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; else if((libs & ROCPROFILER_MARKER_LIBRARY) == ROCPROFILER_MARKER_LIBRARY) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp index effef6e926..a94a68fa8d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp @@ -69,10 +69,12 @@ using creation_notifier_cb_t = void (*)(rocprofiler_runtime_library_t, void*); using thread_pool_config_t = PTL::ThreadPool::Config; // this is used to loop over the different libraries -constexpr auto creation_notifier_library_seq = library_sequence_t{}; +constexpr auto creation_notifier_library_seq = + library_sequence_t{}; // check that creation_notifier_library_seq is up to date static_assert((1 << (creation_notifier_library_seq.size() - 1)) == ROCPROFILER_LIBRARY_LAST, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp index ac518d1960..c2bb388682 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp @@ -26,6 +26,7 @@ #include "lib/common/static_object.hpp" #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/hip/hip.hpp" #include "lib/rocprofiler-sdk/hsa/async_copy.hpp" #include 
"lib/rocprofiler-sdk/hsa/code_object.hpp" #include "lib/rocprofiler-sdk/hsa/hsa.hpp" @@ -51,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -515,10 +517,17 @@ set_fini_status(int v) void initialize() { - if(get_init_status() != 0) return; + LOG(INFO) << "rocprofiler initialize called..."; + + if(get_init_status() != 0) + { + LOG(INFO) << "rocprofiler initialize ignored..."; + return; + } static auto _once = std::once_flag{}; std::call_once(_once, []() { + LOG(INFO) << "rocprofiler initialize started..."; // initialization is in process set_init_status(-1); std::atexit([]() { @@ -558,6 +567,7 @@ finalize() std::call_once(_once, []() { set_fini_status(-1); hsa::async_copy_fini(); + hsa::queue_controller_fini(); hsa::code_object_shutdown(); if(get_init_status() > 0) { @@ -588,6 +598,8 @@ rocprofiler_is_finalized(int* status) rocprofiler_status_t rocprofiler_force_configure(rocprofiler_configure_func_t configure_func) { + LOG(INFO) << "forcing rocprofiler configuration"; + auto& forced_config = rocprofiler::registration::get_forced_configure(); // init status may be -1 (currently initializing) or 1 (already initialized). 
@@ -632,9 +644,47 @@ rocprofiler_set_api_table(const char* name, { // pass to hip init LOG_IF(ERROR, num_tables > 1) - << " rocprofiler expected HIP library to pass 1 API table, not " << num_tables; + << " rocprofiler expected HIP library to pass 1 API table for " << name << ", not " + << num_tables; - return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; + auto* hip_runtime_api_table = static_cast(*tables); + + // any internal modifications to the HipDispatchTable need to be done before we make the + // copy or else those modifications will be lost when HIP API tracing is enabled + // because the HIP API tracing invokes the function pointers from the copy below + rocprofiler::hip::copy_table(hip_runtime_api_table); + + // install rocprofiler API wrappers + rocprofiler::hip::update_table(hip_runtime_api_table); + + rocprofiler::intercept_table::notify_runtime_api_registration( + ROCPROFILER_HIP_RUNTIME_LIBRARY, + lib_version, + lib_instance, + std::make_tuple(hip_runtime_api_table)); + } + else if(std::string_view{name} == "hip_compiler") + { + // pass to hip init + LOG_IF(ERROR, num_tables > 1) + << " rocprofiler expected HIP library to pass 1 API table for " << name << ", not " + << num_tables; + + auto* hip_compiler_api_table = static_cast(*tables); + + // any internal modifications to the HipCompilerDispatchTable need to be done before we make + // the copy or else those modifications will be lost when HIP API tracing is enabled because + // the HIP API tracing invokes the function pointers from the copy below + rocprofiler::hip::copy_table(hip_compiler_api_table); + + // install rocprofiler API wrappers + rocprofiler::hip::update_table(hip_compiler_api_table); + + rocprofiler::intercept_table::notify_runtime_api_registration( + ROCPROFILER_HIP_COMPILER_LIBRARY, + lib_version, + lib_instance, + std::make_tuple(hip_compiler_api_table)); } else if(std::string_view{name} == "hsa") { diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt 
b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt index 1cf57a3d2f..8de1f8fadd 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt @@ -11,7 +11,7 @@ include(GoogleTest) # # -------------------------------------------------------------------------------------- # -set(rocprofiler_lib_sources agent.cpp buffer.cpp timestamp.cpp version.cpp) +set(rocprofiler_lib_sources agent.cpp buffer.cpp hsa.cpp timestamp.cpp version.cpp) add_executable(rocprofiler-lib-tests) target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources} details/agent.cpp) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/hsa.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/hsa.cpp new file mode 100644 index 0000000000..2b96edfd48 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/hsa.cpp @@ -0,0 +1,89 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include + +#include "lib/rocprofiler-sdk/hsa/hsa.hpp" + +#include + +TEST(hsa, tables) +{ + namespace hsa = ::rocprofiler::hsa; + + // version of HsaApiTable + auto version = hsa::get_table_version(); + + // HsaApiTable components + auto* core = hsa::get_core_table(); + auto* amd_ext = hsa::get_amd_ext_table(); + auto* fini_ext = hsa::get_fini_ext_table(); + auto* img_ext = hsa::get_img_ext_table(); + + // HsaApiTable instance + auto table = hsa::get_table(); + + //------------------------------------------------------------------------// + // checks against HSA headers + //------------------------------------------------------------------------// + + // make sure the version matches values from HSA header + EXPECT_EQ(version.major_id, HSA_API_TABLE_MAJOR_VERSION); + EXPECT_EQ(version.minor_id, sizeof(hsa::hsa_api_table_t)); + EXPECT_EQ(version.step_id, HSA_API_TABLE_STEP_VERSION); + + // make sure the version matches values from HSA header + EXPECT_EQ(core->version.major_id, HSA_CORE_API_TABLE_MAJOR_VERSION); + EXPECT_EQ(core->version.minor_id, sizeof(hsa::hsa_core_table_t)); + EXPECT_EQ(core->version.step_id, HSA_CORE_API_TABLE_STEP_VERSION); + + // make sure the version matches values from HSA header + EXPECT_EQ(amd_ext->version.major_id, HSA_AMD_EXT_API_TABLE_MAJOR_VERSION); + EXPECT_EQ(amd_ext->version.minor_id, sizeof(hsa::hsa_amd_ext_table_t)); + EXPECT_EQ(amd_ext->version.step_id, HSA_AMD_EXT_API_TABLE_STEP_VERSION); + + // make sure the version matches values from HSA header + EXPECT_EQ(fini_ext->version.major_id, HSA_FINALIZER_API_TABLE_MAJOR_VERSION); + EXPECT_EQ(fini_ext->version.minor_id, sizeof(hsa::hsa_fini_ext_table_t)); + EXPECT_EQ(fini_ext->version.step_id, 
HSA_FINALIZER_API_TABLE_STEP_VERSION); + + // make sure the version matches values from HSA header + EXPECT_EQ(img_ext->version.major_id, HSA_IMAGE_API_TABLE_MAJOR_VERSION); + EXPECT_EQ(img_ext->version.minor_id, sizeof(hsa::hsa_img_ext_table_t)); + EXPECT_EQ(img_ext->version.step_id, HSA_IMAGE_API_TABLE_STEP_VERSION); + + //------------------------------------------------------------------------// + // checks between instances + //------------------------------------------------------------------------// + + // make sure the get_table_version is same as what is in HsaApiTable + EXPECT_EQ(table.version.major_id, version.major_id); + EXPECT_EQ(table.version.minor_id, version.minor_id); + EXPECT_EQ(table.version.step_id, version.step_id); + + // make sure HsaApiTable has same pointers + EXPECT_EQ(table.core_, core); + EXPECT_EQ(table.amd_ext_, amd_ext); + EXPECT_EQ(table.finalizer_ext_, fini_ext); + EXPECT_EQ(table.image_ext_, img_ext); +} diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/intercept_table.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/intercept_table.cpp index 7403eaa103..8807802083 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/intercept_table.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/intercept_table.cpp @@ -32,6 +32,7 @@ #include "lib/common/utility.hpp" #include +#include #include #include @@ -71,7 +72,8 @@ struct callback_data rocprofiler_client_id_t* client_id = nullptr; rocprofiler_client_finalize_t client_fini_func = nullptr; - rocprofiler_context_id_t client_ctx = {}; + rocprofiler_context_id_t client_hsa_ctx = {}; + rocprofiler_context_id_t client_hip_ctx = {}; rocprofiler_buffer_id_t client_buffer = {}; rocprofiler_callback_thread_t client_thread = {}; uint64_t client_workflow_count = {}; @@ -260,7 +262,7 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing) cb_data->client_workflow_count++; cb_data->client_fini_func = fini_func; - 
ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx), + ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_hsa_ctx), "failed to create context"); auto operations = std::vector{ROCPROFILER_HSA_API_ID_hsa_init, @@ -269,7 +271,7 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing) ROCPROFILER_HSA_API_ID_hsa_shut_down}; ROCPROFILER_CALL( - rocprofiler_configure_callback_tracing_service(cb_data->client_ctx, + rocprofiler_configure_callback_tracing_service(cb_data->client_hsa_ctx, ROCPROFILER_CALLBACK_TRACING_HSA_API, operations.data(), operations.size(), @@ -278,12 +280,12 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing) "callback tracing service failed to configure"); int valid_ctx = 0; - ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx), + ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_hsa_ctx, &valid_ctx), "failure checking context validity"); EXPECT_EQ(valid_ctx, 1); - ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx), + ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_hsa_ctx), "rocprofiler context start failed"); // no errors @@ -292,7 +294,7 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing) static fini_func_t tool_fini = [](void* client_data) -> void { auto* cb_data = static_cast(client_data); - ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx), + ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_hsa_ctx), "rocprofiler context stop failed"); static_cast(client_data)->client_workflow_count++; @@ -329,44 +331,24 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing) ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT); using init_list_t = std::initializer_list; - for(auto itr : init_list_t{ - (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | ROCPROFILER_MARKER_LIBRARY), - (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY), - (ROCPROFILER_HIP_LIBRARY), - (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY), - 
(ROCPROFILER_MARKER_LIBRARY)}) + for(auto itr : init_list_t{(ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | + ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY), + (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY), + (ROCPROFILER_MARKER_LIBRARY)}) { ROCPROFILER_CALL_EXPECT( rocprofiler_at_runtime_api_registration( api_registration_callback, itr, static_cast(&cb_data)), - "test should be updated if new (non-HSA) intercept table is supported", + "test should be updated if new (non-HSA, non-HIP) intercept table is supported", ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); } - ROCPROFILER_CALL_EXPECT(rocprofiler_at_runtime_api_registration( - api_registration_callback, - ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY, - static_cast(&cb_data)), - "test should be updated if HIP intercept table is supported", - ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); - - ROCPROFILER_CALL_EXPECT( - rocprofiler_at_runtime_api_registration( - api_registration_callback, ROCPROFILER_HIP_LIBRARY, static_cast(&cb_data)), - "test should be updated if HIP intercept table is supported", - ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); - - ROCPROFILER_CALL_EXPECT(rocprofiler_at_runtime_api_registration( - api_registration_callback, - ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY, - static_cast(&cb_data)), - "test should be updated if ROCTx intercept table is supported", - ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); - - ROCPROFILER_CALL( - rocprofiler_at_runtime_api_registration( - api_registration_callback, ROCPROFILER_HSA_LIBRARY, static_cast(&cb_data)), - "HSA API intercept table registration failed"); + ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration( + api_registration_callback, + ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | + ROCPROFILER_HIP_COMPILER_LIBRARY, + static_cast(&cb_data)), + "HSA and HIP intercept table registration failed"); return &cfg_result; }; @@ -451,7 +433,7 @@ TEST(rocprofiler_lib, 
intercept_table_and_callback_tracing_disable_context) cb_data->client_workflow_count++; cb_data->client_fini_func = fini_func; - ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx), + ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_hsa_ctx), "failed to create context"); auto operations = std::vector{ROCPROFILER_HSA_API_ID_hsa_init, @@ -460,7 +442,7 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context) ROCPROFILER_HSA_API_ID_hsa_shut_down}; ROCPROFILER_CALL( - rocprofiler_configure_callback_tracing_service(cb_data->client_ctx, + rocprofiler_configure_callback_tracing_service(cb_data->client_hsa_ctx, ROCPROFILER_CALLBACK_TRACING_HSA_API, operations.data(), operations.size(), @@ -469,12 +451,12 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context) "callback tracing service failed to configure"); int valid_ctx = 0; - ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx), + ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_hsa_ctx, &valid_ctx), "failure checking context validity"); EXPECT_EQ(valid_ctx, 1); - ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx), + ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_hsa_ctx), "rocprofiler context start failed"); // no errors @@ -483,11 +465,11 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context) static fini_func_t tool_fini = [](void* client_data) -> void { auto* cb_data = static_cast(client_data); - ROCPROFILER_CALL_EXPECT(rocprofiler_stop_context(cb_data->client_ctx), + ROCPROFILER_CALL_EXPECT(rocprofiler_stop_context(cb_data->client_hsa_ctx), "rocprofiler context stop", ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND); - ROCPROFILER_CALL_EXPECT(rocprofiler_start_context(cb_data->client_ctx), + ROCPROFILER_CALL_EXPECT(rocprofiler_start_context(cb_data->client_hsa_ctx), "rocprofiler context start", ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED); @@ -517,42 +499,29 @@ 
TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context) cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); ROCPROFILER_CALL_EXPECT( - rocprofiler_at_runtime_api_registration(api_registration_callback, - ROCPROFILER_LIBRARY | ROCPROFILER_HSA_LIBRARY | - ROCPROFILER_HIP_LIBRARY | - ROCPROFILER_MARKER_LIBRARY, - static_cast(&cb_data)), + rocprofiler_at_runtime_api_registration( + api_registration_callback, + ROCPROFILER_LIBRARY | ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | + ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY, + static_cast(&cb_data)), "function should return invalid argument if ROCPROFILER_LIBRARY included", ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT); using init_list_t = std::initializer_list; - for(auto itr : init_list_t{ - (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | ROCPROFILER_MARKER_LIBRARY), - (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY), - (ROCPROFILER_HIP_LIBRARY), - (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY), - (ROCPROFILER_MARKER_LIBRARY)}) + for(auto itr : init_list_t{(ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | + ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY), + (ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY), + (ROCPROFILER_HIP_LIBRARY | ROCPROFILER_MARKER_LIBRARY), + (ROCPROFILER_HIP_COMPILER_LIBRARY | ROCPROFILER_MARKER_LIBRARY), + (ROCPROFILER_MARKER_LIBRARY)}) { ROCPROFILER_CALL_EXPECT( rocprofiler_at_runtime_api_registration( api_registration_callback, itr, static_cast(&cb_data)), - "test should be updated if new (non-HSA) intercept table is supported", + "test should be updated if new (non-HSA, non-HIP) intercept table is supported", ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); } - ROCPROFILER_CALL_EXPECT(rocprofiler_at_runtime_api_registration( - api_registration_callback, - ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY, - static_cast(&cb_data)), - "test should be updated if HIP intercept table 
is supported", - ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); - - ROCPROFILER_CALL_EXPECT( - rocprofiler_at_runtime_api_registration( - api_registration_callback, ROCPROFILER_HIP_LIBRARY, static_cast(&cb_data)), - "test should be updated if HIP intercept table is supported", - ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); - ROCPROFILER_CALL_EXPECT(rocprofiler_at_runtime_api_registration( api_registration_callback, ROCPROFILER_HSA_LIBRARY | ROCPROFILER_MARKER_LIBRARY, @@ -560,10 +529,12 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context) "test should be updated if ROCTx intercept table is supported", ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED); - ROCPROFILER_CALL( - rocprofiler_at_runtime_api_registration( - api_registration_callback, ROCPROFILER_HSA_LIBRARY, static_cast(&cb_data)), - "HSA API intercept table registration failed"); + ROCPROFILER_CALL(rocprofiler_at_runtime_api_registration( + api_registration_callback, + ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY | + ROCPROFILER_HIP_COMPILER_LIBRARY, + static_cast(&cb_data)), + "HSA and HIP API intercept table registration failed"); return &cfg_result; }; @@ -575,11 +546,11 @@ TEST(rocprofiler_lib, intercept_table_and_callback_tracing_disable_context) _data->first->agent_count++; if(int _is_active = 0; - rocprofiler_context_is_active(_data->second->client_ctx, &_is_active) == + rocprofiler_context_is_active(_data->second->client_hsa_ctx, &_is_active) == ROCPROFILER_STATUS_SUCCESS && _is_active != 0) { - ROCPROFILER_CALL(rocprofiler_stop_context(_data->second->client_ctx), + ROCPROFILER_CALL(rocprofiler_stop_context(_data->second->client_hsa_ctx), "rocprofiler context stop failed"); } diff --git a/projects/rocprofiler-sdk/source/scripts/deduce-sanitizer-lib.sh b/projects/rocprofiler-sdk/source/scripts/deduce-sanitizer-lib.sh new file mode 100755 index 0000000000..847dcff81b --- /dev/null +++ b/projects/rocprofiler-sdk/source/scripts/deduce-sanitizer-lib.sh @@ -0,0 +1,25 @@ +#!/bin/bash -e + 
+TEST_SOURCE=${PWD}/deduce-sanitizer-lib.cpp +TEST_BINARY=${PWD}/deduce-sanitizer-lib.out +TEST_RESULT=${PWD}/deduce-sanitizer-lib.txt + +LIBNAME=${1} +shift + +cat << EOF > ${TEST_SOURCE} +#include + +int +main(int argc, char** argv) +{ + auto ret = 0; + if(argc > 1) ret = std::stoi(argv[1]); + return ret; +} +EOF + +${@} ${TEST_SOURCE} -o ${TEST_BINARY} &> /dev/stderr + +ldd ${TEST_BINARY} | grep ${LIBNAME} | sed -E 's/.* => //g' | awk '{print $1}' > ${TEST_RESULT} +cat ${TEST_RESULT} diff --git a/projects/rocprofiler-sdk/source/scripts/thread-sanitizer-suppr.txt b/projects/rocprofiler-sdk/source/scripts/thread-sanitizer-suppr.txt index f53a0ee7c0..4cb71f7223 100644 --- a/projects/rocprofiler-sdk/source/scripts/thread-sanitizer-suppr.txt +++ b/projects/rocprofiler-sdk/source/scripts/thread-sanitizer-suppr.txt @@ -8,6 +8,9 @@ thread:libhsa-runtime64.so # data race in operator delete(void*) race:libamdhip64.so +# data race arising from hsa runtime +race:libhsa-runtime64.so + # unlock of an unlocked mutex (or by a wrong thread) mutex:librocm_smi64.so diff --git a/projects/rocprofiler-sdk/tests/CMakeLists.txt b/projects/rocprofiler-sdk/tests/CMakeLists.txt index 51b9ba203c..83749a972f 100644 --- a/projects/rocprofiler-sdk/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/CMakeLists.txt @@ -27,6 +27,10 @@ set(CMAKE_HIP_STANDARD_REQUIRED ON) enable_testing() include(CTest) +include(GNUInstallDirs) +# always use lib instead of lib64 +set(CMAKE_INSTALL_LIBDIR "lib") + # needed for validation find_package(Python3 REQUIRED) diff --git a/projects/rocprofiler-sdk/tests/apps/simple-transpose/CMakeLists.txt b/projects/rocprofiler-sdk/tests/apps/simple-transpose/CMakeLists.txt index 3310175e9d..6ad5b7d54c 100644 --- a/projects/rocprofiler-sdk/tests/apps/simple-transpose/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/apps/simple-transpose/CMakeLists.txt @@ -41,6 +41,10 @@ target_compile_options(simple-transpose PRIVATE -W -Wall -Wextra -Wpedantic -Wsh 
find_package(Threads REQUIRED) target_link_libraries(simple-transpose PRIVATE Threads::Threads) +find_package(rocprofiler-sdk-roctx REQUIRED) +target_link_libraries(simple-transpose + PRIVATE rocprofiler-sdk-roctx::rocprofiler-sdk-roctx) + install( TARGETS simple-transpose DESTINATION bin diff --git a/projects/rocprofiler-sdk/tests/apps/simple-transpose/simple-transpose.cpp b/projects/rocprofiler-sdk/tests/apps/simple-transpose/simple-transpose.cpp index c7daa0f801..b5faac265c 100644 --- a/projects/rocprofiler-sdk/tests/apps/simple-transpose/simple-transpose.cpp +++ b/projects/rocprofiler-sdk/tests/apps/simple-transpose/simple-transpose.cpp @@ -24,7 +24,10 @@ THE SOFTWARE. #include // hip header file -#include "hip/hip_runtime.h" +#include + +// roctx header file +#include #define WIDTH 1024 @@ -81,6 +84,8 @@ matrixTransposeCPUReference(float* output, float* input, const unsigned int widt int main() { + roctxRangePush("main"); + float* Matrix; float* TransposeMatrix; float* cpuTransposeMatrix; @@ -113,6 +118,15 @@ main() // Memory transfer from host to device HIP_API_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); + auto tid = roctx_thread_id_t{}; + roctxGetThreadId(&tid); + roctxProfilerPause(tid); + // Memory transfer that should be hidden by profiling tool + HIP_API_CALL( + hipMemcpy(gpuTransposeMatrix, gpuMatrix, NUM * sizeof(float), hipMemcpyDeviceToDevice)); + roctxProfilerResume(tid); + + roctxMark("pre-kernel-launch"); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), @@ -122,6 +136,7 @@ main() gpuTransposeMatrix, gpuMatrix, WIDTH); + roctxMark("post-kernel-launch"); // Memory transfer from device to host HIP_API_CALL( @@ -158,5 +173,7 @@ main() free(TransposeMatrix); free(cpuTransposeMatrix); + roctxRangePop(); + return errors; } diff --git a/projects/rocprofiler-sdk/tests/common/serialization.hpp 
b/projects/rocprofiler-sdk/tests/common/serialization.hpp index 00484d5abf..9e638498f1 100644 --- a/projects/rocprofiler-sdk/tests/common/serialization.hpp +++ b/projects/rocprofiler-sdk/tests/common/serialization.hpp @@ -183,6 +183,22 @@ save(ArchiveT& ar, rocprofiler_callback_tracing_marker_api_data_t data) SAVE_DATA_FIELD(retval); } +template +void +save(ArchiveT& ar, rocprofiler_hip_api_retval_t data) +{ + SAVE_DATA_FIELD(hipError_t_retval); +} + +template +void +save(ArchiveT& ar, rocprofiler_callback_tracing_hip_api_data_t data) +{ + SAVE_DATA_FIELD(size); + // SAVE_DATA_FIELD(args); + SAVE_DATA_FIELD(retval); +} + template void save(ArchiveT& ar, rocprofiler_callback_tracing_record_t data) diff --git a/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py b/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py index 78f78392b9..01c68d056a 100644 --- a/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py +++ b/projects/rocprofiler-sdk/tests/kernel-tracing/validate.py @@ -28,12 +28,14 @@ def test_data_structure(input_data): node_exists("code_objects", sdk_data["callback_records"]) node_exists("kernel_symbols", sdk_data["callback_records"]) node_exists("hsa_api_traces", sdk_data["callback_records"]) + node_exists("hip_api_traces", sdk_data["callback_records"], 0) node_exists("marker_api_traces", sdk_data["callback_records"]) node_exists("names", sdk_data["buffer_records"]) node_exists("kernel_dispatches", sdk_data["buffer_records"]) node_exists("memory_copies", sdk_data["buffer_records"], 0) node_exists("hsa_api_traces", sdk_data["buffer_records"]) + node_exists("hip_api_traces", sdk_data["buffer_records"], 0) node_exists("marker_api_traces", sdk_data["buffer_records"]) @@ -43,7 +45,7 @@ def test_timestamps(input_data): cb_start = {} cb_end = {} - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["callback_records"][titr]: cid = 
itr["record"]["correlation_id"]["internal"] phase = itr["record"]["phase"] @@ -77,7 +79,7 @@ def test_internal_correlation_ids(input_data): sdk_data = data["rocprofiler-sdk-json-tool"] api_corr_ids = [] - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["callback_records"][titr]: api_corr_ids.append(itr["record"]["correlation_id"]["internal"]) @@ -103,7 +105,7 @@ def test_external_correlation_ids(input_data): sdk_data = data["rocprofiler-sdk-json-tool"] extern_corr_ids = [] - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["callback_records"][titr]: assert itr["record"]["correlation_id"]["external"] > 0 assert ( @@ -112,7 +114,7 @@ def test_external_correlation_ids(input_data): extern_corr_ids.append(itr["record"]["correlation_id"]["external"]) extern_corr_ids = list(set(sorted(extern_corr_ids))) - for titr in ["hsa_api_traces", "marker_api_traces"]: + for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]: for itr in sdk_data["buffer_records"][titr]: assert itr["correlation_id"]["external"] > 0 assert itr["thread_id"] == itr["correlation_id"]["external"] diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/tracing/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/tracing/CMakeLists.txt index 57dd050ef3..afff7204d1 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/tracing/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/tracing/CMakeLists.txt @@ -14,8 +14,8 @@ add_test( NAME rocprofv3-test-trace-execute COMMAND $ --hsa-trace --kernel-trace - --memory-copy-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace -o out - $) + --memory-copy-trace --marker-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace -o + out $) string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") @@ -39,9 +39,27 @@ add_test( 
--kernel-input ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_kernel_trace.csv --memory-copy-input - ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv) + ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv + --marker-input + ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv) + +set(VALIDATION_FILES + ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_hsa_api_trace.csv + ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_kernel_trace.csv + ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_memory_copy_trace.csv + ${CMAKE_CURRENT_BINARY_DIR}/simple-transpose-trace/out_marker_api_trace.csv) set_tests_properties( rocprofv3-test-trace-validate - PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS rocprofv3-test-trace-execute - FAIL_REGULAR_EXPRESSION "AssertionError") + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests" + DEPENDS + rocprofv3-test-trace-execute + FAIL_REGULAR_EXPRESSION + "AssertionError" + REQUIRED_FILES + "${VALIDATION_FILES}" + ATTACHED_FILES_ON_FAIL + "${VALIDATION_FILES}") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/tracing/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/tracing/conftest.py index febffdf809..e3f88e1884 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/tracing/conftest.py +++ b/projects/rocprofiler-sdk/tests/rocprofv3/tracing/conftest.py @@ -20,6 +20,16 @@ def pytest_addoption(parser): action="store", help="Path to memory-copy tracing CSV file.", ) + parser.addoption( + "--marker-input", + action="store", + help="Path to marker API tracing CSV file.", + ) + parser.addoption( + "--hip-input", + action="store", + help="Path to HIP runtime and compiler API tracing CSV file.", + ) @pytest.fixture @@ -56,3 +66,27 @@ def memory_copy_input_data(request): data.append(row) return data + + +@pytest.fixture +def marker_input_data(request): + filename = request.config.getoption("--marker-input") + data = [] + with 
open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + + return data + + +@pytest.fixture +def hip_input_data(request): + filename = request.config.getoption("--hip-input") + data = [] + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + + return data diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/tracing/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/tracing/validate.py index e051a39fb3..401a5c5c6c 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/tracing/validate.py +++ b/projects/rocprofiler-sdk/tests/rocprofv3/tracing/validate.py @@ -17,11 +17,14 @@ def test_hsa_api_trace(hsa_input_data): correlation_ids = sorted(list(set(correlation_ids))) + hsa_api_calls_offset = 2 # roctxRangePush is first + num_marker_api_calls = 6 # six marker API calls + # all correlation ids are unique assert len(correlation_ids) == len(hsa_input_data) # correlation ids are numbered from 1 to N - assert correlation_ids[0] == 1 - assert correlation_ids[-1] == len(correlation_ids) + assert correlation_ids[0] == hsa_api_calls_offset + assert correlation_ids[-1] == len(correlation_ids) + num_marker_api_calls functions = list(set(functions)) assert "hsa_amd_memory_async_copy_on_engine" in functions @@ -63,6 +66,21 @@ def test_memory_copy_trace(memory_copy_input_data): assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) +def test_marker_api_trace(marker_input_data): + functions = [] + for row in marker_input_data: + assert row["Domain"] == "MARKER_API" + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int( + row["Process_Id"] + ) + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + functions.append(row["Function"]) + + functions = list(set(functions)) + assert "main" in functions + + if __name__ == "__main__": exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) sys.exit(exit_code) diff --git 
a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp index 0ceebb34ae..8440327d0c 100644 --- a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp +++ b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp @@ -129,7 +129,9 @@ callback_name_info get_callback_tracing_names() { static const auto supported = std::unordered_set{ - ROCPROFILER_CALLBACK_TRACING_HSA_API, ROCPROFILER_CALLBACK_TRACING_MARKER_API}; + ROCPROFILER_CALLBACK_TRACING_HSA_API, + ROCPROFILER_CALLBACK_TRACING_MARKER_API, + ROCPROFILER_CALLBACK_TRACING_HIP_API}; auto cb_name_info = callback_name_info{}; // @@ -237,6 +239,33 @@ using callback_payload_t = rocprofiler_callback_tracing_hsa_api_data_t, rocprofiler_callback_tracing_marker_api_data_t>; +using callback_arg_array_t = std::vector>; + +template +void +serialize_args(ArchiveT& ar, const callback_arg_array_t& data) +{ + ar.setNextName("args"); + ar.startNode(); + for(const auto& itr : data) + ar(cereal::make_nvp(itr.first, itr.second)); + ar.finishNode(); +} + +int +save_args(rocprofiler_callback_tracing_kind_t, + uint32_t, + uint32_t, + const char* arg_name, + const char* arg_value_str, + const void* const, + void* data) +{ + auto* argvec = static_cast(data); + argvec->emplace_back(arg_name, arg_value_str); + return 0; +} + struct code_object_callback_record_t { uint64_t timestamp = 0; @@ -272,6 +301,7 @@ struct hsa_api_callback_record_t uint64_t timestamp = 0; rocprofiler_callback_tracing_record_t record = {}; rocprofiler_callback_tracing_hsa_api_data_t payload = {}; + callback_arg_array_t args = {}; template void save(ArchiveT& ar) const @@ -279,6 +309,24 @@ struct hsa_api_callback_record_t ar(cereal::make_nvp("timestamp", timestamp)); ar(cereal::make_nvp("record", record)); ar(cereal::make_nvp("payload", payload)); + serialize_args(ar, args); + } +}; + +struct hip_api_callback_record_t +{ + uint64_t timestamp = 0; + rocprofiler_callback_tracing_record_t record = {}; + 
rocprofiler_callback_tracing_hip_api_data_t payload = {}; + callback_arg_array_t args = {}; + + template + void save(ArchiveT& ar) const + { + ar(cereal::make_nvp("timestamp", timestamp)); + ar(cereal::make_nvp("record", record)); + ar(cereal::make_nvp("payload", payload)); + serialize_args(ar, args); } }; @@ -301,6 +349,7 @@ auto code_object_records = std::deque{}; auto kernel_symbol_records = std::deque{}; auto hsa_api_cb_records = std::deque{}; auto marker_api_cb_records = std::deque{}; +auto hip_api_cb_records = std::deque{}; rocprofiler_thread_id_t push_external_correlation(); @@ -340,7 +389,18 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, else if(record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_API) { auto* data = static_cast(record.payload); - hsa_api_cb_records.emplace_back(hsa_api_callback_record_t{ts, record, *data}); + auto args = callback_arg_array_t{}; + rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, &args); + hsa_api_cb_records.emplace_back( + hsa_api_callback_record_t{ts, record, *data, std::move(args)}); + } + else if(record.kind == ROCPROFILER_CALLBACK_TRACING_HIP_API) + { + auto* data = static_cast(record.payload); + auto args = callback_arg_array_t{}; + rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, &args); + hip_api_cb_records.emplace_back( + hip_api_callback_record_t{ts, record, *data, std::move(args)}); } else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API) { @@ -355,6 +415,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, auto hsa_api_bf_records = std::deque{}; auto marker_api_bf_records = std::deque{}; +auto hip_api_bf_records = std::deque{}; auto kernel_dispatch_records = std::deque{}; auto memory_copy_records = std::deque{}; @@ -413,6 +474,13 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, marker_api_bf_records.emplace_back(*record); } + else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_API) + { + auto* 
record = + static_cast(header->payload); + + hip_api_bf_records.emplace_back(*record); + } else if(header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) { auto* record = static_cast( @@ -481,27 +549,34 @@ flush(); // contexts rocprofiler_context_id_t hsa_api_callback_ctx = {}; +rocprofiler_context_id_t hip_api_callback_ctx = {}; rocprofiler_context_id_t marker_api_callback_ctx = {}; rocprofiler_context_id_t code_object_ctx = {}; rocprofiler_context_id_t hsa_api_buffered_ctx = {}; +rocprofiler_context_id_t hip_api_buffered_ctx = {}; rocprofiler_context_id_t marker_api_buffered_ctx = {}; rocprofiler_context_id_t kernel_dispatch_ctx = {}; rocprofiler_context_id_t memory_copy_ctx = {}; // buffers rocprofiler_buffer_id_t hsa_api_buffered_buffer = {}; +rocprofiler_buffer_id_t hip_api_buffered_buffer = {}; rocprofiler_buffer_id_t marker_api_buffered_buffer = {}; rocprofiler_buffer_id_t kernel_dispatch_buffer = {}; rocprofiler_buffer_id_t memory_copy_buffer = {}; auto contexts = std::unordered_map{ {"HSA_API_CALLBACK", &hsa_api_callback_ctx}, + {"HIP_API_CALLBACK", &hip_api_callback_ctx}, {"MARKER_API_CALLBACK", &marker_api_callback_ctx}, {"CODE_OBJECT", &code_object_ctx}, {"HSA_API_BUFFERED", &hsa_api_buffered_ctx}, + {"HIP_API_BUFFERED", &hip_api_buffered_ctx}, {"MARKER_API_BUFFERED", &marker_api_buffered_ctx}, {"KERNEL_DISPATCH", &kernel_dispatch_ctx}, {"MEMORY_COPY", &memory_copy_ctx}}; -auto buffers = std::array{&hsa_api_buffered_buffer, + +auto buffers = std::array{&hsa_api_buffered_buffer, + &hip_api_buffered_buffer, &marker_api_buffered_buffer, &kernel_dispatch_buffer, &memory_copy_buffer}; @@ -545,7 +620,16 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) 0, tool_tracing_callback, nullptr), - "hsa api tracing service configure"); + "hsa api callback tracing service configure"); + + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(hip_api_callback_ctx, + ROCPROFILER_CALLBACK_TRACING_HIP_API, + nullptr, + 0, + 
tool_tracing_callback, + nullptr), + "hip api callback tracing service configure"); ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(code_object_ctx, @@ -577,6 +661,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) &hsa_api_buffered_buffer), "buffer creation"); + ROCPROFILER_CALL(rocprofiler_create_buffer(hip_api_buffered_ctx, + buffer_size, + watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + tool_tracing_buffered, + tool_data, + &hip_api_buffered_buffer), + "buffer creation"); + ROCPROFILER_CALL(rocprofiler_create_buffer(marker_api_buffered_ctx, buffer_size, watermark, @@ -612,6 +705,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) hsa_api_buffered_buffer), "buffer tracing service configure"); + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service(hip_api_buffered_ctx, + ROCPROFILER_BUFFER_TRACING_HIP_API, + nullptr, + 0, + hip_api_buffered_buffer), + "buffer tracing service configure"); + ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service(marker_api_buffered_ctx, ROCPROFILER_BUFFER_TRACING_MARKER_API, @@ -714,10 +815,12 @@ tool_fini(void* tool_data) << ", code_object_callback_records=" << code_object_records.size() << ", kernel_symbol_callback_records=" << kernel_symbol_records.size() << ", hsa_api_callback_records=" << hsa_api_cb_records.size() + << ", hip_api_callback_records=" << hip_api_cb_records.size() << ", marker_api_callback_records=" << marker_api_cb_records.size() << ", kernel_dispatch_records=" << kernel_dispatch_records.size() << ", memory_copy_records=" << memory_copy_records.size() << ", hsa_api_bf_records=" << hsa_api_bf_records.size() + << ", hip_api_bf_records=" << hip_api_bf_records.size() << ", marker_api_bf_records=" << marker_api_bf_records.size() << " ...\n" << std::flush; @@ -779,6 +882,7 @@ tool_fini(void* tool_data) json_ar(cereal::make_nvp("code_objects", code_object_records)); json_ar(cereal::make_nvp("kernel_symbols", kernel_symbol_records)); 
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_cb_records)); + json_ar(cereal::make_nvp("hip_api_traces", hip_api_cb_records)); json_ar(cereal::make_nvp("marker_api_traces", marker_api_cb_records)); } catch(std::exception& e) { @@ -796,6 +900,7 @@ tool_fini(void* tool_data) json_ar(cereal::make_nvp("kernel_dispatches", kernel_dispatch_records)); json_ar(cereal::make_nvp("memory_copies", memory_copy_records)); json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records)); + json_ar(cereal::make_nvp("hip_api_traces", hip_api_bf_records)); json_ar(cereal::make_nvp("marker_api_traces", marker_api_bf_records)); } catch(std::exception& e) {