diff --git a/projects/clr/hipamd/.clang-format b/projects/clr/hipamd/.clang-format new file mode 100644 index 0000000000..5572a72cdd --- /dev/null +++ b/projects/clr/hipamd/.clang-format @@ -0,0 +1,10 @@ +Language: Cpp +BasedOnStyle: Google +AlignEscapedNewlinesLeft: false +AlignOperands: false +ColumnLimit: 100 +AlwaysBreakTemplateDeclarations: false +DerivePointerAlignment: false +IndentFunctionDeclarationAfterType: false +MaxEmptyLinesToKeep: 2 +SortIncludes: false diff --git a/projects/clr/hipamd/.gitattributes b/projects/clr/hipamd/.gitattributes new file mode 100644 index 0000000000..d5175f2f9c --- /dev/null +++ b/projects/clr/hipamd/.gitattributes @@ -0,0 +1,20 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto + +# Explicitly declare text files you want to always be normalized and converted +# to have LF line endings on checkout. +*.c text eol=lf +*.cpp text eol=lf +*.cc text eol=lf +*.h text eol=lf +*.hpp text eol=lf +*.txt text eol=lf + +# Define files to support auto-remove trailing white space +# Need to run the command below, before adding modified file(s) to the staging area +# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"' +*.cpp filter=trimspace +*.c filter=trimspace +*.h filter=trimspace +*.hpp filter=trimspace +*.md filter=trimspace \ No newline at end of file diff --git a/projects/clr/hipamd/.gitignore b/projects/clr/hipamd/.gitignore new file mode 100644 index 0000000000..fe07943cad --- /dev/null +++ b/projects/clr/hipamd/.gitignore @@ -0,0 +1,16 @@ +.* +!.gitignore +*.o +*.exe +*.swp +lib +packages +build +bin/hipInfo +bin/hipBusBandwidth +bin/hipDispatchLatency +bin/hipify-clang +tags +samples/1_Utils/hipInfo/hipInfo +samples/1_Utils/hipBusBandwidth/hipBusBandwidth +samples/1_Utils/hipDispatchLatency/hipDispatchLatency \ No newline at end of file diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt new file mode 100755 index 0000000000..38948ecf48 --- 
/dev/null +++ b/projects/clr/hipamd/CMakeLists.txt @@ -0,0 +1,480 @@ +# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +cmake_minimum_required(VERSION 3.16.8) +project(hip) + +include(GNUInstallDirs) + +# sample command for hip-rocclr runtime, you'll need to have rocclr built +# ROCM_PATH is the path where ROCM is installed +# For shared lib of hip-rocclr runtime +# For release version +# cmake -DHIP_COMMON_DIR="$HIP_DIR" -DAMD_OPENCL_PATH=$OPENCL_DIR -DROCCLR_PATH=$ROCCLR_DIR -DCMAKE_PREFIX_PATH="/" -DCMAKE_INSTALL_PREFIX= .. +# For debug version +# cmake -DHIP_COMMON_DIR="$HIP_DIR" -DAMD_OPENCL_PATH=$OPENCL_DIR -DROCCLR_PATH=$ROCCLR_DIR -DCMAKE_PREFIX_PATH="/" -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX= .. 
+# For static lib of hip-rocclr runtime +# For release version +# cmake -DHIP_COMMON_DIR="$HIP_DIR" -DAMD_OPENCL_PATH=$OPENCL_DIR -DROCCLR_PATH=$ROCCLR_DIR -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="/" -DCMAKE_INSTALL_PREFIX= .. +# For debug version +# cmake -DHIP_COMMON_DIR="$HIP_DIR" -DAMD_OPENCL_PATH=$OPENCL_DIR -DROCCLR_PATH=$ROCCLR_DIR -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Debug -DCMAKE_PREFIX_PATH="/" -DCMAKE_INSTALL_PREFIX= .. +# If you don't specify CMAKE_INSTALL_PREFIX, hip-rocclr runtime will be installed to "/hip". +# By default, CMake will search for a folder named vdi or ROCclr relative to the current path. Specify -DROCCLR_PATH=$ROCCLR_DIR if rocclr source is in obscure location. +# By default, CMake will search for a folder named opencl or ROCm-OpenCL-Runtime relative to the current path. Specify -DAMD_OPENCL_PATH=$OPENCL_DIR if opencl source is in obscure location. +list(APPEND CMAKE_MODULE_PATH ${HIP_COMMON_DIR}/cmake) + +# required to add the right link to libhsa-runtime in install/lib path +# CMAKE_PREFIX_PATH is used as rpath to search for libs outside HIP +set(CMAKE_INSTALL_RPATH "${CMAKE_PREFIX_PATH}/${CMAKE_INSTALL_LIBDIR}") +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + +############################# +# Options +############################# +option(BUILD_HIPIFY_CLANG "Enable building the CUDA->HIP converter" OFF) +option(__HIP_ENABLE_PCH "Enable/Disable pre-compiled hip headers" ON) +option(HIP_OFFICIAL_BUILD "Enable/Disable for mainline/staging builds" OFF) +option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) +set(HIPCC_BIN_DIR "" CACHE STRING "HIPCC and HIPCONFIG binary directories") + +if(__HIP_ENABLE_PCH) + set(_pchStatus 1) +else() + set(_pchStatus 0) +endif() + +message(STATUS "HIPCC_BIN_DIR found at ${HIPCC_BIN_DIR}") +message(STATUS "HIP_COMMON_DIR found at ${HIP_COMMON_DIR}") +set(HIP_COMMON_INCLUDE_DIR ${HIP_COMMON_DIR}/include) +set(HIP_COMMON_BIN_DIR ${HIP_COMMON_DIR}/bin) 
+set(__HIPCONFIG_EXECUTABLE__ ${HIP_COMMON_DIR}/bin/hipconfig) + +############################# +# Setup config generation +############################# +string(TIMESTAMP _timestamp UTC) +set(_versionInfo "# Auto-generated by cmake\n") +set(_buildInfo "# Auto-generated by cmake on ${_timestamp} UTC\n") +macro(add_to_config _configfile _variable) + set(${_configfile} "${${_configfile}}${_variable}=${${_variable}}\n") +endmacro() + +############################# +# Setup version information +############################# +find_package(Perl REQUIRED) + +# Determine HIP_BASE_VERSION +set(ENV{HIP_PATH} "") +file(STRINGS ${HIP_COMMON_DIR}/VERSION VERSION_LIST REGEX "^[0-9]+") +list(GET VERSION_LIST 0 HIP_VERSION_MAJOR) +list(GET VERSION_LIST 1 HIP_VERSION_MINOR) +list(GET VERSION_LIST 2 HIP_VERSION_PATCH) +set(HIP_VERSION_GITDATE 0) + +find_package(Git) + +# FIXME: Two different version strings used. +# Below we use UNIX commands, not compatible with Windows. +if(GIT_FOUND) + # use the commit date, instead of build date + execute_process(COMMAND ${GIT_EXECUTABLE} show -s --format=%ct + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(git_result EQUAL 0) + set(HIP_VERSION_UNIXDATE ${git_output}) + endif() + + # get date information based on UTC + # use the last two digits of year + week number + day in the week as HIP_VERSION_GITDATE + execute_process(COMMAND ${PERL_EXECUTABLE} "-MPOSIX=strftime" "-le" "print strftime \'%y%W%w\',gmtime(${HIP_VERSION_UNIXDATE})" + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(git_result EQUAL 0) + set(HIP_VERSION_GITDATE ${git_output}) + endif() + + # get commit short hash + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + 
OUTPUT_STRIP_TRAILING_WHITESPACE) + if(git_result EQUAL 0) + set(HIP_VERSION_GITHASH ${git_output}) + endif() + + set(HIP_VERSION_BUILD_ID 0) + set(HIP_VERSION_BUILD_NAME "") + if(NOT DEFINED ENV{HIP_OFFICIAL_BUILD} AND NOT HIP_OFFICIAL_BUILD) + set(HIP_VERSION_PATCH ${HIP_VERSION_GITDATE}) + endif() + + if(DEFINED ENV{ROCM_LIBPATCH_VERSION}) + set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH}.$ENV{ROCM_LIBPATCH_VERSION}) + else() + set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH}-${HIP_VERSION_GITHASH}) + endif() +else() + set(HIP_VERSION_BUILD_ID 0) + set(HIP_VERSION_BUILD_NAME "") + # FIXME: Some parts depend on this being set. + set(HIP_PACKAGING_VERSION_PATCH "0") +endif() + +## Debian package specific variables +if ( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} ) + set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} ) +else() + set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" ) +endif() +message (STATUS "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" ) + +## RPM package specific variables +if ( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} ) + set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} ) +else() + set ( CPACK_RPM_PACKAGE_RELEASE "local" ) +endif() + +## 'dist' breaks manual builds on debian systems due to empty Provides +execute_process( COMMAND rpm --eval %{?dist} + RESULT_VARIABLE PROC_RESULT + OUTPUT_VARIABLE EVAL_RESULT + OUTPUT_STRIP_TRAILING_WHITESPACE ) + +if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" ) + string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" ) +endif() +message(STATUS "CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}") + +add_to_config(_versionInfo HIP_PACKAGING_VERSION_PATCH) +add_to_config(_versionInfo CPACK_DEBIAN_PACKAGE_RELEASE) +add_to_config(_versionInfo CPACK_RPM_PACKAGE_RELEASE) + +add_to_config(_versionInfo HIP_VERSION_MAJOR) +add_to_config(_versionInfo HIP_VERSION_MINOR) +add_to_config(_versionInfo HIP_VERSION_PATCH) +add_to_config(_versionInfo 
HIP_VERSION_GITHASH) + +set (HIP_LIB_VERSION_MAJOR ${HIP_VERSION_MAJOR}) +set (HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR}) +if (${ROCM_PATCH_VERSION} ) + set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION}) +elseif (DEFINED HIP_VERSION_GITHASH) + set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH}-${HIP_VERSION_GITHASH}) +else () + set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH}) +endif () +set (HIP_LIB_VERSION_STRING "${HIP_LIB_VERSION_MAJOR}.${HIP_LIB_VERSION_MINOR}.${HIP_LIB_VERSION_PATCH}") +if (DEFINED ENV{ROCM_RPATH}) + set (CMAKE_INSTALL_RPATH "$ENV{ROCM_RPATH}") + set (CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) + set (CMAKE_SKIP_BUILD_RPATH TRUE) + set (CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) +endif () + +# overwrite HIP_VERSION_PATCH for packaging +set(HIP_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_PACKAGING_VERSION_PATCH}) + +# Remove when CI is updated +if(HIP_PLATFORM STREQUAL "rocclr") + set(HIP_PLATFORM "amd") +endif() +############################# +# Configure variables +############################# +# Determine HIP_PLATFORM +if(NOT DEFINED HIP_PLATFORM) + if(NOT DEFINED ENV{HIP_PLATFORM}) + execute_process(COMMAND ${__HIPCONFIG_EXECUTABLE__} --platform + OUTPUT_VARIABLE HIP_PLATFORM + OUTPUT_STRIP_TRAILING_WHITESPACE) + else() + set(HIP_PLATFORM $ENV{HIP_PLATFORM} CACHE STRING "HIP Platform") + endif() +endif() +message(STATUS "HIP Platform: " ${HIP_PLATFORM}) + +if(HIP_PLATFORM STREQUAL "nvidia") + set(HIP_RUNTIME "cuda" CACHE STRING "HIP Runtime") + set(HIP_COMPILER "nvcc" CACHE STRING "HIP Compiler") +elseif(HIP_PLATFORM STREQUAL "amd") + set(HIP_RUNTIME "rocclr" CACHE STRING "HIP Runtime") + set(HIP_COMPILER "clang" CACHE STRING "HIP Compiler") +else() + message(FATAL_ERROR "Unexpected HIP_PLATFORM: " ${HIP_PLATFORM}) +endif() + +message(STATUS "HIP Runtime: " ${HIP_RUNTIME}) +message(STATUS "HIP Compiler: " ${HIP_COMPILER}) + +add_to_config(_buildInfo HIP_RUNTIME) +add_to_config(_buildInfo HIP_COMPILER) + +# Set default build type +if(NOT 
CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +if (NOT DEFINED ROCM_PATH ) + set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." ) +endif () +message (STATUS "ROCM Installation path(ROCM_PATH): ${ROCM_PATH}") + +# Determine HIP install path +if (UNIX) + set(HIP_DEFAULT_INSTALL_PREFIX "${ROCM_PATH}") +endif() +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX ${HIP_DEFAULT_INSTALL_PREFIX} CACHE PATH "Installation path for HIP" FORCE) +endif() + +if(DEV_LOG_ENABLE MATCHES "yes") + add_definitions(-DDEV_LOG_ENABLE) +endif() + +# Set default install path as "${ROCM_PATH}", can override the path from cmake build. +set(CPACK_INSTALL_PREFIX ${HIP_DEFAULT_INSTALL_PREFIX} CACHE PATH "Package Installation path for HIP") + +if(IS_ABSOLUTE ${CMAKE_INSTALL_PREFIX}) + message(STATUS "HIP will be installed in: " ${CMAKE_INSTALL_PREFIX}) +else() + message(FATAL_ERROR "Don't know where to install HIP. Please specify absolute path using -DCMAKE_INSTALL_PREFIX") +endif() + +# set the installation path for the installer package +set(CPACK_SET_DESTDIR ON CACHE BOOL "Installer package will install hip to CMAKE_INSTALL_PREFIX instead of CPACK_PACKAGING_INSTALL_PREFIX") +if (NOT CPACK_SET_DESTDIR) + set(CPACK_PACKAGING_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "Default installation path of hcc installer package") +endif (NOT CPACK_SET_DESTDIR) + +############################# +# Build steps +############################# +set(BIN_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}) +set(LIB_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) +set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) +set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) +set(CONFIG_LANG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip-lang) +set(CONFIG_RTC_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hiprtc) + +# Build clang hipify if enabled +if (BUILD_HIPIFY_CLANG) + 
add_subdirectory(hipify-clang) +endif() + +# Generate hip_version.h +set(_versionInfoHeader +"// Auto-generated by cmake\n +#ifndef HIP_VERSION_H +#define HIP_VERSION_H\n +#define HIP_VERSION_MAJOR ${HIP_VERSION_MAJOR} +#define HIP_VERSION_MINOR ${HIP_VERSION_MINOR} +#define HIP_VERSION_PATCH ${HIP_VERSION_PATCH} +#define HIP_VERSION_GITHASH \"${HIP_VERSION_GITHASH}\" +#define HIP_VERSION_BUILD_ID ${HIP_VERSION_BUILD_ID} +#define HIP_VERSION_BUILD_NAME \"${HIP_VERSION_BUILD_NAME}\" +#define HIP_VERSION (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)\n +#define __HIP_HAS_GET_PCH ${_pchStatus}\n +#endif\n +") +file(WRITE "${PROJECT_BINARY_DIR}/include/hip/hip_version.h" ${_versionInfoHeader}) + +if(HIP_RUNTIME STREQUAL "rocclr") + add_subdirectory(src) +endif() + +# Generate .hipInfo +file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) + +# Generate .hipVersion +file(WRITE "${PROJECT_BINARY_DIR}/.hipVersion" ${_versionInfo}) + +# Build doxygen documentation +find_program(DOXYGEN_EXE doxygen) +if(DOXYGEN_EXE) + add_custom_target(doc COMMAND HIP_PATH=${CMAKE_CURRENT_SOURCE_DIR} ${DOXYGEN_EXE} ${CMAKE_CURRENT_SOURCE_DIR}/docs/doxygen-input/doxy.cfg + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/docs) +endif() + +############################# +# Install steps +############################# + +# Install .hipInfo +install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# Install .hipVersion +install(FILES ${PROJECT_BINARY_DIR}/.hipVersion DESTINATION ${CMAKE_INSTALL_BINDIR}) + +# Install src, bin, include & cmake if necessary +execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR} + RESULT_VARIABLE INSTALL_SOURCE) +if(NOT ${INSTALL_SOURCE} EQUAL 0) + if(WIN32) + install(DIRECTORY ${HIP_COMMON_BIN_DIR} DESTINATION . 
USE_SOURCE_PERMISSIONS) + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/src/" DESTINATION ${CMAKE_INSTALL_BINDIR} + FILES_MATCHING PATTERN "*.pdb" + PATTERN "*.ilk" + PATTERN "CMakeFiles" EXCLUDE + PATTERN "hip_rtc_gen" EXCLUDE + PATTERN "libelf" EXCLUDE + PATTERN "loader" EXCLUDE + PATTERN "pal" EXCLUDE + PATTERN "libamdhsacode" EXCLUDE) + endif() + else() + # Exclude .bat files on Linux. + #Hip bin files moved to /opt/rocm/bin and the file permission need to set properly + install(DIRECTORY ${HIP_COMMON_BIN_DIR} DESTINATION . USE_SOURCE_PERMISSIONS + DIRECTORY_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + PATTERN *.bat EXCLUDE) + endif() + + if(WIN32) #not required for flat folder structure + # The following two lines will be removed after upstream updation + install(CODE "MESSAGE(\"Removing ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}\")") + install(CODE "file(REMOVE_RECURSE ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})") + endif() + + install(DIRECTORY include DESTINATION .) + install(DIRECTORY ${HIP_COMMON_INCLUDE_DIR}/hip/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/) + if(WIN32) + install(DIRECTORY ${HIP_COMMON_DIR}/cmake DESTINATION .) + else() + install(DIRECTORY ${HIP_COMMON_DIR}/cmake/ DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR}) + endif() +endif() + +# Install generated headers +# FIXME: Associate with individual targets. +if(HIP_PLATFORM STREQUAL "amd") +install(FILES ${PROJECT_BINARY_DIR}/include/hip/amd_detail/hip_prof_str.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/amd_detail) +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin DESTINATION . 
USE_SOURCE_PERMISSIONS) +endif() +install(FILES ${PROJECT_BINARY_DIR}/include/hip/hip_version.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip) + +if (NOT ${HIPCC_BIN_DIR} STREQUAL "") + file(TO_CMAKE_PATH "${HIPCC_BIN_DIR}" HIPCC_BIN_DIR) + set(hipcc_bin ${HIPCC_BIN_DIR}/hipcc.bin) + set(hipconfig_bin ${HIPCC_BIN_DIR}/hipconfig.bin) + if(WIN32) + set(hipcc_bin ${hipcc_bin}.exe) + set(hipconfig_bin ${hipconfig_bin}.exe) + endif() + install(PROGRAMS ${hipcc_bin} DESTINATION bin) + install(PROGRAMS ${hipconfig_bin} DESTINATION bin) +endif() + +############################# +# hip-config +############################# +include(CMakePackageConfigHelpers) + +configure_package_config_file( + hip-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake + INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} + PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR + ) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake + VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_GITDATE}" + COMPATIBILITY SameMajorVersion + ) +install( + FILES + ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake + DESTINATION + ${CONFIG_PACKAGE_INSTALL_DIR} + ) +# Packaging invokes UNIX commands, which are not available on Windows. 
+ +if(NOT WIN32) + add_subdirectory(packaging) +endif() + +############################# +# Code formatting +############################# +# Target: clangformat +find_program(CLANGFORMAT_EXE clang-format PATHS ${HCC_HOME}/bin) +if(CLANGFORMAT_EXE) + file(GLOB_RECURSE FORMAT_SOURCE_FILE_LIST *.cpp *.hpp *.h) + add_custom_target(clangformat COMMAND ${CLANGFORMAT_EXE} -style=file -i ${FORMAT_SOURCE_FILE_LIST} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +endif() + +############################# +# Testing steps +############################# +# HIT is not compatible with Windows +if(NOT WIN32) +set(HIP_ROOT_DIR ${CMAKE_CURRENT_BINARY_DIR}) +set(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}) +if(HIP_PLATFORM STREQUAL "nvidia") + execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/include" "${HIP_ROOT_DIR}/include" RESULT_VARIABLE RUN_HIT ERROR_QUIET) +endif() +execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_COMMON_INCLUDE_DIR}/hip/" "${HIP_ROOT_DIR}/include/hip/" RESULT_VARIABLE RUN_HIT ERROR_QUIET) +execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_COMMON_DIR}/cmake" "${HIP_ROOT_DIR}/cmake" RESULT_VARIABLE RUN_HIT ERROR_QUIET) +if(${RUN_HIT} EQUAL 0) + execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_COMMON_BIN_DIR}" "${HIP_ROOT_DIR}/bin" RESULT_VARIABLE RUN_HIT ERROR_QUIET) +endif() +if(HIP_CATCH_TEST EQUAL "1") + message(STATUS "Building of catch tests through hipamd is no longer supported. Testing targets will not be available. catch tests have been moved to an independent github project hip-tests. Please refer to hip-tests Readme for build instructions! ") +else() + if(${RUN_HIT} EQUAL 0) + set(CMAKE_MODULE_PATH "${HIP_ROOT_DIR}/cmake" ${CMAKE_MODULE_PATH}) + include(${HIP_COMMON_DIR}/tests/hit/HIT.cmake) + include(${HIP_COMMON_DIR}/tests/Tests.cmake) + else() + message(STATUS "Testing targets will not be available. 
To enable them please ensure that the HIP installation directory is writeable. Use -DCMAKE_INSTALL_PREFIX to specify a suitable location") + endif() +endif() +endif() + +############################# +# Code analysis +############################# +# Target: clang +if(HIP_HIPCC_EXECUTABLE) + add_custom_target(analyze + COMMAND ${HIP_HIPCC_EXECUTABLE} -fvisibility=hidden -fvisibility-inlines-hidden --analyze --analyzer-outputtext -isystem ${ROCM_PATH}/${CMAKE_INSTALL_INCLUDEDIR} -Wno-unused-command-line-argument -I${ROCM_PATH}/${CMAKE_INSTALL_INCLUDEDIR} -c src/*.cpp -Iinclude/ -I./ + WORKING_DIRECTORY ${HIP_SRC_PATH}) + if(CPPCHECK_EXE) + add_dependencies(analyze cppcheck) + endif() +endif() + +#File reorg Backward compatibility function +if(NOT WIN32) + if(FILE_REORG_BACKWARD_COMPATIBILITY) + include(hip-backward-compat.cmake) + endif() +endif() diff --git a/projects/clr/hipamd/INSTALL.md b/projects/clr/hipamd/INSTALL.md new file mode 100644 index 0000000000..8b20e4d0b2 --- /dev/null +++ b/projects/clr/hipamd/INSTALL.md @@ -0,0 +1,62 @@ +## Prerequisites + +- Install mesa-common-dev +- Either build or install [COMGR](https://github.com/RadeonOpenCompute/ROCm-CompilerSupport), [CLANG](https://github.com/RadeonOpenCompute/llvm-project) and [Device Library](https://github.com/RadeonOpenCompute/ROCm-Device-Libs) + +## Branch of repository + +Before getting the HIP source code, set the variable HIP_BRANCH to the expected branch of the repository. +For example, for the ROCm 5.0 release branch, set +``` +export HIP_BRANCH=rocm-5.0.x +``` + +For the ROCm 5.1 release branch, set +``` +export HIP_BRANCH=rocm-5.1.x +``` +Use a similar format for future branches. 
+ +## Getting the source code + +```bash +git clone -b $HIP_BRANCH https://github.com/ROCm-Developer-Tools/hipamd.git +git clone -b $HIP_BRANCH https://github.com/ROCm-Developer-Tools/hip.git +git clone -b $HIP_BRANCH https://github.com/ROCm-Developer-Tools/ROCclr.git +git clone -b $HIP_BRANCH https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime.git +``` + +## Set the environment variables + +```bash +export HIPAMD_DIR="$(readlink -f hipamd)" +export HIP_DIR="$(readlink -f hip)" +export ROCCLR_DIR="$(readlink -f ROCclr)" +export OPENCL_DIR="$(readlink -f ROCm-OpenCL-Runtime)" +``` + +## Build HIPAMD + +The commands to build hipamd are as follows: +```bash +cd "$HIPAMD_DIR" +mkdir -p build; cd build +cmake -DHIP_COMMON_DIR=$HIP_DIR -DAMD_OPENCL_PATH=$OPENCL_DIR -DROCCLR_PATH=$ROCCLR_DIR -DCMAKE_PREFIX_PATH="/" .. +make -j$(nproc) +sudo make install +``` + +Please note that HIP_COMMON_DIR must point to the hip common ([HIP](https://github.com/ROCm-Developer-Tools/HIP/)) source code. +By default, the release version of hipamd is built. hip will be installed to the default path /hip + +Developers can use the cmake option CMAKE_INSTALL_PREFIX to define the path where hip is to be installed; the commands to build are as follows: +```bash +cd "$HIPAMD_DIR" +mkdir -p build; cd build +cmake -DHIP_COMMON_DIR=$HIP_DIR -DAMD_OPENCL_PATH=$OPENCL_DIR -DROCCLR_PATH=$ROCCLR_DIR -DCMAKE_PREFIX_PATH="/" -DCMAKE_INSTALL_PREFIX=$PWD/install .. +make -j$(nproc) +sudo make install +``` + +After installation, make sure HIP_PATH points to the path where hip is installed. + diff --git a/projects/clr/hipamd/LICENSE.txt b/projects/clr/hipamd/LICENSE.txt new file mode 100644 index 0000000000..4cbb639232 --- /dev/null +++ b/projects/clr/hipamd/LICENSE.txt @@ -0,0 +1,20 @@ +Copyright (c) 2008 - 2022 Advanced Micro Devices, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/projects/clr/hipamd/README.md b/projects/clr/hipamd/README.md new file mode 100644 index 0000000000..eb75364424 --- /dev/null +++ b/projects/clr/hipamd/README.md @@ -0,0 +1,24 @@ +## What is this repository for? ### + +This repository provides the [HIP](https://github.com/ROCm-Developer-Tools/HIP) implementation specifically for the AMD platform. + +## DISCLAIMER + +The information presented in this document is for informational purposes only and may contain technical inaccuracies, omissions, and typographical errors. The information contained herein is subject to change and may be rendered inaccurate for many reasons, including but not limited to product and roadmap changes, component and motherboard version changes, new model and/or product releases, product differences between differing manufacturers, software changes, BIOS flashes, firmware upgrades, or the like. 
Any computer system has risks of security vulnerabilities that cannot be completely prevented or mitigated. AMD assumes no obligation to update or otherwise correct or revise this information. However, AMD reserves the right to revise this information and to make changes from time to time to the content hereof without obligation of AMD to notify any person of such revisions or changes. THIS INFORMATION IS PROVIDED “AS IS.” AMD MAKES NO REPRESENTATIONS OR WARRANTIES WITH RESPECT TO THE CONTENTS HEREOF AND ASSUMES NO RESPONSIBILITY FOR ANY INACCURACIES, ERRORS, OR OMISSIONS THAT MAY APPEAR IN THIS INFORMATION. AMD SPECIFICALLY DISCLAIMS ANY IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR ANY PARTICULAR PURPOSE. IN NO EVENT WILL AMD BE LIABLE TO ANY PERSON FOR ANY RELIANCE, DIRECT, INDIRECT, SPECIAL, OR OTHER CONSEQUENTIAL DAMAGES ARISING FROM THE USE OF ANY INFORMATION CONTAINED HEREIN, EVEN IF AMD IS EXPRESSLY ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies. + +© 2021 Advanced Micro Devices, Inc. All Rights Reserved. + +## Repository branches: + +The hipamd repository maintains several branches. The branches that are of importance are: + +* Main branch: This is the stable branch. It is up to date with the latest release branch; for example, if the latest HIP release is rocm-4.4, the main branch will be the repository based on this release. +* Develop branch: This is the default branch, on which the new features are still under development and visible. While this may be of interest to many, it should be noted that this branch and the features under development might not be stable. +* Release branches: These are branches corresponding to each ROCM release, listed with release tags, such as rocm-4.4, etc. 
+ +## Release tagging: + +hipamd releases are typically tagged following the naming convention of each ROCM release, to help differentiate them. + +* rocm x.yy: These are the stable releases based on the ROCM release. + This type of release is typically made once a month. \ No newline at end of file diff --git a/projects/clr/hipamd/bin/roc-obj b/projects/clr/hipamd/bin/roc-obj new file mode 100755 index 0000000000..0b0d12f5f2 --- /dev/null +++ b/projects/clr/hipamd/bin/roc-obj @@ -0,0 +1,265 @@ +#!/bin/bash + +#| Usage: roc-obj [-h] [-t REGEXP] [-o OUTDIR] [-I REPLACE-STRING|-i] [-d] +#| EXECUTABLE... [: [SUFFIX COMMAND [ARGS...] ;]...] +#| +#| Wrapper for roc-obj-ls and roc-obj-extract which extracts code objects +#| embedded in each EXECUTABLE and optionally applies COMMANDs to them. +#| +#| If the POSIX extended regular expression REGEXP is specified, only embedded +#| code objects whose Target ID matches REGEXP are extracted; otherwise all +#| code objects are extracted. +#| +#| If the directory path OUTDIR is specified, it is created if it does not +#| already exist, and the code objects are extracted into it; otherwise they +#| are extracted into the current working directory. +#| +#| The extracted files are named by appending a ":" followed by the Target ID +#| of the extracted code object to the input filename EXECUTABLE they were +#| extracted from. +#| +#| If the list of EXECUTABLE arguments is terminated with ":" then after all +#| selected files are successfully extracted, zero or more additional embedded +#| command-lines, separated by ";", are read from the command-line starting +#| after the ":". These must specify a SUFFIX used to name the output of the +#| corresponding COMMAND, along with the COMMAND name and any ARGS to it. +#| +#| Then each COMMAND is executed, as if by a POSIX "execvp" function, once for +#| each embedded code object that was created in OUTDIR. 
(Note: Typically this +#| means the user must ensure the commands are present in at least one +#| directory of the "PATH" environment variable.) For each execution of +#| COMMAND: +#| +#| If REPLACE-STRING is specified, all instances of REPLACE-STRING in ARGS are +#| replaced with the file path of the extracted code object before executing +#| COMMAND. +#| +#| The standard input is redirected from the extracted code object. +#| +#| If SUFFIX is "-" the standard output is not redirected. If SUFFIX is "!" the +#| standard output is redirected to /dev/null. Otherwise, the standard output +#| is redirected to files named by the file path of the extracted code object +#| with SUFFIX appended. +#| +#| Note: The executables roc-obj-ls, roc-obj-extract, and llvm-objdump (in the +#| case of disassembly requested using the -d flag) are searched for in a +#| unique way. A series of directories are searched, some conditionally, until +#| a suitable executable is found. If all directories are searched without +#| finding the executable, an error occurs. The first directory searched is the +#| one containing the hard-link to the roc-obj being executed, known as the +#| "base directory". Next, if the environment variable HIP_CLANG_PATH is set, +#| it is searched; otherwise, the base directory path is appended with +#| "../../llvm/bin" and it is searched. Finally, the PATH is searched as if by +#| a POSIX "execvp" function. 
+#| +#| Option Descriptions: +#| -h, --help print this help text and exit +#| -t, --target-id only extract code objects from EXECUTABLE whose Target ID +#| matches the POSIX extended regular expression REGEXP +#| -o, --outdir set the output directory, which is created if it +#| does not exist +#| -I, --replace-string replace all occurrences of the literal string +#| REPLACE-STRING in ARGS with the input filename +#| -i, --replace equivalent to -I{} +#| -d, --disassemble disassemble extracted code objects; equivalent to +#| : .s llvm-objdump -d - ; +#| +#| Example Usage: +#| +#| Extract all code objects embedded in a.so: +#| $ roc-obj a.so +#| +#| Extract all code objects embedded in a.so, b.so, and c.so: +#| $ roc-obj a.so b.so c.so +#| +#| Extract all code objects embedded in a.so with "gfx9" in their Target ID: +#| $ roc-obj -t gfx9 a.so +#| +#| Extract all code objects embedded in a.so into output/ (creating it if needed): +#| $ roc-obj -o output/ a.so +#| +#| Extract all code objects embedded in a.so with "gfx9" in their Target ID +#| into output/ (creating it if needed): +#| $ roc-obj -t gfx9 -o output/ a.so +#| +#| Extract all code objects embedded in a.so, and then disassemble each of them +#| to files ending with .s: +#| $ roc-obj -d a.so +#| +#| Extract all code objects embedded in a.so, and count the number of bytes in +#| each, writing the results to files ending with .count: +#| $ roc-obj a.so : .count wc -c +#| +#| Extract all code objects embedded in a.so, and inspect their ELF headers +#| using llvm-readelf (which will not read from standard input), writing to +#| files ending with .hdr: +#| $ roc-obj -I'{}' a.so : .hdr llvm-readelf -h '{}' +#| +#| Extract all code objects embedded in a.so, and then extract each of their +#| .text sections using llvm-objcopy (which won't read from standard input +#| or write to standard output): +#| $ roc-obj -I'{}' a.so : ! 
llvm-objcopy -O binary --only-section=.text '{}' '{}.text'
+#|
+#| Extract all code objects embedded in a.so, b.so, and c.so with target
+#| feature xnack disabled into directory out/. Then, for each:
+#| Write the size in bytes into a file ending with .count, and
+#| Write a textual description of the ELF headers to a file ending with .hdr, and
+#| Extract the .text section to a file ending with .text
+#| $ roc-obj -I'{}' -t xnack- -o out/ a.so b.so c.so : \
+#| .count wc -c \;
+#| .hdr llvm-readelf -h '{}' \;
+#| ! llvm-objcopy -O binary --only-section=.text '{}' '{}.text'
+
+set -euo pipefail
+
+# Print the help text: every line of this script that begins with the help
+# marker, with the marker (and one optional following space) removed.
+usage() {
+  sed -n 's/^#| \?\(.*\)$/\1/p' "$0"
+}
+
+# Print the help text and exit with the given status.
+# The text goes to stdout when the status is 0 and to stderr otherwise.
+usage_then_exit() {
+  local -r status="$1"; shift
+  usage >&$(( status ? 2 : 1 ))
+  exit "$status"
+}
+
+# Print all arguments as a single "error: ..." line on stderr and exit 1.
+fail() {
+  printf "error: %s\n" "$*" >&2
+  exit 1
+}
+
+# Account for the fact that we do not necessarily put ROCm tools in the PATH,
+# nor do we have a single, unified ROCm "bin/" directory.
+#
+# Note that this is only used for roc-obj-ls, roc-obj-extract, and "shortcut"
+# options like -d, and the user can still use any copy of llvm-* by explicitly
+# invoking it with a full path, e.g. : /path/to/llvm-* ... ;
+#
+# Prints the path of the first match found in BASE_DIR, then in HIP_CLANG_PATH
+# (or BASE_DIR/../../llvm/bin when HIP_CLANG_PATH is unset), or the bare
+# command name when it is only reachable through PATH. Fails otherwise.
+find_rocm_executable_or_fail() {
+  local -r command="$1"; shift
+  # Keep the loop variable local so it does not leak into the global scope.
+  local dir file
+  local searched=()
+  for dir in "$BASE_DIR" "${HIP_CLANG_PATH:-"$BASE_DIR/../../llvm/bin"}"; do
+    file="$dir/$command"
+    # -x alone is also true for searchable directories; require a regular file.
+    if [[ -f $file && -x $file ]]; then
+      printf "%s" "$file"
+      return
+    fi
+    searched+=("$dir")
+  done
+  if hash "$command" 2>/dev/null; then
+    printf "%s" "$command"
+  else
+    fail could not find "$command" in "${searched[*]}" or PATH
+  fi
+}
+
+# Extract the embedded code objects of the executable file given as the first
+# argument into OPT_OUTDIR, filtering them via OPT_TARGET_ID.
+#
+# Deletes any resulting files which are empty, and prints the paths of the
+# remaining files.
+extract() {
+  local -r executable="$1"; shift
+  local prefix output uri
+  prefix="$(basename -- "$executable")"
+  # We want the shell to split the result of roc-obj-ls on whitespace, as
+  # neither the Target ID nor the URI can have embedded spaces.
+  # shellcheck disable=SC2046
+  set -- $("$ROC_OBJ_LS" -- "$executable" | awk "\$2~/$OPT_TARGET_ID/")
+  # Each listed code object contributes three fields: bundle number, Target ID
+  # and URI. Check up front instead of failing half-way through on a shift.
+  (( $# % 3 == 0 )) || fail expected three fields per code object from roc-obj-ls
+  while (( $# )); do
+    # Output name is "<executable>:<bundle number>.<Target ID>".
+    output="$prefix:$1.$2"
+    uri="$3"
+    shift 3
+    [[ -n $OPT_OUTDIR ]] && output="$OPT_OUTDIR/$output"
+    "$ROC_OBJ_EXTRACT" -o - -- "$uri" >"$output"
+    if [[ -s $output ]]; then
+      printf '%s\n' "$output"
+    else
+      rm "$output"
+    fi
+  done
+}
+
+# Run a command over a list of inputs, naming output files with the supplied
+# suffix and applying OPT_REPLACE_STRING if needed.
+#
+# Arguments are of the form:
+# $suffix $command $args... ; $inputs
+run_command() {
+  local -r suffix="$1"; shift
+  local -r command="$1"; shift
+  local args=()
+  local arg input output
+  while (( $# )); do
+    arg="$1"; shift
+    [[ $arg == ';' ]] && break
+    args+=("$arg")
+  done
+  local inputs=("$@")
+  for input in "${inputs[@]}"; do
+    case "$suffix" in
+      '-') output=/dev/stdout;;
+      '!') output=/dev/null;;
+      *) output="$input$suffix";;
+    esac
+    # Quote the pattern so REPLACE-STRING is matched literally, as documented,
+    # rather than being interpreted as a glob pattern.
+    "$command" "${args[@]//"$OPT_REPLACE_STRING"/$input}" <"$input" >"$output"
+  done
+}
+
+# Entry point. Arguments are of the form:
+# $executables... [: $suffix $command $args... [; $suffix $command $args...]...]
+# Extracts the code objects of every executable, then runs each command (and
+# the -d disassembly shortcut, if requested) over the extracted files.
+main() {
+  [[ -n $OPT_OUTDIR ]] && mkdir -p "$OPT_OUTDIR"
+  local inputs=()
+  local executable suffix command arg
+  while (( $# )); do
+    executable="$1"; shift
+    [[ $executable == : ]] && break
+    # Append the file paths extracted from $executable to $inputs
+    readarray -t -O "${#inputs[@]}" inputs < <(extract "$executable")
+  done
+  (( ${#inputs[@]} )) || fail no executables specified
+  while (( $# )); do
+    suffix="$1"; shift
+    command="$1"; shift
+    local args=()
+    while (( $# )); do
+      arg="$1"; shift
+      [[ $arg == \; ]] && break
+      args+=("$arg")
+    done
+    run_command "$suffix" "$command" "${args[@]}" \; "${inputs[@]}"
+  done
+  # Use a full "if" here: a trailing "(( ... )) && cmd" would make main return
+  # 1 (and, under "set -e", make the script exit 1) whenever -d was not given.
+  if (( OPT_DISASSEMBLE )); then
+    run_command .s "$OBJDUMP" -d - \; "${inputs[@]}"
+  fi
+}
+
+OPT_TARGET_ID=''
+OPT_OUTDIR=''
+OPT_REPLACE_STRING=''
+OPT_DISASSEMBLE=0
+! getopt -T || fail util-linux enhanced getopt required
+# The long option names must match both the help text and the case arms below
+# (--replace-string takes an argument, --replace does not).
+getopt="$(getopt -o +ht:o:I:id \
+  --long help,target-id:,outdir:,replace-string:,replace,disassemble \
+  -n roc-obj -- "$@")"
+eval set -- "$getopt"
+unset getopt
+while true; do
+  case "$1" in
+    -h | --help) usage_then_exit 0;;
+    # Escape "/" so the regular expression can be embedded in an awk /.../ match.
+    -t | --target-id) OPT_TARGET_ID="${2//\//\\\/}"; shift 2;;
+    -o | --outdir) OPT_OUTDIR="$2"; shift 2;;
+    -I | --replace-string) OPT_REPLACE_STRING="$2"; shift 2;;
+    -i | --replace) OPT_REPLACE_STRING='{}'; shift;;
+    -d | --disassemble) OPT_DISASSEMBLE=1; shift;;
+    --) shift; break;;
+    *) usage_then_exit 1;;
+  esac
+done
+readonly -- OPT_TARGET_ID OPT_OUTDIR OPT_REPLACE_STRING OPT_DISASSEMBLE
+
+# We expect to be installed as ROCM_PATH/hip/bin/roc-obj, which means BASE_DIR
+# is ROCM_PATH/hip/bin.
+BASE_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"
+(( OPT_DISASSEMBLE )) && OBJDUMP="$(find_rocm_executable_or_fail llvm-objdump)"
+ROC_OBJ_LS="$(find_rocm_executable_or_fail roc-obj-ls)"
+ROC_OBJ_EXTRACT="$(find_rocm_executable_or_fail roc-obj-extract)"
+readonly -- BASE_DIR OBJDUMP ROC_OBJ_LS ROC_OBJ_EXTRACT
+
+main "$@"
diff --git a/projects/clr/hipamd/bin/roc-obj-extract b/projects/clr/hipamd/bin/roc-obj-extract
new file mode 100755
index 0000000000..9420a4b059
--- /dev/null
+++ b/projects/clr/hipamd/bin/roc-obj-extract
@@ -0,0 +1,244 @@
+#!/usr/bin/perl
+# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+use strict;
+use File::Copy;
+use File::Spec;
+use File::Basename;
+use File::Which;
+use Cwd 'realpath';
+use Getopt::Std;
+use List::Util qw(max);
+use URI::Encode;
+
+# State shared between option handling and the per-URI extraction loop.
+my $extract_range_specifier;
+my $extract_pid;
+my $extract_file;
+my $output_file;
+my $output_path;
+my $extract_offset;
+my $extract_size;
+my $pid_running;
+my $verbose=0;
+my $error=0;
+my $output_to_stdout=0;
+
+# Print the command-line help to STDOUT and exit. The exit status is the
+# number of errors recorded so far in $error (0 when called for -h).
+sub usage {
+  print("Usage: $0 [-o|v|h] URI... \n");
+  print(" URIs can be read from STDIN, one per line.\n");
+  print(" From the URIs specified, extracts code objects into files named: ");
+  print("-[pid]-offset-size.co\n\n");
+  print("Options:\n");
+  print(" -o \tPath for output. If \"-\" specified, code object is printed to STDOUT.\n");
+  print(" -v \tVerbose output to STDOUT.\n");
+  print(" -h \tShow this help message.\n");
+  print("\nURI syntax:\n");
+  print("\tcode_object_uri ::== file_uri | memory_uri\n");
+  print("\tfile_uri ::== \"file://\" extract_file [ range_specifier ]\n");
+  print("\tmemory_uri ::== \"memory://\" process_id range_specifier\n");
+  print("\trange_specifier ::== range_delimiter range_attribute [\"&\" range_attribute]\n");
+  print("\trange_delimiter ::== \"#\" | \"?\"\n");
+  print("\trange_attribute ::== [\"offset=\" number | \"size=\" number ]\n");
+  print("\textract_file ::== URI_ENCODED_OS_FILE_PATH\n");
+  print("\tprocess_id ::== DECIMAL_NUMBER\n");
+  print("\tnumber ::== HEX_NUMBER \| DECIMAL_NUMBER \| OCTAL_NUMBER\n\n");
+  print("\tExample: file://dir1/dir2/hello_world#offset=133&size=14472 \n");
+  print("\t memory://1234#offset=0x20000&size=3000\n\n");
+  print(" NOTES:\n\n");
+  print("\tWhen specifying a URI in a shell command you will need to escape the \'&\' character in the range_specifier.\n");
+  print("\tIf \"size=\" is not specified, the default is the remainder of the file from the given offset.\n\n");
+
+  exit($error);
+}
+
+# Process options
+my %options=();
+getopts('vho:', \%options);
+
+if (defined $options{h}) {
+  usage();
+}
+
+if (defined $options{v}) {
+  $verbose = 1;
+}
+
+if (defined $options{o}) {
+  $output_path = $options{o};
+  if ($output_path eq "-") {
+    # "-" selects STDOUT instead of an output directory.
+    $output_to_stdout=1;
+  } else {
+    (-d $output_path) || die("Error: Path \'$output_path\' cannot be found.\n");
+  }
+}
+
+# Only push STDIN if there are no arguments -- otherwise this
+# consumes the caller's stdin by accident.
+# push STDIN to ARGV array (one URI per line), unless STDIN is a terminal.
+if ($#ARGV < 0) {
+  push @ARGV, <STDIN> unless -t STDIN;
+}
+
+# error check: enough arguments presented.
+if ($#ARGV < 0) {
+  print(STDERR "Error: No arguments.\n"); $error++;
+  usage();
+}
+
+# error check: command dd is available.
+my $dd_cmd = which("dd");
+(-f $dd_cmd) || die("Error: Can't find dd command\n");
+
+# Convert a range number taken from a URI into a numeric value. The usage text
+# allows hex, decimal and octal numbers; text with a leading "0" is handed to
+# oct(), which understands "0x..." hex as well as plain octal.
+sub parse_number {
+  my ($text) = @_;
+  return 0 if (!defined($text) || $text eq "");
+  return ($text =~ /^0/) ? oct($text) : int($text);
+}
+
+# Extract one code object per URI. Errors are counted in $error and the loop
+# moves on to the next URI.
+foreach my $uri_str(@ARGV) {
+  chomp $uri_str;
+
+  # Reset the per-URI state so nothing leaks over from the previous URI.
+  $extract_range_specifier = undef;
+  $extract_offset = 0;
+  $extract_size = 0;
+
+  my ($uri_protocol, $specs) = split(/:\/\//,$uri_str);
+  my $obj_uri_encode = URI::Encode->new();
+  my $decoded_extract_file;
+  my $is_memory_uri = 0;
+
+  if (lc($uri_protocol) eq "file") {
+    # expect file path
+    ($extract_file, $extract_range_specifier) = split(/[#,?]/,$specs);
+
+    # decode the file name. URIs may have file/path names with non-alphanumeric characters, which will be encoded with %. We need to decode these.
+    $decoded_extract_file = $obj_uri_encode->decode($extract_file);
+
+    # verify file exists:
+    if (! -e $decoded_extract_file) {
+      print(STDERR "Error: can't find file: $decoded_extract_file\n"); $error++;
+      next;
+    }
+
+    # use the output_path if specified, otherwise use current working dir.
+    if ($output_path ne "") {
+      $output_file = File::Spec->catfile($output_path, basename($decoded_extract_file));
+    } else {
+      $output_file = basename($decoded_extract_file);
+    }
+
+  } elsif ( lc($uri_protocol) eq "memory") {
+    $is_memory_uri = 1;
+
+    # expect memory specifier
+    ($extract_pid, $extract_range_specifier) = split(/[#,?]/,$specs);
+
+    # verify pid is currently running
+    $pid_running = kill 0, $extract_pid;
+    if (! $pid_running) {
+      print(STDERR "Error: PID: $extract_pid is NOT running\n"); $error++;
+      next;
+    }
+
+    # get pid filename:
+    $extract_file = "/proc/$extract_pid/mem";
+
+    # verify file exists:
+    if (! -e $extract_file) {
+      print(STDERR "Error: can't find file: $extract_file\n"); $error++;
+      next;
+    }
+
+    # for extracting from a pid, make the output file in the current dir/path with the pid value as a name.
+    $output_file = "pid${extract_pid}";
+
+    # need to set $decoded_extract_file, because later we use this for other checks.
+    $decoded_extract_file = $extract_file;
+  } else {
+    # error, unrecognized Code Object URI
+    print(STDERR "Error: \'$uri_protocol\' is not recognized as a supported code object URI.\n"); $error++;
+    next;
+  }
+
+  # it is valid to not give a range specifier in a URI, in which case the entire code object will be extracted.
+  if (defined($extract_range_specifier) && $extract_range_specifier ne "") {
+    my $size_specified = 0;
+
+    foreach my $attribute (split(/[&]/,$extract_range_specifier)) {
+      my ($key, $value) = split(/=/,$attribute);
+      if ($key eq "size") {
+        $extract_size = $value;
+        $size_specified = 1;
+      } elsif ($key eq "offset") {
+        $extract_offset = $value;
+      }
+    }
+
+    if ($size_specified != 1) {
+      # "size" not specified. default to rest of file (total size - offset)
+      $extract_size = (-s $decoded_extract_file) - parse_number($extract_offset);
+    }
+
+  } else {
+    # Error if URI is a memory request, and we have no range_specifier.
+    # Decide from this URI's protocol, not from state left by an earlier URI.
+    if ($is_memory_uri) {
+      print(STDERR "Error: must specify a Range Specifier (offset and size) for a memory URI: $uri_str\n"); $error++;
+      next;
+    }
+
+    $extract_offset = 0;
+    $extract_size = -s $decoded_extract_file;
+  }
+
+  # Numeric values handed to dd; $extract_offset/$extract_size keep the text as
+  # written in the URI for messages and for the output file name.
+  my $offset_bytes = parse_number($extract_offset);
+  my $size_bytes = parse_number($extract_size);
+
+  # We should have at least a valid size to extract; ignore cases with size=0.
+  if ($size_bytes != 0) {
+    print("Reading input file \"$extract_file\" ...\n") if ($verbose);
+
+    # only if this is a File URI.
+    if (lc($uri_protocol) eq "file") {
+      # verify that offset+size does not exceed file size (a negative size
+      # means the offset itself lies beyond the end of the file):
+      my $file_size = -s $decoded_extract_file;
+      if ( $size_bytes < 0 || ($offset_bytes + $size_bytes) > $file_size ) {
+        print(STDERR "Error: requested offset($extract_offset) + size($extract_size) exceeds file size($file_size) for file \"$decoded_extract_file\".\n"); $error++;
+        next;
+      }
+    }
+
+    # Make sure the input can be opened for reading before running dd; the
+    # handle itself is not needed, so release it immediately.
+    open(my $input_fp, "<", $decoded_extract_file) || die $!;
+    close($input_fp);
+
+    # extract the code object. Arguments are passed to dd as a list, so no
+    # shell is involved and file names need no quoting.
+    my @dd_args = ("if=$decoded_extract_file");
+    if (!$output_to_stdout) {
+      push @dd_args, "of=${output_file}-offset${extract_offset}-size${extract_size}.co";
+    }
+    push @dd_args, "skip=$offset_bytes", "count=$size_bytes", "bs=1", "status=none";
+
+    my $dd_cmd_str = join(" ", $dd_cmd, @dd_args);
+
+    print("DD Command: $dd_cmd_str\n") if ($verbose);
+
+    my $dd_ret = system($dd_cmd, @dd_args);
+    if ($dd_ret != 0) {
+      print(STDERR "Error: DD command ($dd_cmd_str) failed with RC: $dd_ret\n"); $error++;
+    }
+
+    print("Extract request: file: $extract_file offset: $extract_offset size: $extract_size\n") if ($verbose);
+  } else {
+    print("Warning: trying to extract from $extract_file at offset=$extract_offset with size=0. Nothing to extract.\n") if ($verbose);
+  }
+
+} # end of for each (URI) argument
+
+exit($error);
diff --git a/projects/clr/hipamd/bin/roc-obj-ls b/projects/clr/hipamd/bin/roc-obj-ls
new file mode 100755
index 0000000000..7ce201978e
--- /dev/null
+++ b/projects/clr/hipamd/bin/roc-obj-ls
@@ -0,0 +1,192 @@
+#!/usr/bin/perl
+# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+use strict;
+use File::Copy;
+use File::Spec;
+use File::Basename;
+use File::Which;
+use Cwd 'realpath';
+use Getopt::Std;
+use List::Util qw(max);
+use URI::Encode;
+
+# Print the command-line help to STDOUT and exit with status 0.
+# Lists every option the script accepts, including -d.
+sub usage {
+  print("Usage: $0 [-v|d|h] executable...\n");
+  print("List the URIs of the code objects embedded in the specified host executables.\n");
+  print("-v \tVerbose output (includes Entry ID)\n");
+  print("-d \tDebug output (section, bundle and entry offsets)\n");
+  print("-h \tShow this help message\n");
+  exit;
+}
+
+# sub to read a qword. 1st arg is a FP, 2nd arg is ref to destination var.
+# Dies if fewer than 8 bytes can be read; the bytes are decoded as an
+# unsigned little-endian 64-bit integer.
+sub readq {
+  my ($input_fp, $qword) = @_;
+  read($input_fp, my $bytes, 8) == 8 or die("Error: Failed to read 8 bytes\n");
+  ${$qword} = unpack("Q<", $bytes);
+}
+
+# sub to move address to next alignment boundary
+# first arg is address to move
+# second arg is alignment requirement/boundary
+# returns the aligned address (unchanged if it is already aligned)
+sub align_up {
+  my ($address, $alignment) = @_;
+  return int(($address + ($alignment - 1)) / $alignment) * $alignment;
+}
+
+# Process options
+my %options=();
+getopts('vhd', \%options);
+
+if (defined $options{h}) {
+  usage();
+}
+
+my $verbose = $options{v};
+my $debug = $options{d};
+
+# Bundle numbers are 1-based and keep counting across all executables listed.
+my $num_bundles = 1;
+my $bundle_alignment = 4096;
+
+# look for objdump
+my $objdump = which("objdump");
+(-f $objdump) || die("Error: Can't find objdump command\n");
+
+# for each argument (which should be an executable):
+foreach my $executable_file(@ARGV) {
+
+  # debug message
+  print("Reading input file \"$executable_file\" ...\n") if ($debug);
+
+  # verify/open file specified.
+  open (INPUT_FP, "<", $executable_file) || die("Error: failed to open file: $executable_file\n");
+  binmode INPUT_FP;
+
+  # kernel section information: size is column 3 and file offset is column 6
+  # of the matching "objdump -h" line, both printed in hex.
+  my $escaped_name=quotemeta($executable_file);
+  my $bundle_section_name = ".hip_fatbin";
+  my $bundle_section_size = hex(`$objdump -h $escaped_name | grep $bundle_section_name | awk '{print \$3}'`);
+  my $bundle_section_offset = hex(`$objdump -h $escaped_name | grep $bundle_section_name | awk '{print \$6}'`);
+
+  $bundle_section_size or die("Error: No kernel section found\n");
+
+  my $bundle_section_end = $bundle_section_offset + $bundle_section_size;
+
+  if ($debug) {
+    printf("Code Objects Bundle section size: %x\n",$bundle_section_size);
+    printf("Code Objects Bundle section offset: %x\n",$bundle_section_offset);
+    printf("Code Objects Bundle section end: %x\n\n",$bundle_section_end);
+  }
+
+  my $current_bundle_offset = $bundle_section_offset;
+  printf("Current Bundle offset: 0x%X\n",$current_bundle_offset) if ($debug);
+
+  # move fp to current_bundle_offset.
+  seek(INPUT_FP, $current_bundle_offset, 0);
+
+  while ($current_bundle_offset < $bundle_section_end) {
+
+    # skip OFFLOAD_BUNDLER_MAGIC_STR
+    my $magic_str;
+    my $read_bytes = read(INPUT_FP, $magic_str, 24);
+    if (($read_bytes != 24) || ($magic_str ne "__CLANG_OFFLOAD_BUNDLE__")) {
+      print(STDERR "Error: Offload bundle magic string not detected\n") if ($debug);
+      last;
+    }
+
+    # read number of bundle entries, which are code objects.
+    my $num_codeobjects;
+    readq(\*INPUT_FP,\$num_codeobjects);
+
+    # header with current bundle number and number of embedded code objects in that bundle.
+    # print("Bundle Number: $num_bundles with $num_codeobjects Code Objects:\n") if ($very_verbose);
+
+    my $end_of_current_bundle = $current_bundle_offset;
+
+    # Column Header.
+    printf("%-8s%-40s%35s\n","Bundle#","Entry ID:","URI:") if ($verbose);
+
+    # for each Bundle entry (code object) ....
+    for (my $iter = 0; $iter < $num_codeobjects; $iter++) {
+
+      print("\nEntry #$iter\n") if $debug;
+
+      # read bundle entry (code object) offset
+      my $entry_offset;
+      my $abs_offset;
+      readq(\*INPUT_FP,\$entry_offset);
+      printf("entry_offset: 0x%X\n",$entry_offset) if $debug;
+
+      # read bundle entry (code object) size
+      my $entry_size;
+      readq(\*INPUT_FP,\$entry_size);
+      printf("entry_size: 0x%X\n",$entry_size) if $debug;
+
+      # read triple size
+      my $triple_size;
+      readq(\*INPUT_FP,\$triple_size);
+      printf("triple_size: 0x%X\n",$triple_size) if $debug;
+
+      # read triple string
+      my $triple;
+      my $read_bytes = read(INPUT_FP, $triple, $triple_size);
+      $read_bytes == $triple_size or die("Error: Fail to parse triple\n");
+      print("triple: $triple\n") if $debug;
+
+      # because the bundle entry's offset is relative to the beginning of the bundled code object section.
+      $abs_offset = int($current_bundle_offset + $entry_offset);
+
+      # and we need to keep track of where we are in the current bundle.
+      $end_of_current_bundle = int($abs_offset + $entry_size);
+
+      printf("abs_offset: 0x%X\n",$abs_offset) if $debug;
+
+      my $obj_uri_encode = URI::Encode->new();
+      my $encoded_executable_file = $obj_uri_encode->encode($executable_file);
+
+      # One line per code object: bundle number, Entry ID (triple) and URI.
+      printf("%-8s%-40s%35s%s%s%s%s%s\n",$num_bundles,$triple,"file:\/\/",$encoded_executable_file,"\#offset=",$abs_offset, "\&size=",$entry_size);
+
+      printf("end_of_current_bundle: 0x%X\n",$end_of_current_bundle) if $debug;
+      # Pass the encoded name as an argument: it may contain "%" escapes that
+      # must not be interpreted as printf directives.
+      printf("Hex values: file:\/\/%s#offset=0x%X\&size=0x%X\n", $encoded_executable_file, $abs_offset, $entry_size) if $debug;
+
+    } # End of for each Bundle entry (code object) ...
+
+    printf("\n") if ($verbose);
+
+    # we've finished listing this current bundle ...
+    printf("current_bundle_offset: %x \n",$current_bundle_offset) if ($debug);
+    printf("bundle_section_end: %x \n", $bundle_section_end) if ($debug);
+
+    # move current_bundle_offset to next alignment boundary.
+    $current_bundle_offset = align_up($end_of_current_bundle,$bundle_alignment);
+    printf("Adjusting for alignment of next bundle: current_bundle_offset: %x \n\n\n", $current_bundle_offset) if ($debug);
+
+    # seek to the end of the current bundle:
+    seek(INPUT_FP, $current_bundle_offset, 0);
+
+    # increment the number of bundles listed.
+    $num_bundles = $num_bundles+1;
+
+  } # End of while loop
+
+  # done with this executable; release the handle before opening the next one.
+  close(INPUT_FP);
+
+} # End of for each command line argument
+
+exit(0);
diff --git a/projects/clr/hipamd/header_template.hpp.in b/projects/clr/hipamd/header_template.hpp.in
new file mode 100644
index 0000000000..2a23abb164
--- /dev/null
+++ b/projects/clr/hipamd/header_template.hpp.in
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +#ifndef @include_guard@ +#define @include_guard@ + +#if defined(__GNUC__) +#warning "This file is deprecated. Use header files from HIP_PATH/include, where HIP_PATH corresponds to HIP install path" +#else +#pragma message("This file is deprecated. Use header files from HIP_PATH/include, where HIP_PATH corresponds to HIP install path") +#endif + +@include_statements@ + +@hashzero_check@ + +@file_contents@ + +@hash_endif@ + +#endif diff --git a/projects/clr/hipamd/hip-backward-compat.cmake b/projects/clr/hipamd/hip-backward-compat.cmake new file mode 100644 index 0000000000..16348c935a --- /dev/null +++ b/projects/clr/hipamd/hip-backward-compat.cmake @@ -0,0 +1,261 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+cmake_minimum_required(VERSION 3.16.8)
+
+# Staging tree (inside the build directory) that holds the generated wrapper
+# headers and the backward-compatibility symlinks before they are installed.
+set(HIP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
+set(HIP_WRAPPER_DIR ${HIP_BUILD_DIR}/wrapper_dir)
+set(HIP_WRAPPER_INC_DIR ${HIP_WRAPPER_DIR}/include/hip)
+set(HIP_WRAPPER_BIN_DIR ${HIP_WRAPPER_DIR}/bin)
+set(HIP_WRAPPER_LIB_DIR ${HIP_WRAPPER_DIR}/lib)
+set(HIP_WRAPPER_CMAKE_DIR ${HIP_WRAPPER_DIR}/cmake)
+set(HIP_WRAPPER_FINDHIP_DIR ${HIP_WRAPPER_DIR}/FindHIP)
+set(HIP_SRC_INC_DIR ${HIP_SRC_PATH}/include/hip)
+set(HIP_SRC_BIN_DIR ${HIP_SRC_PATH}/bin)
+set(HIP_INFO_FILE ".hipInfo")
+
+# Add an always-built target named link_<file_name> that creates the symlink
+# <link_path> with <link_target> as its contents.
+function(hip_wrapper_add_symlink file_name link_target link_path)
+  add_custom_target(link_${file_name} ALL
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E create_symlink ${link_target} ${link_path})
+endfunction()
+
+# For every file matching <glob_expr>, add a symlink target that creates
+# <link_dir>/<name> pointing at <target_prefix>/<name>.
+function(hip_wrapper_symlink_glob glob_expr target_prefix link_dir)
+  file(GLOB matched_files ${glob_expr})
+  foreach(matched_file ${matched_files})
+    get_filename_component(file_name ${matched_file} NAME)
+    hip_wrapper_add_symlink("${file_name}" "${target_prefix}/${file_name}" "${link_dir}/${file_name}")
+  endforeach()
+endfunction()
+
+# Generate the wrapper for <input_file> with a copy of the original file
+# embedded inside an "#if 0" block, because some components grep the file for
+# values instead of preprocessing it.
+# Relies on include_guard and include_statements being set by the caller.
+function(set_file_contents input_file)
+  set(hashzero_check "#if 0
+/* The following is a copy of the original file for the benefit of build systems which grep for values
+ * in this file rather than preprocess it. This is just for backward compatibility */")
+
+  file(READ ${input_file} file_contents)
+  set(hash_endif "#endif")
+  get_filename_component(file_name ${input_file} NAME)
+  configure_file(${HIP_SRC_PATH}/header_template.hpp.in ${HIP_WRAPPER_INC_DIR}/${file_name})
+endfunction()
+
+# Generate one wrapper header (from header_template.hpp.in) for every public
+# HIP header and for every file in amd_detail/ and nvidia_detail/.
+function(generate_wrapper_header)
+  # create the respective folders below the wrapper include directory
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_INC_DIR}/amd_detail)
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_INC_DIR}/nvidia_detail)
+
+  # headers from include/hip in the build tree
+  file(GLOB public_headers ${HIP_BUILD_DIR}/include/hip/*.h)
+  foreach(header_path ${public_headers})
+    # include guard is derived from the upper-cased file name without extension
+    get_filename_component(guard_stem ${header_path} NAME_WE)
+    string(TOUPPER ${guard_stem} guard_stem)
+    set(include_guard "HIP_WRAPPER_INCLUDE_HIP_${guard_stem}_H")
+    # the wrapper only forwards to the header in its new location
+    get_filename_component(file_name ${header_path} NAME)
+    set(include_statements "#include \"../../../${CMAKE_INSTALL_INCLUDEDIR}/hip/${file_name}\"\n")
+    if(${file_name} STREQUAL "hip_version.h")
+      set_file_contents(${header_path})
+    else()
+      configure_file(${HIP_SRC_PATH}/header_template.hpp.in ${HIP_WRAPPER_INC_DIR}/${file_name})
+    endif()
+  endforeach()
+
+  # headers from include/hip/amd_detail and include/hip/nvidia_detail
+  foreach(detail_dir amd_detail nvidia_detail)
+    string(TOUPPER ${detail_dir} detail_tag)
+    file(GLOB detail_headers ${HIP_SRC_INC_DIR}/${detail_dir}/*)
+    foreach(header_path ${detail_headers})
+      get_filename_component(guard_stem ${header_path} NAME_WE)
+      string(TOUPPER ${guard_stem} guard_stem)
+      set(include_guard "HIP_WRAPPER_INCLUDE_HIP_${detail_tag}_${guard_stem}_H")
+      get_filename_component(file_name ${header_path} NAME)
+      set(include_statements "#include \"../../../../${CMAKE_INSTALL_INCLUDEDIR}/hip/${detail_dir}/${file_name}\"\n")
+
+      configure_file(${HIP_SRC_PATH}/header_template.hpp.in ${HIP_WRAPPER_INC_DIR}/${detail_dir}/${file_name})
+    endforeach()
+  endforeach()
+
+endfunction()
+
+# Create symlink targets for the binaries: everything in the source bin/
+# directory, .hipVersion, and everything in the build bin/ directory.
+function(create_binary_symlink)
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_BIN_DIR})
+  set(bin_link_prefix "../../${CMAKE_INSTALL_BINDIR}")
+
+  # all files from the source bin directory, followed by .hipVersion
+  hip_wrapper_symlink_glob("${HIP_SRC_BIN_DIR}/*" "${bin_link_prefix}" "${HIP_WRAPPER_BIN_DIR}")
+  hip_wrapper_add_symlink(".hipVersion" "${bin_link_prefix}/.hipVersion" "${HIP_WRAPPER_BIN_DIR}/.hipVersion")
+
+  # files from the build bin directory; .bat files are linked on Windows only
+  file(GLOB built_binaries ${HIP_BUILD_DIR}/bin/*)
+  foreach(built_binary ${built_binaries})
+    get_filename_component(file_name ${built_binary} NAME)
+    if(WIN32 OR NOT ${file_name} MATCHES ".bat$")
+      hip_wrapper_add_symlink("${file_name}" "${bin_link_prefix}/${file_name}" "${HIP_WRAPPER_BIN_DIR}/${file_name}")
+    endif()
+  endforeach()
+endfunction()
+
+# Create symlink targets for the libraries (unversioned, major-versioned and
+# fully versioned names for shared builds) and for .hipInfo.
+function(create_library_symlink)
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_LIB_DIR})
+  if(BUILD_SHARED_LIBS)
+    set(library_files "")
+    foreach(lib_base libamdhip64.so libhiprtc-builtins.so libhiprtc.so)
+      list(APPEND library_files
+        "${lib_base}"
+        "${lib_base}.${HIP_LIB_VERSION_MAJOR}"
+        "${lib_base}.${HIP_LIB_VERSION_STRING}")
+    endforeach()
+  else()
+    set(library_files "libamdhip64.a")
+  endif()
+  # .hipInfo is linked the same way, after the libraries
+  list(APPEND library_files "${HIP_INFO_FILE}")
+
+  foreach(file_name ${library_files})
+    hip_wrapper_add_symlink("${file_name}"
+      "../../${CMAKE_INSTALL_LIBDIR}/${file_name}" "${HIP_WRAPPER_LIB_DIR}/${file_name}")
+  endforeach()
+endfunction()
+
+# Create symlink targets for the CMake package files (hip, hip-lang, hiprtc)
+# and for the FindHIP modules.
+function(create_cmake_symlink)
+  set(cmake_link_prefix "../../../../${CMAKE_INSTALL_LIBDIR}/cmake")
+
+  # hip config files
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_CMAKE_DIR}/hip)
+  hip_wrapper_symlink_glob("${HIP_BUILD_DIR}/hip-config*"
+    "${cmake_link_prefix}/hip" "${HIP_WRAPPER_CMAKE_DIR}/hip")
+
+  # hip-lang config files
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_CMAKE_DIR}/hip-lang)
+  hip_wrapper_symlink_glob("${HIP_BUILD_DIR}/src/hip-lang-config*"
+    "${cmake_link_prefix}/hip-lang" "${HIP_WRAPPER_CMAKE_DIR}/hip-lang")
+
+  # hiprtc config files
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_CMAKE_DIR}/hiprtc)
+  hip_wrapper_symlink_glob("${HIP_BUILD_DIR}/hiprtc-config*"
+    "${cmake_link_prefix}/hiprtc" "${HIP_WRAPPER_CMAKE_DIR}/hiprtc")
+
+  # FindHIP helper modules
+  file(MAKE_DIRECTORY ${HIP_WRAPPER_FINDHIP_DIR}/FindHIP)
+  hip_wrapper_symlink_glob("${HIP_BUILD_DIR}/cmake/FindHIP/*.cmake"
+    "../../../${CMAKE_INSTALL_LIBDIR}/cmake/hip/FindHIP" "${HIP_WRAPPER_FINDHIP_DIR}/FindHIP")
+
+  # top-level cmake modules
+  hip_wrapper_symlink_glob("${HIP_BUILD_DIR}/cmake/*.cmake"
+    "../../${CMAKE_INSTALL_LIBDIR}/cmake/hip" "${HIP_WRAPPER_FINDHIP_DIR}")
+
+endfunction()
+
+# Use the template header file to generate the wrapper header files
+generate_wrapper_header()
+install(DIRECTORY ${HIP_WRAPPER_INC_DIR} DESTINATION hip/include COMPONENT dev)
+# Create symlink to binaries
+create_binary_symlink()
+install(DIRECTORY ${HIP_WRAPPER_BIN_DIR} DESTINATION hip COMPONENT dev)
+
+option(BUILD_SHARED_LIBS "Build the shared library" ON)
+# Create symlink to library files
+create_library_symlink()
+if(HIP_PLATFORM STREQUAL "amd" )
+  if(BUILD_SHARED_LIBS)
+    foreach(lib_base libamdhip64.so libhiprtc-builtins.so libhiprtc.so)
+      install(FILES ${HIP_WRAPPER_LIB_DIR}/${lib_base} DESTINATION hip/lib COMPONENT binary)
+      install(FILES ${HIP_WRAPPER_LIB_DIR}/${lib_base}.${HIP_LIB_VERSION_MAJOR} DESTINATION hip/lib COMPONENT binary)
+      install(FILES ${HIP_WRAPPER_LIB_DIR}/${lib_base}.${HIP_LIB_VERSION_STRING} DESTINATION hip/lib COMPONENT binary)
+    endforeach()
+  else()
+    install(FILES ${HIP_WRAPPER_LIB_DIR}/libamdhip64.a DESTINATION hip/lib COMPONENT binary)
+  endif()#End BUILD_SHARED_LIBS
+endif()#End HIP_PLATFORM AMD
+#install hipInfo
+install(FILES ${HIP_WRAPPER_LIB_DIR}/${HIP_INFO_FILE} DESTINATION hip/lib COMPONENT binary)
+#create symlink to cmake files
+create_cmake_symlink()
+install(DIRECTORY ${HIP_WRAPPER_CMAKE_DIR} DESTINATION hip/lib COMPONENT binary)
+install(DIRECTORY ${HIP_WRAPPER_FINDHIP_DIR}/ DESTINATION hip/cmake COMPONENT dev) diff --git a/projects/clr/hipamd/hip-config.cmake.in b/projects/clr/hipamd/hip-config.cmake.in new file mode 100755 index 0000000000..537a599244 --- /dev/null +++ b/projects/clr/hipamd/hip-config.cmake.in @@ -0,0 +1,266 @@ +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
cmake_minimum_required(VERSION 3.3)

@PACKAGE_INIT@
include(CheckCXXCompilerFlag)
include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND)
# Fallback definition of find_dependency() for CMake installations that do not
# ship CMakeFindDependencyMacro: forwards the EXACT/QUIET/REQUIRED state of the
# enclosing find_package() call to the dependency lookup.
if (NOT _CMakeFindDependencyMacro_FOUND)
  macro(find_dependency dep)
    if (NOT ${dep}_FOUND)
      set(cmake_fd_version)
      if (${ARGC} GREATER 1)
        set(cmake_fd_version ${ARGV1})
      endif()
      set(cmake_fd_exact_arg)
      if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT)
        set(cmake_fd_exact_arg EXACT)
      endif()
      set(cmake_fd_quiet_arg)
      if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
        set(cmake_fd_quiet_arg QUIET)
      endif()
      set(cmake_fd_required_arg)
      if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
        set(cmake_fd_required_arg REQUIRED)
      endif()
      find_package(${dep} ${cmake_fd_version}
          ${cmake_fd_exact_arg}
          ${cmake_fd_quiet_arg}
          ${cmake_fd_required_arg}
      )
      string(TOUPPER ${dep} cmake_dep_upper)
      if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND)
        set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.")
        set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False)
        # macro() has no scope of its own, so this returns from the including file.
        return()
      endif()
      set(cmake_fd_version)
      set(cmake_fd_required_arg)
      set(cmake_fd_quiet_arg)
      set(cmake_fd_exact_arg)
    endif()
  endmacro()
endif()

# The "SHELL:" prefix keeps multi-word options together; it is dropped for
# CMake releases older than 3.12.
set(_HIP_SHELL "SHELL:")
if(CMAKE_VERSION VERSION_LESS 3.12)
  set(_HIP_SHELL "")
endif()

# Appends the given flags (ARGN) to TARGET's INTERFACE_COMPILE_OPTIONS,
# restricted to C++ compilation.
function(hip_add_interface_compile_flags TARGET)
  set_property(TARGET ${TARGET} APPEND PROPERTY
    INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CXX>:${_HIP_SHELL}${ARGN}>"
  )
endfunction()

# Appends the given flags (ARGN) to TARGET's INTERFACE_LINK_LIBRARIES.
# From CMake 3.20 on the flags are restricted to C++ link steps; older
# releases receive them unconditionally.
function(hip_add_interface_link_flags TARGET)
  if(CMAKE_VERSION VERSION_LESS 3.20)
    set_property(TARGET ${TARGET} APPEND PROPERTY
      INTERFACE_LINK_LIBRARIES "${ARGN}"
    )
  else()
    set_property(TARGET ${TARGET} APPEND PROPERTY
      INTERFACE_LINK_LIBRARIES "$<$<LINK_LANGUAGE:CXX>:${ARGN}>"
    )
  endif()
endfunction()

#Number of parallel jobs by default is 1
if(NOT DEFINED HIP_CLANG_NUM_PARALLEL_JOBS)
  set(HIP_CLANG_NUM_PARALLEL_JOBS 1)
+endif() +set(HIP_COMPILER "@HIP_COMPILER@") +set(HIP_RUNTIME "@HIP_RUNTIME@") + +# NOTE: If hip-config is invoked from /opt/rocm-ver/hip/lib/cmake/hip/ +# then PACKAGE_PREFIX_DIR will resolve to /opt/rocm-ver/hip, which is for backward compatibility +# The following will ensure PACKAGE_PREFIX_DIR will resolves to /opt/rocm-ver +# First find the real path to hip-config file with symlinks resolved +# Real Path : /opt/rocm-ver/lib/cmake/hip/hip-config.cmake +# Then go up 4 levels to get PACKAGE_PREFIX_DIR +# PACKAGE_PREFIX_DIR : /opt/rocm-ver +# TODO:once file reorg backward compatibility is turned off this can be removed. +if(IS_SYMLINK ${CMAKE_CURRENT_LIST_FILE}) + get_filename_component(CONFIG_FILE_PATH "${CMAKE_CURRENT_LIST_FILE}" REALPATH) + get_filename_component(PACKAGE_PREFIX_DIR "${CONFIG_FILE_PATH}/../../../../" ABSOLUTE) +endif() +# end of TODO +set(HIP_PACKAGE_PREFIX_DIR ${PACKAGE_PREFIX_DIR}) + +set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) +set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) +set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) +set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) +if(WIN32) + set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc.bat") + set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig.bat") +else() + set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") + set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") +endif() +# Windows Specific Definition here: +if(WIN32) + if(DEFINED ENV{HIP_PATH}) + file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_PATH) + elseif(DEFINED ENV{HIP_DIR}) + file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR) + else() + # using the HIP found + set(HIP_PATH ${PACKAGE_PREFIX_DIR}) + endif() +else() + # Linux + # If HIP is not installed under ROCm, need this to find HSA assuming HSA is under ROCm + if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH "$ENV{ROCM_PATH}") + endif() + + # set a default path for 
ROCM_PATH + if(NOT DEFINED ROCM_PATH) + set(ROCM_PATH ${PACKAGE_PREFIX_DIR}) + endif() + +endif() + +if(HIP_COMPILER STREQUAL "clang") + if(WIN32) + # Using SDK folder + file(TO_CMAKE_PATH "${HIP_PATH}" HIP_CLANG_ROOT) + if (NOT EXISTS "${HIP_CLANG_ROOT}/bin/clang.exe") + # if using install folder + file(TO_CMAKE_PATH "${HIP_PATH}/../lc" HIP_CLANG_ROOT) + endif() + else() + set(HIP_CLANG_ROOT "${ROCM_PATH}/llvm") + endif() + if(NOT HIP_CXX_COMPILER) + set(HIP_CXX_COMPILER ${CMAKE_CXX_COMPILER}) + endif() + + if(NOT WIN32) + find_dependency(AMDDeviceLibs) + endif() + set(AMDGPU_TARGETS "gfx900;gfx906;gfx908;gfx90a;gfx1030" CACHE STRING "AMD GPU targets to compile for") + set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") +endif() # HIP_COMPILER check + +if(NOT WIN32) + find_dependency(amd_comgr) +endif() + +include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) + +#Using find_dependency to locate the dependency for the packages +#This makes the cmake generated file xxxx-targets to supply the linker libraries +# without worrying other transitive dependencies +if(NOT WIN32) + find_dependency(hsa-runtime64) + find_dependency(Threads) +endif() + +set(_IMPORT_PREFIX ${HIP_PACKAGE_PREFIX_DIR}) +# Right now this is only supported for amd platforms +set_target_properties(hip::host PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_HCC__=1;__HIP_PLATFORM_AMD__=1" +) + +if(HIP_RUNTIME MATCHES "rocclr") + set_target_properties(hip::amdhip64 PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + ) + + get_target_property(amdhip64_type hip::amdhip64 TYPE) + message(STATUS "hip::amdhip64 is ${amdhip64_type}") + + if(NOT WIN32) + set_target_properties(hip::device PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" + ) + endif() +endif() + +if(HIP_COMPILER STREQUAL "clang") + 
get_property(compilePropIsSet TARGET hip::device PROPERTY INTERFACE_COMPILE_OPTIONS SET)

  # Compile options are only added when hip::device does not already carry
  # INTERFACE_COMPILE_OPTIONS; link flags are always appended.
  if (NOT compilePropIsSet AND HIP_CXX_COMPILER MATCHES ".*clang\\+\\+")
    hip_add_interface_compile_flags(hip::device -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false)
  endif()

  if (NOT compilePropIsSet)
    hip_add_interface_compile_flags(hip::device -x hip)
  endif()

  hip_add_interface_link_flags(hip::device --hip-link)

  foreach(GPU_TARGET ${GPU_TARGETS})
    if (NOT compilePropIsSet)
      hip_add_interface_compile_flags(hip::device --offload-arch=${GPU_TARGET})
    endif()
    hip_add_interface_link_flags(hip::device --offload-arch=${GPU_TARGET})
  endforeach()
  #Add support for parallel build and link
  # Test the variable by name: an empty or undefined CMAKE_CXX_COMPILER_ID must
  # evaluate to false rather than collapse the condition into a syntax error.
  if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    check_cxx_compiler_flag("-parallel-jobs=1" HIP_CLANG_SUPPORTS_PARALLEL_JOBS)
  endif()
  if(HIP_CLANG_NUM_PARALLEL_JOBS GREATER 1)
    # The result variable stays undefined when the check above was skipped.
    if(HIP_CLANG_SUPPORTS_PARALLEL_JOBS)
      if (NOT compilePropIsSet)
        hip_add_interface_compile_flags(hip::device -parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS} -Wno-format-nonliteral)
      endif()
      hip_add_interface_link_flags(hip::device -parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS})
    else()
      message("clang compiler doesn't support parallel jobs")
    endif()
  endif()


  # Use HIP_CXX option -print-libgcc-file-name --rtlib=compiler-rt
  # To fetch the compiler rt library file name.
+ execute_process( + COMMAND ${HIP_CXX_COMPILER} -print-libgcc-file-name --rtlib=compiler-rt + OUTPUT_VARIABLE CLANGRT_BUILTINS + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE CLANGRT_BUILTINS_FETCH_EXIT_CODE) + + # Add support for __fp16 and _Float16, explicitly link with compiler-rt + if( "${CLANGRT_BUILTINS_FETCH_EXIT_CODE}" STREQUAL "0" ) + # CLANG_RT Builtins found Successfully Set interface link libraries property + set_property(TARGET hip::host APPEND PROPERTY INTERFACE_LINK_LIBRARIES "${CLANGRT_BUILTINS}") + set_property(TARGET hip::device APPEND PROPERTY INTERFACE_LINK_LIBRARIES "${CLANGRT_BUILTINS}") + else() + message(STATUS "clangrt builtins lib not found: ${CLANGRT_BUILTINS_FETCH_EXIT_CODE}") + endif() # CLANGRT_BUILTINS_FETCH_EXIT_CODE Check +endif() # HIP_COMPILER Check + +set( hip_LIBRARIES hip::host hip::device) +set( hip_LIBRARY ${hip_LIBRARIES}) + +set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) +set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) +set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) +set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) +set(HIP_LIBRARIES ${hip_LIBRARIES}) +set(HIP_LIBRARY ${hip_LIBRARY}) +set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) +set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) + diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h b/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h new file mode 100644 index 0000000000..5532fec622 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h @@ -0,0 +1,348 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H + +#include +#include +#include + +#ifdef __cplusplus + +extern "C" HIP_PUBLIC_API +hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f); + +static inline hipChannelFormatDesc hipCreateChannelDescHalf() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +static inline hipChannelFormatDesc hipCreateChannelDescHalf1() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +static inline hipChannelFormatDesc hipCreateChannelDescHalf2() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template +static inline hipChannelFormatDesc hipCreateChannelDesc() { + return hipCreateChannelDesc(0, 0, 0, 0, hipChannelFormatKindNone); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = 
(int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ // vector3 is the same as vector4 +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned char) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed char) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, 0, 0, 
hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned short) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed short) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = 
(int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned int) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed int) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindFloat); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindFloat); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindFloat); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(float) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindFloat); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline 
hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, 0, 0, 0, hipChannelFormatKindSigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, 0, 0, hipChannelFormatKindSigned); +} + +#ifndef __GNUC__ +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, e, 0, hipChannelFormatKindSigned); +} +#endif + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(unsigned long) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindUnsigned); +} + +template <> +inline hipChannelFormatDesc hipCreateChannelDesc() { + int e = (int)sizeof(signed long) * 8; + return hipCreateChannelDesc(e, e, e, e, hipChannelFormatKindSigned); +} + +#else + +struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, + enum hipChannelFormatKind f); + +#endif + +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h new file mode 100644 index 
0000000000..57576f57a4 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -0,0 +1,1038 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H + +#include "host_defines.h" +#include "math_fwd.h" + +#if !defined(__HIPCC_RTC__) +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#include +#include + +#if __HIP_CLANG_ONLY__ +extern "C" __device__ int printf(const char *fmt, ...); +#else +template +static inline __device__ void printf(const char* format, All... 
all) {} +#endif // __HIP_CLANG_ONLY__ + +extern "C" __device__ unsigned long long __ockl_steadyctr_u64(); + +/* +Integer Intrinsics +*/ + +// integer intrinsic function __poc __clz __ffs __brev +__device__ static inline unsigned int __popc(unsigned int input) { + return __builtin_popcount(input); +} +__device__ static inline unsigned int __popcll(unsigned long long int input) { + return __builtin_popcountll(input); +} + +__device__ static inline int __clz(int input) { + return __ockl_clz_u32((uint)input); +} + +__device__ static inline int __clzll(long long int input) { + return __ockl_clz_u64((uint64_t)input); +} + +__device__ static inline unsigned int __ffs(unsigned int input) { + return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; +} + +__device__ static inline unsigned int __ffsll(unsigned long long int input) { + return ( input == 0 ? -1 : __builtin_ctzll(input) ) + 1; +} + +__device__ static inline unsigned int __ffs(int input) { + return ( input == 0 ? -1 : __builtin_ctz(input) ) + 1; +} + +__device__ static inline unsigned int __ffsll(long long int input) { + return ( input == 0 ? -1 : __builtin_ctzll(input) ) + 1; +} + +// Given a 32/64-bit value exec mask and an integer value base (between 0 and WAVEFRONT_SIZE), +// find the n-th (given by offset) set bit in the exec mask from the base bit, and return the bit position. +// If not found, return -1. 
+__device__ static int32_t __fns64(uint64_t mask, uint32_t base, int32_t offset) { + uint64_t temp_mask = mask; + int32_t temp_offset = offset; + + if (offset == 0) { + temp_mask &= (1 << base); + temp_offset = 1; + } + else if (offset < 0) { + temp_mask = __builtin_bitreverse64(mask); + base = 63 - base; + temp_offset = -offset; + } + + temp_mask = temp_mask & ((~0ULL) << base); + if (__builtin_popcountll(temp_mask) < temp_offset) + return -1; + int32_t total = 0; + for (int i = 0x20; i > 0; i >>= 1) { + uint64_t temp_mask_lo = temp_mask & ((1ULL << i) - 1); + int32_t pcnt = __builtin_popcountll(temp_mask_lo); + if (pcnt < temp_offset) { + temp_mask = temp_mask >> i; + temp_offset -= pcnt; + total += i; + } + else { + temp_mask = temp_mask_lo; + } + } + if (offset < 0) + return 63 - total; + else + return total; +} + +__device__ static int32_t __fns32(uint64_t mask, uint32_t base, int32_t offset) { + uint64_t temp_mask = mask; + int32_t temp_offset = offset; + if (offset == 0) { + temp_mask &= (1 << base); + temp_offset = 1; + } + else if (offset < 0) { + temp_mask = __builtin_bitreverse64(mask); + base = 63 - base; + temp_offset = -offset; + } + temp_mask = temp_mask & ((~0ULL) << base); + if (__builtin_popcountll(temp_mask) < temp_offset) + return -1; + int32_t total = 0; + for (int i = 0x20; i > 0; i >>= 1) { + uint64_t temp_mask_lo = temp_mask & ((1ULL << i) - 1); + int32_t pcnt = __builtin_popcountll(temp_mask_lo); + if (pcnt < temp_offset) { + temp_mask = temp_mask >> i; + temp_offset -= pcnt; + total += i; + } + else { + temp_mask = temp_mask_lo; + } + } + if (offset < 0) + return 63 - total; + else + return total; +} +__device__ static inline unsigned int __brev(unsigned int input) { + return __builtin_bitreverse32(input); +} + +__device__ static inline unsigned long long int __brevll(unsigned long long int input) { + return __builtin_bitreverse64(input); +} + +__device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) { + return input == 0 ? 
-1 : __builtin_ctzl(input); +} + +__device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) { + uint32_t offset = src1 & 31; + uint32_t width = src2 & 31; + return width == 0 ? 0 : (src0 << (32 - offset - width)) >> (32 - width); +} + +__device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) { + uint64_t offset = src1 & 63; + uint64_t width = src2 & 63; + return width == 0 ? 0 : (src0 << (64 - offset - width)) >> (64 - width); +} + +__device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) { + uint32_t offset = src2 & 31; + uint32_t width = src3 & 31; + uint32_t mask = (1 << width) - 1; + return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); +} + +__device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) { + uint64_t offset = src2 & 63; + uint64_t width = src3 & 63; + uint64_t mask = (1ULL << width) - 1; + return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); +} + +__device__ inline unsigned int __funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift) +{ + uint32_t mask_shift = shift & 31; + return mask_shift == 0 ? hi : __builtin_amdgcn_alignbit(hi, lo, 32 - mask_shift); +} + +__device__ inline unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift) +{ + uint32_t min_shift = shift >= 32 ? 32 : shift; + return min_shift == 0 ? hi : __builtin_amdgcn_alignbit(hi, lo, 32 - min_shift); +} + +__device__ inline unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift) +{ + return __builtin_amdgcn_alignbit(hi, lo, shift); +} + +__device__ inline unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift) +{ + return shift >= 32 ? 
hi : __builtin_amdgcn_alignbit(hi, lo, shift); +} + +__device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s); +__device__ static unsigned int __hadd(int x, int y); +__device__ static int __mul24(int x, int y); +__device__ static long long int __mul64hi(long long int x, long long int y); +__device__ static int __mulhi(int x, int y); +__device__ static int __rhadd(int x, int y); +__device__ static unsigned int __sad(int x, int y,unsigned int z); +__device__ static unsigned int __uhadd(unsigned int x, unsigned int y); +__device__ static int __umul24(unsigned int x, unsigned int y); +__device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); +__device__ static unsigned int __umulhi(unsigned int x, unsigned int y); +__device__ static unsigned int __urhadd(unsigned int x, unsigned int y); +__device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z); + +struct ucharHolder { + union { + unsigned char c[4]; + unsigned int ui; + }; +} __attribute__((aligned(4))); + +struct uchar2Holder { + union { + unsigned int ui[2]; + unsigned char c[8]; + }; +} __attribute__((aligned(8))); + +__device__ +static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { + struct uchar2Holder cHoldVal; + struct ucharHolder cHoldKey; + cHoldKey.ui = s; + cHoldVal.ui[0] = x; + cHoldVal.ui[1] = y; + unsigned int result; + result = cHoldVal.c[cHoldKey.c[0] & 0x07]; + result += (cHoldVal.c[(cHoldKey.c[0] & 0x70) >> 4] << 8); + result += (cHoldVal.c[cHoldKey.c[1] & 0x07] << 16); + result += (cHoldVal.c[(cHoldKey.c[1] & 0x70) >> 4] << 24); + return result; +} + +__device__ static inline unsigned int __hadd(int x, int y) { + int z = x + y; + int sign = z & 0x8000000; + int value = z & 0x7FFFFFFF; + return ((value) >> 1 || sign); +} + +__device__ static inline int __mul24(int x, int y) { + return __ockl_mul24_i32(x, y); +} + +__device__ static inline long long 
__mul64hi(long long int x, long long int y) { + ulong x0 = (ulong)x & 0xffffffffUL; + long x1 = x >> 32; + ulong y0 = (ulong)y & 0xffffffffUL; + long y1 = y >> 32; + ulong z0 = x0*y0; + long t = x1*y0 + (z0 >> 32); + long z1 = t & 0xffffffffL; + long z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); +} + +__device__ static inline int __mulhi(int x, int y) { + return __ockl_mul_hi_i32(x, y); +} + +__device__ static inline int __rhadd(int x, int y) { + int z = x + y + 1; + int sign = z & 0x8000000; + int value = z & 0x7FFFFFFF; + return ((value) >> 1 || sign); +} +__device__ static inline unsigned int __sad(int x, int y, unsigned int z) { + return x > y ? x - y + z : y - x + z; +} +__device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { + return (x + y) >> 1; +} +__device__ static inline int __umul24(unsigned int x, unsigned int y) { + return __ockl_mul24_u32(x, y); +} + +__device__ +static inline unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) { + ulong x0 = x & 0xffffffffUL; + ulong x1 = x >> 32; + ulong y0 = y & 0xffffffffUL; + ulong y1 = y >> 32; + ulong z0 = x0*y0; + ulong t = x1*y0 + (z0 >> 32); + ulong z1 = t & 0xffffffffUL; + ulong z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); +} + +__device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) { + return __ockl_mul_hi_u32(x, y); +} +__device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { + return (x + y + 1) >> 1; +} +__device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { + return __ockl_sadd_u32(x, y, z); +} + +__device__ static inline unsigned int __lane_id() { + return __builtin_amdgcn_mbcnt_hi( + -1, __builtin_amdgcn_mbcnt_lo(-1, 0)); +} + +__device__ +static inline unsigned int __mbcnt_lo(unsigned int x, unsigned int y) {return __builtin_amdgcn_mbcnt_lo(x,y);}; + +__device__ +static inline unsigned int __mbcnt_hi(unsigned int x, 
unsigned int y) {return __builtin_amdgcn_mbcnt_hi(x,y);};
+
+/*
+HIP specific device functions
+*/
+
+#if !defined(__HIPCC_RTC__)
+#include "amd_warp_functions.h"
+#endif
+
+// MASK1 selects bytes 0 and 2 of a 32-bit word, MASK2 selects bytes 1 and 3.
+#define MASK1 0x00ff00ff
+#define MASK2 0xff00ff00
+
+// Packed per-byte add: the even and odd byte lanes are summed separately and
+// re-masked, so a carry out of one byte is discarded instead of reaching its
+// neighbour.
+// NOTE(review): only the .w member of char4 is read and written; confirm that
+// member is wide enough for the 32-bit masks used here.
+__device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) {
+    char4 out;
+    unsigned one1 = in1.w & MASK1;
+    unsigned one2 = in2.w & MASK1;
+    out.w = (one1 + one2) & MASK1;
+    one1 = in1.w & MASK2;
+    one2 = in2.w & MASK2;
+    out.w = out.w | ((one1 + one2) & MASK2);
+    return out;
+}
+
+// Packed per-byte subtract; same lane-splitting scheme as __hip_hc_add8pk.
+__device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
+    char4 out;
+    unsigned one1 = in1.w & MASK1;
+    unsigned one2 = in2.w & MASK1;
+    out.w = (one1 - one2) & MASK1;
+    one1 = in1.w & MASK2;
+    one2 = in2.w & MASK2;
+    out.w = out.w | ((one1 - one2) & MASK2);
+    return out;
+}
+
+// Packed multiply built on the same masks.
+// NOTE(review): the masked words are multiplied as whole 32-bit values, so the
+// lanes are not independent the way they are for add/sub; confirm intent.
+__device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
+    char4 out;
+    unsigned one1 = in1.w & MASK1;
+    unsigned one2 = in2.w & MASK1;
+    out.w = (one1 * one2) & MASK1;
+    one1 = in1.w & MASK2;
+    one2 = in2.w & MASK2;
+    out.w = out.w | ((one1 * one2) & MASK2);
+    return out;
+}
+
+// double -> float conversions. _rd/_ru/_rz delegate to the __ocml_cvtrt{n,p,z}
+// helpers; _rn relies on the implicit double -> float conversion.
+__device__ static inline float __double2float_rd(double x) {
+    return __ocml_cvtrtn_f32_f64(x);
+}
+__device__ static inline float __double2float_rn(double x) { return x; }
+__device__ static inline float __double2float_ru(double x) {
+    return __ocml_cvtrtp_f32_f64(x);
+}
+__device__ static inline float __double2float_rz(double x) {
+    return __ocml_cvtrtz_f32_f64(x);
+}
+
+// Returns the second int-sized word of x's object representation
+// (the upper half on a little-endian target).
+__device__ static inline int __double2hiint(double x) {
+    static_assert(sizeof(double) == 2 * sizeof(int), "");
+
+    int tmp[2];
+    __builtin_memcpy(tmp, &x, sizeof(tmp));
+
+    return tmp[1];
+}
+// Returns the first int-sized word of x's object representation
+// (the lower half on a little-endian target).
+__device__ static inline int __double2loint(double x) {
+    static_assert(sizeof(double) == 2 * sizeof(int), "");
+
+    int tmp[2];
+    __builtin_memcpy(tmp, &x, sizeof(tmp));
+
+    return tmp[0];
+}
+
+// double -> int: apply floor/rint/ceil first, then cast.
+__device__ static inline int __double2int_rd(double x) { return (int)__ocml_floor_f64(x); }
+__device__ static inline int
__double2int_rn(double x) { return (int)__ocml_rint_f64(x); }
+__device__ static inline int __double2int_ru(double x) { return (int)__ocml_ceil_f64(x); }
+__device__ static inline int __double2int_rz(double x) { return (int)x; }
+
+// double -> long long: _rd/_rn/_ru apply __ocml floor/rint/ceil before the
+// cast; _rz is a plain cast.
+__device__ static inline long long int __double2ll_rd(double x) {
+    return (long long)__ocml_floor_f64(x);
+}
+__device__ static inline long long int __double2ll_rn(double x) {
+    return (long long)__ocml_rint_f64(x);
+}
+__device__ static inline long long int __double2ll_ru(double x) {
+    return (long long)__ocml_ceil_f64(x);
+}
+__device__ static inline long long int __double2ll_rz(double x) { return (long long)x; }
+
+// double -> unsigned int, same pattern as above.
+__device__ static inline unsigned int __double2uint_rd(double x) {
+    return (unsigned int)__ocml_floor_f64(x);
+}
+__device__ static inline unsigned int __double2uint_rn(double x) {
+    return (unsigned int)__ocml_rint_f64(x);
+}
+__device__ static inline unsigned int __double2uint_ru(double x) {
+    return (unsigned int)__ocml_ceil_f64(x);
+}
+__device__ static inline unsigned int __double2uint_rz(double x) { return (unsigned int)x; }
+
+// double -> unsigned long long, same pattern as above.
+__device__ static inline unsigned long long int __double2ull_rd(double x) {
+    return (unsigned long long int)__ocml_floor_f64(x);
+}
+__device__ static inline unsigned long long int __double2ull_rn(double x) {
+    return (unsigned long long int)__ocml_rint_f64(x);
+}
+__device__ static inline unsigned long long int __double2ull_ru(double x) {
+    return (unsigned long long int)__ocml_ceil_f64(x);
+}
+__device__ static inline unsigned long long int __double2ull_rz(double x) {
+    return (unsigned long long int)x;
+}
+// -Wc++98-compat-pedantic is silenced only around the function below.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+// Reinterprets the bytes of x as a long long (byte copy, no numeric conversion).
+__device__ static inline long long int __double_as_longlong(double x) {
+    static_assert(sizeof(long long) == sizeof(double), "");
+
+    long long tmp;
+    __builtin_memcpy(&tmp, &x, sizeof(tmp));
+
+    return tmp;
+}
+#pragma clang diagnostic pop
+
+/*
+__device__ unsigned short __float2half_rn(float
x);
+__device__ float __half2float(unsigned short);
+
+The device functions above are not valid in HIP.
+Use
+__device__ __half __float2half_rn(float x);
+__device__ float __half2float(__half);
+from hip_fp16.h
+
+CUDA implements half as unsigned short, whereas HIP does not.
+
+*/
+
+// float -> int: apply __ocml floor/rint/ceil/trunc first, then cast.
+__device__ static inline int __float2int_rd(float x) { return (int)__ocml_floor_f32(x); }
+__device__ static inline int __float2int_rn(float x) { return (int)__ocml_rint_f32(x); }
+__device__ static inline int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); }
+__device__ static inline int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); }
+
+// float -> long long: _rd/_rn/_ru round via __ocml first; _rz is a plain cast.
+__device__ static inline long long int __float2ll_rd(float x) {
+    return (long long int)__ocml_floor_f32(x);
+}
+__device__ static inline long long int __float2ll_rn(float x) {
+    return (long long int)__ocml_rint_f32(x);
+}
+__device__ static inline long long int __float2ll_ru(float x) {
+    return (long long int)__ocml_ceil_f32(x);
+}
+__device__ static inline long long int __float2ll_rz(float x) { return (long long int)x; }
+
+// float -> unsigned int, same pattern as above.
+__device__ static inline unsigned int __float2uint_rd(float x) {
+    return (unsigned int)__ocml_floor_f32(x);
+}
+__device__ static inline unsigned int __float2uint_rn(float x) {
+    return (unsigned int)__ocml_rint_f32(x);
+}
+__device__ static inline unsigned int __float2uint_ru(float x) {
+    return (unsigned int)__ocml_ceil_f32(x);
+}
+__device__ static inline unsigned int __float2uint_rz(float x) { return (unsigned int)x; }
+
+// float -> unsigned long long, same pattern as above.
+__device__ static inline unsigned long long int __float2ull_rd(float x) {
+    return (unsigned long long int)__ocml_floor_f32(x);
+}
+__device__ static inline unsigned long long int __float2ull_rn(float x) {
+    return (unsigned long long int)__ocml_rint_f32(x);
+}
+__device__ static inline unsigned long long int __float2ull_ru(float x) {
+    return (unsigned long long int)__ocml_ceil_f32(x);
+}
+__device__ static inline unsigned long long int __float2ull_rz(float x) {
+    return (unsigned long
long int)x;
+}
+
+// Reinterprets the bytes of a float as an int (byte copy, no numeric conversion).
+__device__ static inline int __float_as_int(float x) {
+    static_assert(sizeof(int) == sizeof(float), "");
+
+    int tmp;
+    __builtin_memcpy(&tmp, &x, sizeof(tmp));
+
+    return tmp;
+}
+
+// Reinterprets the bytes of a float as an unsigned int.
+__device__ static inline unsigned int __float_as_uint(float x) {
+    static_assert(sizeof(unsigned int) == sizeof(float), "");
+
+    unsigned int tmp;
+    __builtin_memcpy(&tmp, &x, sizeof(tmp));
+
+    return tmp;
+}
+
+// Builds a double whose upper 32 bits are `hi` and lower 32 bits are `lo`.
+// `lo` goes through uint32_t so a negative value is not sign-extended into the
+// upper word; the template arguments of both casts had been lost and are
+// restored here.
+__device__ static inline double __hiloint2double(int hi, int lo) {
+    static_assert(sizeof(double) == sizeof(uint64_t), "");
+
+    uint64_t tmp0 = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
+    double tmp1;
+    __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0));
+
+    return tmp1;
+}
+
+// int -> double by plain cast.
+__device__ static inline double __int2double_rn(int x) { return (double)x; }
+
+// int -> float. _rd/_ru/_rz delegate to the __ocml_cvtrt{n,p,z}_f32_s32
+// helpers; _rn is a plain cast.
+__device__ static inline float __int2float_rd(int x) {
+    return __ocml_cvtrtn_f32_s32(x);
+}
+__device__ static inline float __int2float_rn(int x) { return (float)x; }
+__device__ static inline float __int2float_ru(int x) {
+    return __ocml_cvtrtp_f32_s32(x);
+}
+__device__ static inline float __int2float_rz(int x) {
+    return __ocml_cvtrtz_f32_s32(x);
+}
+
+// Reinterprets the bytes of an int as a float.
+__device__ static inline float __int_as_float(int x) {
+    static_assert(sizeof(float) == sizeof(int), "");
+
+    float tmp;
+    __builtin_memcpy(&tmp, &x, sizeof(tmp));
+
+    return tmp;
+}
+
+// long long -> double, same _rd/_rn/_ru/_rz pattern (s64 -> f64 helpers).
+__device__ static inline double __ll2double_rd(long long int x) {
+    return __ocml_cvtrtn_f64_s64(x);
+}
+__device__ static inline double __ll2double_rn(long long int x) { return (double)x; }
+__device__ static inline double __ll2double_ru(long long int x) {
+    return __ocml_cvtrtp_f64_s64(x);
+}
+__device__ static inline double __ll2double_rz(long long int x) {
+    return __ocml_cvtrtz_f64_s64(x);
+}
+
+// long long -> float, same pattern (s64 -> f32 helpers).
+__device__ static inline float __ll2float_rd(long long int x) {
+    return __ocml_cvtrtn_f32_s64(x);
+}
+__device__ static inline float __ll2float_rn(long long int x) { return (float)x; }
+__device__ static inline float __ll2float_ru(long long int x) {
+    return
__ocml_cvtrtp_f32_s64(x); +} +__device__ static inline float __ll2float_rz(long long int x) { + return __ocml_cvtrtz_f32_s64(x); +} + +__device__ static inline double __longlong_as_double(long long int x) { + static_assert(sizeof(double) == sizeof(long long), ""); + + double tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __uint2double_rn(int x) { return (double)x; } + +__device__ static inline float __uint2float_rd(unsigned int x) { + return __ocml_cvtrtn_f32_u32(x); +} +__device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_ru(unsigned int x) { + return __ocml_cvtrtp_f32_u32(x); +} +__device__ static inline float __uint2float_rz(unsigned int x) { + return __ocml_cvtrtz_f32_u32(x); +} + +__device__ static inline float __uint_as_float(unsigned int x) { + static_assert(sizeof(float) == sizeof(unsigned int), ""); + + float tmp; + __builtin_memcpy(&tmp, &x, sizeof(tmp)); + + return tmp; +} + +__device__ static inline double __ull2double_rd(unsigned long long int x) { + return __ocml_cvtrtn_f64_u64(x); +} +__device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_ru(unsigned long long int x) { + return __ocml_cvtrtp_f64_u64(x); +} +__device__ static inline double __ull2double_rz(unsigned long long int x) { + return __ocml_cvtrtz_f64_u64(x); +} + +__device__ static inline float __ull2float_rd(unsigned long long int x) { + return __ocml_cvtrtn_f32_u64(x); +} +__device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_ru(unsigned long long int x) { + return __ocml_cvtrtp_f32_u64(x); +} +__device__ static inline float __ull2float_rz(unsigned long long int x) { + return __ocml_cvtrtz_f32_u64(x); +} + +#if __HIP_CLANG_ONLY__ + +// Clock functions +__device__ long long int __clock64(); 
+__device__ long long int __clock();
+__device__ long long int clock64();
+__device__ long long int clock();
+__device__ long long int wall_clock64();
+// hip.amdgcn.bc - named sync
+__device__ void __named_sync();
+
+// The definitions below are only compiled for the device pass.
+#ifdef __HIP_DEVICE_COMPILE__
+
+// Clock function to return GPU core cycle count.
+// GPU can change its core clock frequency at runtime. The maximum frequency can be queried
+// through hipDeviceAttributeClockRate attribute.
+__device__
+inline __attribute((always_inline))
+long long int __clock64() {
+// Use the s_memtime builtin when the target provides it; otherwise fall back
+// to the generic cycle-counter builtin.
+#if __has_builtin(__builtin_amdgcn_s_memtime)
+  // Exists on gfx8, gfx9, gfx10.1, gfx10.2, gfx10.3
+  return (long long int) __builtin_amdgcn_s_memtime();
+#else
+  // Subject to change when better solution available
+  return (long long int) __builtin_readcyclecounter();
+#endif
+}
+
+// Same counter as __clock64(); simply forwards to it.
+__device__
+inline __attribute((always_inline))
+long long int __clock() { return __clock64(); }
+
+// Clock function to return wall clock count at a constant frequency that can be queried
+// through hipDeviceAttributeWallClockRate attribute.
+__device__ +inline __attribute__((always_inline)) +long long int wall_clock64() { + return (long long int) __ockl_steadyctr_u64(); +} + +__device__ +inline __attribute__((always_inline)) +long long int clock64() { return __clock64(); } + +__device__ +inline __attribute__((always_inline)) +long long int clock() { return __clock(); } + +// hip.amdgcn.bc - named sync +__device__ +inline +void __named_sync() { __builtin_amdgcn_s_barrier(); } + +#endif // __HIP_DEVICE_COMPILE__ + +// warp vote function __all __any __ballot +__device__ +inline +int __all(int predicate) { + return __ockl_wfall_i32(predicate); +} + +__device__ +inline +int __any(int predicate) { + return __ockl_wfany_i32(predicate); +} + +// XXX from llvm/include/llvm/IR/InstrTypes.h +#define ICMP_NE 33 + +__device__ +inline +unsigned long long int __ballot(int predicate) { + return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE); +} + +__device__ +inline +unsigned long long int __ballot64(int predicate) { + return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE); +} + +// hip.amdgcn.bc - lanemask +__device__ +inline +uint64_t __lanemask_gt() +{ + uint32_t lane = __ockl_lane_u32(); + if (lane == 63) + return 0; + uint64_t ballot = __ballot64(1); + uint64_t mask = (~((uint64_t)0)) << (lane + 1); + return mask & ballot; +} + +__device__ +inline +uint64_t __lanemask_lt() +{ + uint32_t lane = __ockl_lane_u32(); + int64_t ballot = __ballot64(1); + uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1; + return mask & ballot; +} + +__device__ +inline +uint64_t __lanemask_eq() +{ + uint32_t lane = __ockl_lane_u32(); + int64_t mask = ((uint64_t)1 << lane); + return mask; +} + + +__device__ inline void* __local_to_generic(void* p) { return p; } + +#ifdef __HIP_DEVICE_COMPILE__ +__device__ +inline +void* __get_dynamicgroupbaseptr() +{ + // Get group segment base pointer. 
+    return (char*)__local_to_generic((void*)__to_local(__builtin_amdgcn_groupstaticsize()));
+}
+#else
+// Host pass: declaration only.
+__device__
+void* __get_dynamicgroupbaseptr();
+#endif // __HIP_DEVICE_COMPILE__
+
+// Alias that forwards to __get_dynamicgroupbaseptr().
+__device__
+inline
+void *__amdgcn_get_dynamicgroupbaseptr() {
+    return __get_dynamicgroupbaseptr();
+}
+
+// Memory Fence Functions
+// Sequentially consistent fence with the "agent" sync scope.
+__device__
+inline
+static void __threadfence()
+{
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+}
+
+// Sequentially consistent fence with the "workgroup" sync scope.
+__device__
+inline
+static void __threadfence_block()
+{
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+}
+
+// Sequentially consistent fence with the empty sync-scope string.
+// NOTE(review): presumably the system-wide scope; confirm against the builtin's docs.
+__device__
+inline
+static void __threadfence_system()
+{
+  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+}
+
+// abort
+// Weak device-side abort() implemented as a trap.
+__device__
+inline
+__attribute__((weak))
+void abort() {
+  return __builtin_trap();
+}
+
+// The noinline attribute helps encapsulate the printf expansion,
+// which otherwise has a performance impact just by increasing the
+// size of the calling function. Additionally, the weak attribute
+// allows the function to exist as a global although its definition is
+// included in every compilation unit.
+#if defined(_WIN32) || defined(_WIN64)
+// Windows assert hook: traps without printing anything (see FIXME).
+extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
+void _wassert(const wchar_t *_msg, const wchar_t *_file, unsigned _line) {
+    // FIXME: Need `wchar_t` support to generate assertion message.
+    __builtin_trap();
+}
+#else /* defined(_WIN32) || defined(_WIN64) */
+// Non-Windows assert hook: sends "<file>:<line>: <function>: Device-side
+// assertion `<assertion>' failed." through the __ockl_fprintf_* calls, then traps.
+extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
+void __assert_fail(const char *assertion,
+                   const char *file,
+                   unsigned int line,
+                   const char *function)
+{
+  const char fmt[] = "%s:%u: %s: Device-side assertion `%s' failed.\n";
+
+  // strlen is not available as a built-in yet, so we create our own
+  // loop in a macro. With a string literal argument, the compiler
+  // usually manages to replace the loop with a constant.
+  //
+  // The macro does not check for null pointer, since all the string
+  // arguments are defined to be constant literals when called from
+  // the assert() macro.
+  //
+  // NOTE: The loop below includes the null terminator in the length
+  // as required by append_string_n().
+#define __hip_get_string_length(LEN, STR) \
+  do { \
+    const char *tmp = STR; \
+    while (*tmp++); \
+    LEN = tmp - STR; \
+  } while (0)
+
+  // Assemble the message piece by piece, in the order the format string
+  // consumes its arguments: format, file, line, function, assertion.
+  auto msg = __ockl_fprintf_stderr_begin();
+  int len = 0;
+  __hip_get_string_length(len, fmt);
+  msg = __ockl_fprintf_append_string_n(msg, fmt, len, 0);
+  __hip_get_string_length(len, file);
+  msg = __ockl_fprintf_append_string_n(msg, file, len, 0);
+  // `line` is the only non-string argument, so it goes through append_args.
+  msg = __ockl_fprintf_append_args(msg, 1, line, 0, 0, 0, 0, 0, 0, 0);
+  __hip_get_string_length(len, function);
+  msg = __ockl_fprintf_append_string_n(msg, function, len, 0);
+  __hip_get_string_length(len, assertion);
+  // The last piece is flagged with is_last = 1.
+  __ockl_fprintf_append_string_n(msg, assertion, len, /* is_last = */ 1);
+
+  // Keep the helper macro local to this function.
+#undef __hip_get_string_length
+
+  __builtin_trap();
+}
+
+// Argument-less assert hook: traps without emitting a message.
+extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
+void __assertfail()
+{
+    // ignore all the args for now.
+    __builtin_trap();
+}
+#endif /* defined(_WIN32) || defined(_WIN64) */
+
+// Workgroup barrier. With a non-zero `flags` the barrier is bracketed by a
+// release fence before and an acquire fence after, both at "workgroup" scope;
+// with zero flags only the bare barrier is issued.
+__device__ inline static void __work_group_barrier(__cl_mem_fence_flags flags) {
+    if (flags) {
+        __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
+        __builtin_amdgcn_s_barrier();
+        __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
+    } else {
+        __builtin_amdgcn_s_barrier();
+    }
+}
+
+// Integer-flag convenience wrapper around __work_group_barrier().
+__device__
+inline
+static void __barrier(int n)
+{
+  __work_group_barrier((__cl_mem_fence_flags)n);
+}
+
+// Barrier invoked with __CLK_LOCAL_MEM_FENCE.
+// NOTE(review): takes the fenced path above provided that flag is non-zero.
+__device__
+inline
+__attribute__((convergent))
+void __syncthreads()
+{
+  __barrier(__CLK_LOCAL_MEM_FENCE);
+}
+
+// Returns __ockl_wgred_add_i32 of the predicate normalised to 0/1.
+__device__
+inline
+__attribute__((convergent))
+int __syncthreads_count(int predicate)
+{
+  return __ockl_wgred_add_i32(!!predicate);
+}
+
+// Returns __ockl_wgred_and_i32 of the predicate normalised to 0/1.
+__device__
+inline
+__attribute__((convergent))
+int __syncthreads_and(int predicate)
+{
+  return __ockl_wgred_and_i32(!!predicate);
+}
+
+// Returns __ockl_wgred_or_i32 of the predicate normalised to 0/1.
+__device__
+inline
+__attribute__((convergent))
+int __syncthreads_or(int predicate)
+{
+  return __ockl_wgred_or_i32(!!predicate);
+}
+
+// hip.amdgcn.bc - device routine
+/*
+   HW_ID Register bit structure
+   WAVE_ID     3:0     Wave buffer slot number. 0-9.
+   SIMD_ID     5:4     SIMD which the wave is assigned to within the CU.
+   PIPE_ID     7:6     Pipeline from which the wave was dispatched.
+   CU_ID       11:8    Compute Unit the wave is assigned to.
+   SH_ID       12      Shader Array (within an SE) the wave is assigned to.
+   SE_ID       15:13   Shader Engine the wave is assigned to.
+   TG_ID       19:16   Thread-group ID
+   VM_ID       23:20   Virtual Memory ID
+   QUEUE_ID    26:24   Queue from which this wave was dispatched.
+   STATE_ID    29:27   State ID (graphics only, not compute).
+   ME_ID       31:30   Micro-engine ID.
+ */
+
+// Register id passed to s_getreg for HW_ID.
+#define HW_ID               4
+
+// Width and bit offset of the CU_ID field inside HW_ID.
+#define HW_ID_CU_ID_SIZE    4
+#define HW_ID_CU_ID_OFFSET  8
+
+// Width and bit offset of the SE_ID field inside HW_ID.
+#define HW_ID_SE_ID_SIZE    3
+#define HW_ID_SE_ID_OFFSET  13
+
+/*
+   Encoding of parameter bitmask
+   HW_ID        5:0     HW_ID
+   OFFSET       10:6    Range: 0..31
+   SIZE         15:11   Range: 1..32
+ */
+
+// Packs (size, offset, register id) into the s_getreg immediate laid out above.
+#define GETREG_IMMED(SZ,OFF,REG) (((SZ) << 11) | ((OFF) << 6) | (REG))
+
+/*
+  __smid returns the wave's assigned Compute Unit and Shader Engine.
+  The Compute Unit, CU_ID, is returned in bits 3:0, and the Shader Engine,
+  SE_ID (3 bits wide), in bits 6:4.
+  Note: the results vary over time.
+  SZ minus 1 since SIZE is 1-based.
+*/
+__device__
+inline
+unsigned __smid(void)
+{
+    unsigned cu_id = __builtin_amdgcn_s_getreg(
+            GETREG_IMMED(HW_ID_CU_ID_SIZE-1, HW_ID_CU_ID_OFFSET, HW_ID));
+    unsigned se_id = __builtin_amdgcn_s_getreg(
+            GETREG_IMMED(HW_ID_SE_ID_SIZE-1, HW_ID_SE_ID_OFFSET, HW_ID));
+
+    /* Each shader engine has 16 CU */
+    return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
+}
+
+/**
+ * Map HIP_DYNAMIC_SHARED to "extern __shared__" for compatibility with old HIP applications
+ * To be removed in a future release.
+ */ +#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[]; +#define HIP_DYNAMIC_SHARED_ATTRIBUTE + +#endif //defined(__clang__) && defined(__HIP__) + + +// loop unrolling +static inline __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size) { + auto dstPtr = static_cast(dst); + auto srcPtr = static_cast(src); + + while (size >= 4u) { + dstPtr[0] = srcPtr[0]; + dstPtr[1] = srcPtr[1]; + dstPtr[2] = srcPtr[2]; + dstPtr[3] = srcPtr[3]; + + size -= 4u; + srcPtr += 4u; + dstPtr += 4u; + } + switch (size) { + case 3: + dstPtr[2] = srcPtr[2]; + case 2: + dstPtr[1] = srcPtr[1]; + case 1: + dstPtr[0] = srcPtr[0]; + } + + return dst; +} + +static inline __device__ void* __hip_hc_memset(void* dst, unsigned char val, size_t size) { + auto dstPtr = static_cast(dst); + + while (size >= 4u) { + dstPtr[0] = val; + dstPtr[1] = val; + dstPtr[2] = val; + dstPtr[3] = val; + + size -= 4u; + dstPtr += 4u; + } + switch (size) { + case 3: + dstPtr[2] = val; + case 2: + dstPtr[1] = val; + case 1: + dstPtr[0] = val; + } + + return dst; +} +#ifndef __OPENMP_AMDGCN__ +static inline __device__ void* memcpy(void* dst, const void* src, size_t size) { + return __hip_hc_memcpy(dst, src, size); +} + +static inline __device__ void* memset(void* ptr, int val, size_t size) { + unsigned char val8 = static_cast(val); + return __hip_hc_memset(ptr, val8, size); +} +#endif // !__OPENMP_AMDGCN__ +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h new file mode 100644 index 0000000000..869f495c89 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h @@ -0,0 +1,1082 @@ +/* +Copyright (c) 2015 - Present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "amd_device_functions.h" + +#if __has_builtin(__hip_atomic_compare_exchange_strong) + +#if !__HIP_DEVICE_COMPILE__ +//TODO: Remove this after compiler pre-defines the following Macros. 
+#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
+#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
+#define __HIP_MEMORY_SCOPE_WORKGROUP 3
+#define __HIP_MEMORY_SCOPE_AGENT 4
+#define __HIP_MEMORY_SCOPE_SYSTEM 5
+#endif
+
+#if !defined(__HIPCC_RTC__)
+#include "amd_hip_unsafe_atomics.h"
+#endif
+
+// atomicCAS family: relaxed strong compare-exchange on *address.
+// The plain overloads use agent scope, the _system overloads use system scope.
+// Each returns `compare`, which is handed to the builtin by address; the
+// builtin is expected to overwrite it with the observed value when the
+// comparison fails, so the result is the value seen at *address.
+__device__
+inline
+int atomicCAS(int* address, int compare, int val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_AGENT);
+  return compare;
+}
+
+__device__
+inline
+int atomicCAS_system(int* address, int compare, int val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_SYSTEM);
+  return compare;
+}
+
+__device__
+inline
+unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_AGENT);
+  return compare;
+}
+
+__device__
+inline
+unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_SYSTEM);
+  return compare;
+}
+
+__device__
+inline
+unsigned long atomicCAS(unsigned long* address, unsigned long compare, unsigned long val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_AGENT);
+  return compare;
+}
+
+__device__
+inline
+unsigned long atomicCAS_system(unsigned long* address, unsigned long compare, unsigned long val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_SYSTEM);
+  return compare;
+}
+
+__device__
+inline
+unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
+                             unsigned long long val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val,
__ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_AGENT);
+  return compare;
+}
+
+__device__
+inline
+unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare,
+                                    unsigned long long val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_SYSTEM);
+  return compare;
+}
+
+// Floating-point atomicCAS overloads: same builtin, applied to float/double
+// objects directly.
+__device__
+inline
+float atomicCAS(float* address, float compare, float val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_AGENT);
+  return compare;
+}
+
+__device__
+inline
+float atomicCAS_system(float* address, float compare, float val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_SYSTEM);
+  return compare;
+}
+
+__device__
+inline
+double atomicCAS(double* address, double compare, double val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_AGENT);
+  return compare;
+}
+
+__device__
+inline
+double atomicCAS_system(double* address, double compare, double val) {
+  __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
+                                       __HIP_MEMORY_SCOPE_SYSTEM);
+  return compare;
+}
+
+// atomicAdd family: relaxed fetch-add, returning whatever the builtin returns.
+// Plain overloads use agent scope, _system overloads use system scope.
+__device__
+inline
+int atomicAdd(int* address, int val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+int atomicAdd_system(int* address, int val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned int atomicAdd(unsigned int* address, unsigned int val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// 64-bit and unsigned long atomicAdd overloads, same agent/system split.
+__device__
+inline
+unsigned long atomicAdd(unsigned long* address, unsigned long val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long atomicAdd_system(unsigned long* address, unsigned long val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// float atomicAdd: routed to unsafeAtomicAdd when __AMDGCN_UNSAFE_FP_ATOMICS__
+// is defined, otherwise to the relaxed agent-scope fetch-add builtin.
+__device__
+inline
+float atomicAdd(float* address, float val) {
+#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
+  return unsafeAtomicAdd(address, val);
+#else
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+#endif
+}
+
+// The system-scope variant always uses the builtin.
+__device__
+inline
+float atomicAdd_system(float* address, float val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// Deprecated add that returns nothing; the deprecation marker is skipped for
+// hiprtc builds.
+#if !defined(__HIPCC_RTC__)
+DEPRECATED("use atomicAdd instead")
+#endif // !defined(__HIPCC_RTC__)
+__device__
+inline
+void atomicAddNoRet(float* address, float val)
+{
+    __ockl_atomic_add_noret_f32(address, val);
+}
+
+// double atomicAdd: same unsafe/safe split as the float overload.
+__device__
+inline
+double atomicAdd(double* address, double val) {
+#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
+  return unsafeAtomicAdd(address, val);
+#else
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+#endif
+}
+
+__device__
+inline
+double atomicAdd_system(double* address, double val) {
+  return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// atomicSub family: implemented as a fetch-add of the negated operand.
+__device__
+inline
+int atomicSub(int* address, int val) {
+  return
__hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+int atomicSub_system(int* address, int val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// Unsigned overloads: `-val` wraps modulo 2^N, so adding it subtracts val.
+__device__
+inline
+unsigned int atomicSub(unsigned int* address, unsigned int val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned long atomicSub(unsigned long* address, unsigned long val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long atomicSub_system(unsigned long* address, unsigned long val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned long long atomicSub(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long long atomicSub_system(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// float atomicSub: unsafeAtomicAdd of -val when __AMDGCN_UNSAFE_FP_ATOMICS__
+// is defined, otherwise the relaxed agent-scope fetch-add builtin.
+__device__
+inline
+float atomicSub(float* address, float val) {
+#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
+  return unsafeAtomicAdd(address, -val);
+#else
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+#endif
+}
+
+__device__
+inline
+float atomicSub_system(float* address, float val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// double atomicSub: same unsafe/safe split as the float overload.
+__device__
+inline
+double atomicSub(double* address, double val) {
+#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
+  return
unsafeAtomicAdd(address, -val);
+#else
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+#endif
+}
+
+__device__
+inline
+double atomicSub_system(double* address, double val) {
+  return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// atomicExch family: relaxed exchange, returning whatever the builtin returns.
+// Plain overloads use agent scope, _system overloads use system scope.
+__device__
+inline
+int atomicExch(int* address, int val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+int atomicExch_system(int* address, int val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned int atomicExch(unsigned int* address, unsigned int val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned long atomicExch(unsigned long* address, unsigned long val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long atomicExch_system(unsigned long* address, unsigned long val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+float atomicExch(float* address, float val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+float
atomicExch_system(float* address, float val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+double atomicExch(double* address, double val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+double atomicExch_system(double* address, double val) {
+  return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// atomicMin family (integer overloads): relaxed fetch-min, returning whatever
+// the builtin returns. Plain overloads use agent scope, _system overloads use
+// system scope.
+__device__
+inline
+int atomicMin(int* address, int val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+int atomicMin_system(int* address, int val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned int atomicMin(unsigned int* address, unsigned int val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+// Returns unsigned long, matching the operand type, the _system twin below and
+// every other unsigned long overload in this header (it was declared as
+// unsigned long long). The value still converts implicitly for callers that
+// store it in an unsigned long long.
+__device__
+inline
+unsigned long atomicMin(unsigned long* address, unsigned long val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
+}
+
+__device__
+inline
+unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
+  return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
+}
+
+__device__
+inline
+float
atomicMin(float* addr, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMin(addr, val); +#else + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned int *uaddr = (unsigned int *)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __uint_as_float(value); + #endif +#endif +} + +// System-scope float minimum: stores val at address while val is smaller than the current +// contents, and returns the contents observed before this call's update (or the current +// contents when no update was needed), like the agent-scope overload. +__device__ +inline +float atomicMin_system(float* address, float val) { + unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) }; + #if __has_builtin(__hip_atomic_load) + unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)}; + #else + unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + #endif + float value = __uint_as_float(tmp); + + while (val < value) { + const float observed = atomicCAS_system(address, value, val); + // The exchange happened only if the CAS saw exactly the expected bits. In that case + // value already holds the previous contents; looping again would overwrite it with val. + if (__float_as_uint(observed) == __float_as_uint(value)) { + break; + } + value = observed; + } + + return value; +} + +__device__ +inline +double atomicMin(double* addr, double val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMin(addr, val); +#else + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned long long *uaddr = (unsigned long long *)addr; + unsigned long long value = 
__atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); + #endif +#endif +} + +__device__ +inline +double atomicMin_system(double* address, double val) { + unsigned long long* uaddr { reinterpret_cast(address) }; + #if __has_builtin(__hip_atomic_load) + unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)}; + #else + unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + #endif + double value = __longlong_as_double(tmp); + + while (val < value) { + value = atomicCAS_system(address, value, val); + } + + return value; +} + +__device__ +inline +int atomicMax(int* address, int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicMax_system(int* address, int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicMax(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicMax_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long atomicMax(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long atomicMax_system(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicMax(unsigned long long* address, unsigned long long val) { + return 
__hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +float atomicMax(float* addr, float val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMax(addr, val); +#else + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned int *uaddr = (unsigned int *)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __uint_as_float(value); + #endif +#endif +} + +// System-scope float maximum: stores val at address while val is larger than the current +// contents, and returns the contents observed before this call's update (or the current +// contents when no update was needed), like the agent-scope overload. +__device__ +inline +float atomicMax_system(float* address, float val) { + unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) }; + #if __has_builtin(__hip_atomic_load) + unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)}; + #else + unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + #endif + float value = __uint_as_float(tmp); + + while (value < val) { + const float observed = atomicCAS_system(address, value, val); + // The exchange happened only if the CAS saw exactly the expected bits. In that case + // value already holds the previous contents; looping again would overwrite it with val. + if (__float_as_uint(observed) == __float_as_uint(value)) { + break; + } + value = observed; + } + + return value; +} + +__device__ +inline +double atomicMax(double* addr, double val) { +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicMax(addr, val); +#else + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, 
__HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned long long *uaddr = (unsigned long long *)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); + #endif +#endif +} + +// System-scope double maximum: stores val at address while val is larger than the current +// contents, and returns the contents observed before this call's update (or the current +// contents when no update was needed), like the agent-scope overload. +__device__ +inline +double atomicMax_system(double* address, double val) { + unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) }; + #if __has_builtin(__hip_atomic_load) + unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)}; + #else + unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; + #endif + double value = __longlong_as_double(tmp); + + while (value < val) { + const double observed = atomicCAS_system(address, value, val); + // The exchange happened only if the CAS saw exactly the expected bits. In that case + // value already holds the previous contents; looping again would overwrite it with val. + if (__double_as_longlong(observed) == __double_as_longlong(value)) { + break; + } + value = observed; + } + + return value; +} + +__device__ +inline +unsigned int atomicInc(unsigned int* address, unsigned int val) +{ + return __builtin_amdgcn_atomic_inc32( + address, val, __ATOMIC_RELAXED, "agent"); +} + +__device__ +inline +unsigned int atomicDec(unsigned int* address, unsigned int val) +{ + return __builtin_amdgcn_atomic_dec32( + address, val, __ATOMIC_RELAXED, "agent"); +} + +__device__ +inline +int atomicAnd(int* address, int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicAnd_system(int* address, int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicAnd(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + 
+__device__ +inline +unsigned int atomicAnd_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long atomicAnd(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long atomicAnd_system(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicOr(int* address, int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicOr_system(int* address, int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicOr(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicOr_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long atomicOr(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long atomicOr_system(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_or(address, val, 
__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicOr(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicXor(int* address, int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicXor_system(int* address, int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicXor(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicXor_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long atomicXor(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long atomicXor_system(unsigned long* address, unsigned long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicXor(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +#else + +__device__ +inline +int atomicCAS(int* address, 
int compare, int val) +{ + __atomic_compare_exchange_n( + address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + return compare; +} +__device__ +inline +unsigned int atomicCAS( + unsigned int* address, unsigned int compare, unsigned int val) +{ + __atomic_compare_exchange_n( + address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + return compare; +} +__device__ +inline +unsigned long long atomicCAS( + unsigned long long* address, + unsigned long long compare, + unsigned long long val) +{ + __atomic_compare_exchange_n( + address, &compare, val, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + return compare; +} + +__device__ +inline +int atomicAdd(int* address, int val) +{ + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicAdd(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicAdd( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +float atomicAdd(float* address, float val) +{ +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicAdd(address, val); +#else + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +#endif +} + +#if !defined(__HIPCC_RTC__) +DEPRECATED("use atomicAdd instead") +#endif // !defined(__HIPCC_RTC__) +__device__ +inline +void atomicAddNoRet(float* address, float val) +{ + __ockl_atomic_add_noret_f32(address, val); +} + +__device__ +inline +double atomicAdd(double* address, double val) +{ +#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__) + return unsafeAtomicAdd(address, val); +#else + return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +#endif +} + +__device__ +inline +int atomicSub(int* address, int val) +{ + return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicSub(unsigned int* address, unsigned int val) 
+{ + return __atomic_fetch_sub(address, val, __ATOMIC_RELAXED); +} + +__device__ +inline +int atomicExch(int* address, int val) +{ + return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicExch(unsigned int* address, unsigned int val) +{ + return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicExch(unsigned long long* address, unsigned long long val) +{ + return __atomic_exchange_n(address, val, __ATOMIC_RELAXED); +} +// Exchanges the 32-bit pattern of the float through its unsigned int view. +__device__ +inline +float atomicExch(float* address, float val) +{ + return __uint_as_float(__atomic_exchange_n( + reinterpret_cast<unsigned int*>(address), + __float_as_uint(val), + __ATOMIC_RELAXED)); +} + +__device__ +inline +int atomicMin(int* address, int val) +{ + return __atomic_fetch_min(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicMin(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_min(address, val, __ATOMIC_RELAXED); +} +// CAS-loop minimum for 64-bit operands; returns the contents observed before this call's +// update (or the current contents when no update was needed). +__device__ +inline +unsigned long long atomicMin( + unsigned long long* address, unsigned long long val) +{ + unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)}; + while (val < tmp) { + const auto prev = atomicCAS(address, tmp, val); + + // atomicCAS returns the prior contents; equal to tmp means val was stored, so tmp is + // the old value to report. Otherwise retry against the freshly observed contents. + if (prev == tmp) { break; } + + tmp = prev; + } + + return tmp; +} + +__device__ +inline +int atomicMax(int* address, int val) +{ + return __atomic_fetch_max(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicMax(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_max(address, val, __ATOMIC_RELAXED); +} +// CAS-loop maximum for 64-bit operands; returns the contents observed before this call's +// update (or the current contents when no update was needed). +__device__ +inline +unsigned long long atomicMax( + unsigned long long* address, unsigned long long val) +{ + unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)}; + while (tmp < val) { + const auto prev = atomicCAS(address, tmp, val); + + // Equal to tmp means val was stored, so tmp is the old value to report. + if (prev == tmp) { break; } + + tmp = prev; 
+ } + + return tmp; +} + +__device__ +inline +unsigned int atomicInc(unsigned int* address, unsigned int val) +{ + return __builtin_amdgcn_atomic_inc32( + address, val, __ATOMIC_RELAXED, "agent"); +} + +__device__ +inline +unsigned int atomicDec(unsigned int* address, unsigned int val) +{ + return __builtin_amdgcn_atomic_dec32( + address, val, __ATOMIC_RELAXED, "agent"); +} + +__device__ +inline +int atomicAnd(int* address, int val) +{ + return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicAnd(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicAnd( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_and(address, val, __ATOMIC_RELAXED); +} + +__device__ +inline +int atomicOr(int* address, int val) +{ + return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicOr(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicOr( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_or(address, val, __ATOMIC_RELAXED); +} + +__device__ +inline +int atomicXor(int* address, int val) +{ + return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned int atomicXor(unsigned int* address, unsigned int val) +{ + return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); +} +__device__ +inline +unsigned long long atomicXor( + unsigned long long* address, unsigned long long val) +{ + return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); +} + +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h new file mode 100644 index 0000000000..deb3bfb7e2 --- /dev/null +++ 
b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h @@ -0,0 +1,293 @@ +/** + * MIT License + * + * Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/*!\file + * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef + */ + +#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BFLOAT16_H_ +#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BFLOAT16_H_ + +#include "host_defines.h" +#if defined(__HIPCC_RTC__) + #define __HOST_DEVICE__ __device__ +#else + #define __HOST_DEVICE__ __host__ __device__ +#endif + +#if __cplusplus < 201103L || !defined(__HIPCC__) + +// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only +// include a minimal definition of hip_bfloat16 + +#include +/*! \brief Struct to represent a 16 bit brain floating point number. 
*/ +typedef struct +{ + uint16_t data; +} hip_bfloat16; + +#else // __cplusplus < 201103L || !defined(__HIPCC__) + +#include + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" +struct hip_bfloat16 +{ + __hip_uint16_t data; + + enum truncate_t + { + truncate + }; + + __HOST_DEVICE__ hip_bfloat16() = default; + + // round upper 16 bits of IEEE float to convert to bfloat16 + explicit __HOST_DEVICE__ hip_bfloat16(float f) + : data(float_to_bfloat16(f)) + { + } + + explicit __HOST_DEVICE__ hip_bfloat16(float f, truncate_t) + : data(truncate_float_to_bfloat16(f)) + { + } + + // zero extend lower 16 bits of bfloat16 to convert to IEEE float + __HOST_DEVICE__ operator float() const + { + union + { + uint32_t int32; + float fp32; + } u = {uint32_t(data) << 16}; + return u.fp32; + } + + __HOST_DEVICE__ hip_bfloat16 &operator=(const float& f) + { + data = float_to_bfloat16(f); + return *this; + } + + static __HOST_DEVICE__ hip_bfloat16 round_to_bfloat16(float f) + { + hip_bfloat16 output; + output.data = float_to_bfloat16(f); + return output; + } + + static __HOST_DEVICE__ hip_bfloat16 round_to_bfloat16(float f, truncate_t) + { + hip_bfloat16 output; + output.data = truncate_float_to_bfloat16(f); + return output; + } + +private: + static __HOST_DEVICE__ __hip_uint16_t float_to_bfloat16(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + if(~u.int32 & 0x7f800000) + { + // When the exponent bits are not all 1s, then the value is zero, normal, + // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus + // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd). + // This causes the bfloat16's mantissa to be incremented by 1 if the 16 + // least significant bits of the float mantissa are greater than 0x8000, + // or if they are equal to 0x8000 and the least significant bit of the + // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when + // the lower 16 bits are exactly 0x8000. 
If the bfloat16 mantissa already + // has the value 0x7f, then incrementing it causes it to become 0x00 and + // the exponent is incremented by one, which is the next higher FP value + // to the unrounded bfloat16 value. When the bfloat16 value is subnormal + // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up + // to a normal value with an exponent of 0x01 and a mantissa of 0x00. + // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, + // incrementing it causes it to become an exponent of 0xFF and a mantissa + // of 0x00, which is Inf, the next higher value to the unrounded value. + u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even + } + else if(u.int32 & 0xffff) + { + // When all of the exponent bits are 1, the value is Inf or NaN. + // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero + // mantissa bit. Quiet NaN is indicated by the most significant mantissa + // bit being 1. Signaling NaN is indicated by the most significant + // mantissa bit being 0 but some other bit(s) being 1. If any of the + // lower 16 bits of the mantissa are 1, we set the least significant bit + // of the bfloat16 mantissa, in order to preserve signaling NaN in case + // the bfloat16's mantissa bits are all 0. 
+ u.int32 |= 0x10000; // Preserve signaling NaN + } + return __hip_uint16_t(u.int32 >> 16); + } + + // Truncate instead of rounding, preserving SNaN + static __HOST_DEVICE__ __hip_uint16_t truncate_float_to_bfloat16(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + return __hip_uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff)); + } +}; +#pragma clang diagnostic pop + +typedef struct +{ + __hip_uint16_t data; +} hip_bfloat16_public; + +// The C++ class must stay layout-compatible with the plain C struct above. +static_assert(__hip_internal::is_standard_layout<hip_bfloat16>{}, + "hip_bfloat16 is not a standard layout type, and thus is " + "incompatible with C."); + +static_assert(__hip_internal::is_trivial<hip_bfloat16>{}, + "hip_bfloat16 is not a trivial type, and thus is " + "incompatible with C."); +#if !defined(__HIPCC_RTC__) +static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public) + && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data), + "internal hip_bfloat16 does not match public hip_bfloat16"); + +inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16) +{ + return os << float(bf16); +} +#endif + +inline __HOST_DEVICE__ hip_bfloat16 operator+(hip_bfloat16 a) +{ + return a; +} +inline __HOST_DEVICE__ hip_bfloat16 operator-(hip_bfloat16 a) +{ + a.data ^= 0x8000; + return a; +} +inline __HOST_DEVICE__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) + float(b)); +} +inline __HOST_DEVICE__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) - float(b)); +} +inline __HOST_DEVICE__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) * float(b)); +} +inline __HOST_DEVICE__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) / float(b)); +} +inline __HOST_DEVICE__ bool operator<(hip_bfloat16 a, hip_bfloat16 b) +{ + return float(a) < float(b); +} +inline __HOST_DEVICE__ bool operator==(hip_bfloat16 a, hip_bfloat16 b) +{ + return 
float(a) == float(b); +} +inline __HOST_DEVICE__ bool operator>(hip_bfloat16 a, hip_bfloat16 b) +{ + return b < a; +} +inline __HOST_DEVICE__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b) +{ + // Compare as float directly: negating operator> would report true for a NaN operand. + return float(a) <= float(b); +} +inline __HOST_DEVICE__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b) +{ + return !(a == b); +} +inline __HOST_DEVICE__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b) +{ + // Compare as float directly: negating operator< would report true for a NaN operand. + return float(a) >= float(b); +} +inline __HOST_DEVICE__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a + b; +} +inline __HOST_DEVICE__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a - b; +} +inline __HOST_DEVICE__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a * b; +} +inline __HOST_DEVICE__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a / b; +} +inline __HOST_DEVICE__ hip_bfloat16& operator++(hip_bfloat16& a) +{ + return a += hip_bfloat16(1.0f); +} +inline __HOST_DEVICE__ hip_bfloat16& operator--(hip_bfloat16& a) +{ + return a -= hip_bfloat16(1.0f); +} +inline __HOST_DEVICE__ hip_bfloat16 operator++(hip_bfloat16& a, int) +{ + hip_bfloat16 orig = a; + ++a; + return orig; +} +inline __HOST_DEVICE__ hip_bfloat16 operator--(hip_bfloat16& a, int) +{ + hip_bfloat16 orig = a; + --a; + return orig; +} + +namespace std +{ + constexpr __HOST_DEVICE__ bool isinf(hip_bfloat16 a) + { + return !(~a.data & 0x7f80) && !(a.data & 0x7f); + } + constexpr __HOST_DEVICE__ bool isnan(hip_bfloat16 a) + { + return !(~a.data & 0x7f80) && +(a.data & 0x7f); + } + constexpr __HOST_DEVICE__ bool iszero(hip_bfloat16 a) + { + return !(a.data & 0x7fff); + } +} + +#endif // __cplusplus < 201103L || !defined(__HIPCC__) + +#endif // _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BFLOAT16_H_ diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_common.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_common.h new file mode 100644 index 0000000000..0c7dc51b50 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_common.h @@ 
-0,0 +1,32 @@ +/* +Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H + +#if defined(__clang__) && defined(__HIP__) +#define __HIP_CLANG_ONLY__ 1 +#else +#define __HIP_CLANG_ONLY__ 0 +#endif + +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h new file mode 100644 index 0000000000..9d9dfd5e9d --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h @@ -0,0 +1,314 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H + +#include "hip/amd_detail/amd_hip_vector_types.h" + +#if defined(__HIPCC_RTC__) +#define __HOST_DEVICE__ __device__ +#else +#define __HOST_DEVICE__ __host__ __device__ +// TODO: Clang has a bug which allows device functions to call std functions +// when std functions are introduced into default namespace by using statement. +// math.h may be included after this bug is fixed. 
+#if __cplusplus +#include +#else +#include "math.h" +#endif +#endif // !defined(__HIPCC_RTC__) + +#if __cplusplus +#define COMPLEX_NEG_OP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type operator-(const type& op) { \ + type ret; \ + ret.x = -op.x; \ + ret.y = -op.y; \ + return ret; \ + } + +#define COMPLEX_EQ_OP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline bool operator==(const type& lhs, const type& rhs) { \ + return lhs.x == rhs.x && lhs.y == rhs.y; \ + } + +#define COMPLEX_NE_OP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline bool operator!=(const type& lhs, const type& rhs) { \ + return !(lhs == rhs); \ + } + +#define COMPLEX_ADD_OP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type operator+(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = lhs.x + rhs.x; \ + ret.y = lhs.y + rhs.y; \ + return ret; \ + } + +#define COMPLEX_SUB_OP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type operator-(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = lhs.x - rhs.x; \ + ret.y = lhs.y - rhs.y; \ + return ret; \ + } + +#define COMPLEX_MUL_OP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type operator*(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \ + ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \ + return ret; \ + } + +#define COMPLEX_DIV_OP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type operator/(const type& lhs, const type& rhs) { \ + type ret; \ + ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \ + ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \ + ret.x = ret.x / (rhs.x * rhs.x + rhs.y * rhs.y); \ + ret.y = ret.y / (rhs.x * rhs.x + rhs.y * rhs.y); \ + return ret; \ + } + +#define COMPLEX_ADD_PREOP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type& operator+=(type& lhs, const type& rhs) { \ + lhs.x += rhs.x; \ + lhs.y += rhs.y; \ + return lhs; \ + } + +#define COMPLEX_SUB_PREOP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type& operator-=(type& lhs, const type& rhs) { \ + lhs.x -= rhs.x; 
\ + lhs.y -= rhs.y; \ + return lhs; \ + } + +#define COMPLEX_MUL_PREOP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type& operator*=(type& lhs, const type& rhs) { \ + type temp{lhs}; \ + lhs.x = rhs.x * temp.x - rhs.y * temp.y; \ + lhs.y = rhs.y * temp.x + rhs.x * temp.y; \ + return lhs; \ + } + +#define COMPLEX_DIV_PREOP_OVERLOAD(type) \ + __HOST_DEVICE__ static inline type& operator/=(type& lhs, const type& rhs) { \ + type temp; \ + temp.x = (lhs.x*rhs.x + lhs.y * rhs.y) / (rhs.x*rhs.x + rhs.y*rhs.y); \ + temp.y = (lhs.y * rhs.x - lhs.x * rhs.y) / (rhs.x*rhs.x + rhs.y*rhs.y); \ + lhs = temp; \ + return lhs; \ + } + +#define COMPLEX_SCALAR_PRODUCT(type, type1) \ + __HOST_DEVICE__ static inline type operator*(const type& lhs, type1 rhs) { \ + type ret; \ + ret.x = lhs.x * rhs; \ + ret.y = lhs.y * rhs; \ + return ret; \ + } + +#endif + +typedef float2 hipFloatComplex; + +__HOST_DEVICE__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } + +__HOST_DEVICE__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } + +__HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { + hipFloatComplex z; + z.x = a; + z.y = b; + return z; +} + +__HOST_DEVICE__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { + hipFloatComplex ret; + ret.x = z.x; + ret.y = -z.y; + return ret; +} + +__HOST_DEVICE__ static inline float hipCsqabsf(hipFloatComplex z) { + return z.x * z.x + z.y * z.y; +} + +__HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x + q.x, p.y + q.y); +} + +__HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x - q.x, p.y - q.y); +} + +__HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +} + +__HOST_DEVICE__ static inline hipFloatComplex 
hipCdivf(hipFloatComplex p, hipFloatComplex q) { + float sqabs = hipCsqabsf(q); + hipFloatComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; +} + +__HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } + + +typedef double2 hipDoubleComplex; + +__HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } + +__HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } + +__HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { + hipDoubleComplex z; + z.x = a; + z.y = b; + return z; +} + +__HOST_DEVICE__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { + hipDoubleComplex ret; + ret.x = z.x; + ret.y = -z.y; + return ret; +} + +__HOST_DEVICE__ static inline double hipCsqabs(hipDoubleComplex z) { + return z.x * z.x + z.y * z.y; +} + +__HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x + q.x, p.y + q.y); +} + +__HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x - q.x, p.y - q.y); +} + +__HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +} + +__HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { + double sqabs = hipCsqabs(q); + hipDoubleComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; +} + +__HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); } + + +#if __cplusplus + +COMPLEX_NEG_OP_OVERLOAD(hipFloatComplex) +COMPLEX_EQ_OP_OVERLOAD(hipFloatComplex) +COMPLEX_NE_OP_OVERLOAD(hipFloatComplex) +COMPLEX_ADD_OP_OVERLOAD(hipFloatComplex) 
+COMPLEX_SUB_OP_OVERLOAD(hipFloatComplex) +COMPLEX_MUL_OP_OVERLOAD(hipFloatComplex) +COMPLEX_DIV_OP_OVERLOAD(hipFloatComplex) +COMPLEX_ADD_PREOP_OVERLOAD(hipFloatComplex) +COMPLEX_SUB_PREOP_OVERLOAD(hipFloatComplex) +COMPLEX_MUL_PREOP_OVERLOAD(hipFloatComplex) +COMPLEX_DIV_PREOP_OVERLOAD(hipFloatComplex) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned short) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed short) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned int) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed int) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, float) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, double) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long long) +COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long long) + +COMPLEX_NEG_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_EQ_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_NE_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_ADD_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_SUB_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_MUL_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_DIV_OP_OVERLOAD(hipDoubleComplex) +COMPLEX_ADD_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_SUB_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_MUL_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_DIV_PREOP_OVERLOAD(hipDoubleComplex) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned short) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed short) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned int) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed int) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, float) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, double) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long long) +COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long) + +#endif + + +typedef hipFloatComplex hipComplex; + +__HOST_DEVICE__ static inline hipComplex 
make_hipComplex(float x, float y) { + return make_hipFloatComplex(x, y); +} + +__HOST_DEVICE__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { + return make_hipFloatComplex((float)z.x, (float)z.y); +} + +__HOST_DEVICE__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { + return make_hipDoubleComplex((double)z.x, (double)z.y); +} + +__HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { + float real = (p.x * q.x) + r.x; + float imag = (q.x * p.y) + r.y; + + real = -(p.y * q.y) + real; + imag = (p.x * q.y) + imag; + + return make_hipComplex(real, imag); +} + +__HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, + hipDoubleComplex r) { + double real = (p.x * q.x) + r.x; + double imag = (q.x * p.y) + r.y; + + real = -(p.y * q.y) + real; + imag = (p.x * q.y) + imag; + + return make_hipDoubleComplex(real, imag); +} + +#endif //HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h new file mode 100644 index 0000000000..575a9f8ef7 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h @@ -0,0 +1,708 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/hip_cooperative_groups.h + * + * @brief Device side implementation of `Cooperative Group` feature. + * + * Defines new types and device API wrappers related to `Cooperative Group` + * feature, which the programmer can directly use in his kernel(s) in order to + * make use of this feature. + */ +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wpadded" + +#if __cplusplus +#if !defined(__HIPCC_RTC__) +#include +#endif + +#define __hip_abort() \ + { asm("trap;"); } +#if defined(NDEBUG) +#define __hip_assert(COND) +#else +#define __hip_assert(COND) \ + { \ + if (!COND) { \ + __hip_abort(); \ + } \ + } +#endif + +namespace cooperative_groups { + +/** \brief The base type of all cooperative group types + * + * \details Holds the key properties of a constructed cooperative group types + * object, like the group type, its size, etc + */ +class thread_group { + protected: + uint32_t _type; // thread_group type + uint32_t _size; // total number of threads in the tread_group + uint64_t _mask; // Lanemask for coalesced and tiled partitioned group types, + // LSB represents lane 0, and MSB represents lane 63 + + // Construct a 
thread group, and set thread group type and other essential + // thread group properties. This generic thread group is directly constructed + // only when the group is supposed to contain only the calling the thread + // (throurh the API - `this_thread()`), and in all other cases, this thread + // group object is a sub-object of some other derived thread group object + __CG_QUALIFIER__ thread_group(internal::group_type type, uint32_t size = static_cast(0), + uint64_t mask = static_cast(0)) { + _type = type; + _size = size; + _mask = mask; + } + + struct _tiled_info { + bool is_tiled; + unsigned int size; + }; + + struct _coalesced_info { + lane_mask member_mask; + unsigned int size; + struct _tiled_info tiled_info; + } coalesced_info; + + friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, + unsigned int tile_size); + friend class thread_block; + + public: + // Total number of threads in the thread group, and this serves the purpose + // for all derived cooperative group types since their `size` is directly + // saved during the construction + __CG_QUALIFIER__ uint32_t size() const { return _size; } + __CG_QUALIFIER__ unsigned int cg_type() const { return _type; } + // Rank of the calling thread within [0, size()) + __CG_QUALIFIER__ uint32_t thread_rank() const; + // Is this cooperative group type valid? 
+ __CG_QUALIFIER__ bool is_valid() const; + // synchronize the threads in the thread group + __CG_QUALIFIER__ void sync() const; +}; + +/** \brief The multi-grid cooperative group type + * + * \details Represents an inter-device cooperative group type where the + * participating threads within the group spans across multple + * devices, running the (same) kernel on these devices + */ +class multi_grid_group : public thread_group { + // Only these friend functions are allowed to construct an object of this class + // and access its resources + friend __CG_QUALIFIER__ multi_grid_group this_multi_grid(); + + protected: + // Construct mutli-grid thread group (through the API this_multi_grid()) + explicit __CG_QUALIFIER__ multi_grid_group(uint32_t size) + : thread_group(internal::cg_multi_grid, size) {} + + public: + // Number of invocations participating in this multi-grid group. In other + // words, the number of GPUs + __CG_QUALIFIER__ uint32_t num_grids() { return internal::multi_grid::num_grids(); } + // Rank of this invocation. In other words, an ID number within the range + // [0, num_grids()) of the GPU, this kernel is running on + __CG_QUALIFIER__ uint32_t grid_rank() { return internal::multi_grid::grid_rank(); } + __CG_QUALIFIER__ uint32_t thread_rank() const { return internal::multi_grid::thread_rank(); } + __CG_QUALIFIER__ bool is_valid() const { return internal::multi_grid::is_valid(); } + __CG_QUALIFIER__ void sync() const { internal::multi_grid::sync(); } +}; + +/** \brief User exposed API interface to construct multi-grid cooperative + * group type object - `multi_grid_group` + * + * \details User is not allowed to directly construct an object of type + * `multi_grid_group`. 
Instead, he should construct it through this + * API function + */ +__CG_QUALIFIER__ multi_grid_group this_multi_grid() { + return multi_grid_group(internal::multi_grid::size()); +} + +/** \brief The grid cooperative group type + * + * \details Represents an inter-workgroup cooperative group type where the + * participating threads within the group spans across multiple + * workgroups running the (same) kernel on the same device + */ +class grid_group : public thread_group { + // Only these friend functions are allowed to construct an object of this class + // and access its resources + friend __CG_QUALIFIER__ grid_group this_grid(); + + protected: + // Construct grid thread group (through the API this_grid()) + explicit __CG_QUALIFIER__ grid_group(uint32_t size) : thread_group(internal::cg_grid, size) {} + + public: + __CG_QUALIFIER__ uint32_t thread_rank() const { return internal::grid::thread_rank(); } + __CG_QUALIFIER__ bool is_valid() const { return internal::grid::is_valid(); } + __CG_QUALIFIER__ void sync() const { internal::grid::sync(); } +}; + +/** \brief User exposed API interface to construct grid cooperative group type + * object - `grid_group` + * + * \details User is not allowed to directly construct an object of type + * `multi_grid_group`. 
Instead, he should construct it through this + * API function + */ +__CG_QUALIFIER__ grid_group this_grid() { return grid_group(internal::grid::size()); } + +/** \brief The workgroup (thread-block in CUDA terminology) cooperative group + * type + * + * \details Represents an intra-workgroup cooperative group type where the + * participating threads within the group are exactly the same threads + * which are participated in the currently executing `workgroup` + */ +class thread_block : public thread_group { + // Only these friend functions are allowed to construct an object of thi + // class and access its resources + friend __CG_QUALIFIER__ thread_block this_thread_block(); + friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, + unsigned int tile_size); + friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_block& parent, + unsigned int tile_size); + + protected: + // Construct a workgroup thread group (through the API this_thread_block()) + explicit __CG_QUALIFIER__ thread_block(uint32_t size) + : thread_group(internal::cg_workgroup, size) {} + + __CG_QUALIFIER__ thread_group new_tiled_group(unsigned int tile_size) const { + const bool pow2 = ((tile_size & (tile_size - 1)) == 0); + // Invalid tile size, assert + if (!tile_size || (tile_size > __AMDGCN_WAVEFRONT_SIZE) || !pow2) { + __hip_assert(false && "invalid tile size") + } + + thread_group tiledGroup = thread_group(internal::cg_tiled_group, tile_size); + tiledGroup.coalesced_info.tiled_info.size = tile_size; + tiledGroup.coalesced_info.tiled_info.is_tiled = true; + return tiledGroup; + } + + public: + // 3-dimensional block index within the grid + __CG_QUALIFIER__ dim3 group_index() { return internal::workgroup::group_index(); } + // 3-dimensional thread index within the block + __CG_QUALIFIER__ dim3 thread_index() { return internal::workgroup::thread_index(); } + __CG_QUALIFIER__ uint32_t thread_rank() const { return internal::workgroup::thread_rank(); } + 
__CG_QUALIFIER__ bool is_valid() const { return internal::workgroup::is_valid(); } + __CG_QUALIFIER__ void sync() const { internal::workgroup::sync(); } +}; + +/** \brief User exposed API interface to construct workgroup cooperative + * group type object - `thread_block`. + * + * \details User is not allowed to directly construct an object of type + * `thread_block`. Instead, he should construct it through this API + * function. + */ +__CG_QUALIFIER__ thread_block this_thread_block() { + return thread_block(internal::workgroup::size()); +} + +/** \brief The tiled_group cooperative group type + * + * \details Represents one tiled thread group in a wavefront. + * This group type also supports sub-wave level intrinsics. + */ + +class tiled_group : public thread_group { + private: + friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, + unsigned int tile_size); + friend __CG_QUALIFIER__ tiled_group tiled_partition(const tiled_group& parent, + unsigned int tile_size); + + __CG_QUALIFIER__ tiled_group new_tiled_group(unsigned int tile_size) const { + const bool pow2 = ((tile_size & (tile_size - 1)) == 0); + + if (!tile_size || (tile_size > __AMDGCN_WAVEFRONT_SIZE) || !pow2) { + __hip_assert(false && "invalid tile size") + } + + if (size() <= tile_size) { + return *this; + } + + tiled_group tiledGroup = tiled_group(tile_size); + tiledGroup.coalesced_info.tiled_info.is_tiled = true; + return tiledGroup; + } + + protected: + explicit __CG_QUALIFIER__ tiled_group(unsigned int tileSize) + : thread_group(internal::cg_tiled_group, tileSize) { + coalesced_info.tiled_info.size = tileSize; + coalesced_info.tiled_info.is_tiled = true; + } + + public: + __CG_QUALIFIER__ unsigned int size() const { return (coalesced_info.tiled_info.size); } + + __CG_QUALIFIER__ unsigned int thread_rank() const { + return (internal::workgroup::thread_rank() & (coalesced_info.tiled_info.size - 1)); + } + + __CG_QUALIFIER__ void sync() const { + internal::tiled_group::sync(); 
+ } +}; + +/** \brief The coalesced_group cooperative group type + * + * \details Represents a active thread group in a wavefront. + * This group type also supports sub-wave level intrinsics. + */ +class coalesced_group : public thread_group { + private: + friend __CG_QUALIFIER__ coalesced_group coalesced_threads(); + friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size); + friend __CG_QUALIFIER__ coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tile_size); + + __CG_QUALIFIER__ coalesced_group new_tiled_group(unsigned int tile_size) const { + const bool pow2 = ((tile_size & (tile_size - 1)) == 0); + + if (!tile_size || (tile_size > size()) || !pow2) { + return coalesced_group(0); + } + + // If a tiled group is passed to be partitioned further into a coalesced_group. + // prepare a mask for further partitioning it so that it stays coalesced. + if (coalesced_info.tiled_info.is_tiled) { + unsigned int base_offset = (thread_rank() & (~(tile_size - 1))); + unsigned int masklength = min(static_cast(size()) - base_offset, tile_size); + lane_mask member_mask = static_cast(-1) >> (__AMDGCN_WAVEFRONT_SIZE - masklength); + + member_mask <<= (__lane_id() & ~(tile_size - 1)); + coalesced_group coalesced_tile = coalesced_group(member_mask); + coalesced_tile.coalesced_info.tiled_info.is_tiled = true; + return coalesced_tile; + } + // Here the parent coalesced_group is not partitioned. 
+ else { + lane_mask member_mask = 0; + unsigned int tile_rank = 0; + int lanes_to_skip = ((thread_rank()) / tile_size) * tile_size; + + for (unsigned int i = 0; i < __AMDGCN_WAVEFRONT_SIZE; i++) { + lane_mask active = coalesced_info.member_mask & (1 << i); + // Make sure the lane is active + if (active) { + if (lanes_to_skip <= 0 && tile_rank < tile_size) { + // Prepare a member_mask that is appropriate for a tile + member_mask |= active; + tile_rank++; + } + lanes_to_skip--; + } + } + coalesced_group coalesced_tile = coalesced_group(member_mask); + return coalesced_tile; + } + return coalesced_group(0); + } + + protected: + // Constructor + explicit __CG_QUALIFIER__ coalesced_group(lane_mask member_mask) + : thread_group(internal::cg_coalesced_group) { + coalesced_info.member_mask = member_mask; // Which threads are active + coalesced_info.size = __popcll(coalesced_info.member_mask); // How many threads are active + coalesced_info.tiled_info.is_tiled = false; // Not a partitioned group + } + + public: + __CG_QUALIFIER__ unsigned int size() const { + return coalesced_info.size; + } + + __CG_QUALIFIER__ unsigned int thread_rank() const { + return internal::coalesced_group::masked_bit_count(coalesced_info.member_mask); + } + + __CG_QUALIFIER__ void sync() const { + internal::coalesced_group::sync(); + } + + template + __CG_QUALIFIER__ T shfl(T var, int srcRank) const { + static_assert(is_valid_type::value, "Neither an integer or float type."); + + srcRank = srcRank % static_cast(size()); + + int lane = (size() == __AMDGCN_WAVEFRONT_SIZE) ? srcRank + : (__AMDGCN_WAVEFRONT_SIZE == 64) ? 
__fns64(coalesced_info.member_mask, 0, (srcRank + 1)) + : __fns32(coalesced_info.member_mask, 0, (srcRank + 1)); + + return __shfl(var, lane, __AMDGCN_WAVEFRONT_SIZE); + } + + template + __CG_QUALIFIER__ T shfl_down(T var, unsigned int lane_delta) const { + static_assert(is_valid_type::value, "Neither an integer or float type."); + + // Note: The cuda implementation appears to use the remainder of lane_delta + // and WARP_SIZE as the shift value rather than lane_delta itself. + // This is not described in the documentation and is not done here. + + if (size() == __AMDGCN_WAVEFRONT_SIZE) { + return __shfl_down(var, lane_delta, __AMDGCN_WAVEFRONT_SIZE); + } + + int lane; + if (__AMDGCN_WAVEFRONT_SIZE == 64) { + lane = __fns64(coalesced_info.member_mask, __lane_id(), lane_delta + 1); + } + else { + lane = __fns32(coalesced_info.member_mask, __lane_id(), lane_delta + 1); + } + + if (lane == -1) { + lane = __lane_id(); + } + + return __shfl(var, lane, __AMDGCN_WAVEFRONT_SIZE); + } + + template + __CG_QUALIFIER__ T shfl_up(T var, unsigned int lane_delta) const { + static_assert(is_valid_type::value, "Neither an integer or float type."); + + // Note: The cuda implementation appears to use the remainder of lane_delta + // and WARP_SIZE as the shift value rather than lane_delta itself. + // This is not described in the documentation and is not done here. + + if (size() == __AMDGCN_WAVEFRONT_SIZE) { + return __shfl_up(var, lane_delta, __AMDGCN_WAVEFRONT_SIZE); + } + + int lane; + if (__AMDGCN_WAVEFRONT_SIZE == 64) { + lane = __fns64(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1)); + } + else if (__AMDGCN_WAVEFRONT_SIZE == 32) { + lane = __fns32(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1)); + } + + if (lane == -1) { + lane = __lane_id(); + } + + return __shfl(var, lane, __AMDGCN_WAVEFRONT_SIZE); + } +}; + +/** \brief User exposed API to create coalesced groups. 
+ * + * \details A collective operation that groups all active lanes into a new thread group. + */ + +__CG_QUALIFIER__ coalesced_group coalesced_threads() { + return cooperative_groups::coalesced_group(__builtin_amdgcn_read_exec()); +} + +/** + * Implemenation of all publicly exposed base class APIs + */ +__CG_QUALIFIER__ uint32_t thread_group::thread_rank() const { + switch (this->_type) { + case internal::cg_multi_grid: { + return (static_cast(this)->thread_rank()); + } + case internal::cg_grid: { + return (static_cast(this)->thread_rank()); + } + case internal::cg_workgroup: { + return (static_cast(this)->thread_rank()); + } + case internal::cg_tiled_group: { + return (static_cast(this)->thread_rank()); + } + case internal::cg_coalesced_group: { + return (static_cast(this)->thread_rank()); + } + default: { + __hip_assert(false && "invalid cooperative group type") + return -1; + } + } +} + +__CG_QUALIFIER__ bool thread_group::is_valid() const { + switch (this->_type) { + case internal::cg_multi_grid: { + return (static_cast(this)->is_valid()); + } + case internal::cg_grid: { + return (static_cast(this)->is_valid()); + } + case internal::cg_workgroup: { + return (static_cast(this)->is_valid()); + } + case internal::cg_tiled_group: { + return (static_cast(this)->is_valid()); + } + case internal::cg_coalesced_group: { + return (static_cast(this)->is_valid()); + } + default: { + __hip_assert(false && "invalid cooperative group type") + return false; + } + } +} + +__CG_QUALIFIER__ void thread_group::sync() const { + switch (this->_type) { + case internal::cg_multi_grid: { + static_cast(this)->sync(); + break; + } + case internal::cg_grid: { + static_cast(this)->sync(); + break; + } + case internal::cg_workgroup: { + static_cast(this)->sync(); + break; + } + case internal::cg_tiled_group: { + static_cast(this)->sync(); + break; + } + case internal::cg_coalesced_group: { + static_cast(this)->sync(); + break; + } + default: { + __hip_assert(false && "invalid cooperative 
group type") + } + } +} + +/** + * Implemenation of publicly exposed `wrapper` APIs on top of basic cooperative + * group type APIs + */ +template __CG_QUALIFIER__ uint32_t group_size(CGTy const& g) { return g.size(); } + +template __CG_QUALIFIER__ uint32_t thread_rank(CGTy const& g) { + return g.thread_rank(); +} + +template __CG_QUALIFIER__ bool is_valid(CGTy const& g) { return g.is_valid(); } + +template __CG_QUALIFIER__ void sync(CGTy const& g) { g.sync(); } + +template class tile_base { + protected: + _CG_STATIC_CONST_DECL_ unsigned int numThreads = tileSize; + + public: + // Rank of the thread within this tile + _CG_STATIC_CONST_DECL_ unsigned int thread_rank() { + return (internal::workgroup::thread_rank() & (numThreads - 1)); + } + + // Number of threads within this tile + __CG_STATIC_QUALIFIER__ unsigned int size() { return numThreads; } +}; + +template class thread_block_tile_base : public tile_base { + static_assert(is_valid_tile_size::value, + "Tile size is either not a power of 2 or greater than the wavefront size"); + using tile_base::numThreads; + + public: + __CG_STATIC_QUALIFIER__ void sync() { + internal::tiled_group::sync(); + } + + template __CG_QUALIFIER__ T shfl(T var, int srcRank) const { + static_assert(is_valid_type::value, "Neither an integer or float type."); + return (__shfl(var, srcRank, numThreads)); + } + + template __CG_QUALIFIER__ T shfl_down(T var, unsigned int lane_delta) const { + static_assert(is_valid_type::value, "Neither an integer or float type."); + return (__shfl_down(var, lane_delta, numThreads)); + } + + template __CG_QUALIFIER__ T shfl_up(T var, unsigned int lane_delta) const { + static_assert(is_valid_type::value, "Neither an integer or float type."); + return (__shfl_up(var, lane_delta, numThreads)); + } + + template __CG_QUALIFIER__ T shfl_xor(T var, unsigned int laneMask) const { + static_assert(is_valid_type::value, "Neither an integer or float type."); + return (__shfl_xor(var, laneMask, numThreads)); + } +}; + 
+/** \brief Group type - thread_block_tile + * + * \details Represents one tile of thread group. + */ + +template +class thread_block_tile_type : public thread_block_tile_base, public tiled_group { + _CG_STATIC_CONST_DECL_ unsigned int numThreads = tileSize; + + friend class thread_block_tile_type; + + typedef thread_block_tile_base tbtBase; + + protected: + __CG_QUALIFIER__ thread_block_tile_type() : tiled_group(numThreads) { + coalesced_info.tiled_info.size = numThreads; + coalesced_info.tiled_info.is_tiled = true; + } + + public: + using tbtBase::size; + using tbtBase::sync; + using tbtBase::thread_rank; +}; + + +/** \brief User exposed API to partition groups. + * + * \details A collective operation that partitions the parent group into a one-dimensional, + * row-major, tiling of subgroups. + */ + +__CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size) { + if (parent.cg_type() == internal::cg_tiled_group) { + const tiled_group* cg = static_cast(&parent); + return cg->new_tiled_group(tile_size); + } + else if(parent.cg_type() == internal::cg_coalesced_group) { + const coalesced_group* cg = static_cast(&parent); + return cg->new_tiled_group(tile_size); + } + else { + const thread_block* tb = static_cast(&parent); + return tb->new_tiled_group(tile_size); + } +} + +// Thread block type overload +__CG_QUALIFIER__ thread_group tiled_partition(const thread_block& parent, unsigned int tile_size) { + return (parent.new_tiled_group(tile_size)); +} + +__CG_QUALIFIER__ tiled_group tiled_partition(const tiled_group& parent, unsigned int tile_size) { + return (parent.new_tiled_group(tile_size)); +} + +// If a coalesced group is passed to be partitioned, it should remain coalesced +__CG_QUALIFIER__ coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tile_size) { + return (parent.new_tiled_group(tile_size)); +} + +template class thread_block_tile; + +namespace impl { +template class thread_block_tile_internal; 
+ +template +class thread_block_tile_internal : public thread_block_tile_type { + protected: + template + __CG_QUALIFIER__ thread_block_tile_internal( + const thread_block_tile_internal& g) + : thread_block_tile_type() {} + + __CG_QUALIFIER__ thread_block_tile_internal(const thread_block& g) + : thread_block_tile_type() {} +}; +} // namespace impl + +template +class thread_block_tile : public impl::thread_block_tile_internal { + protected: + __CG_QUALIFIER__ thread_block_tile(const ParentCGTy& g) + : impl::thread_block_tile_internal(g) {} + + public: + __CG_QUALIFIER__ operator thread_block_tile() const { + return thread_block_tile(*this); + } +}; + + +template +class thread_block_tile : public impl::thread_block_tile_internal { + template friend class thread_block_tile; + + protected: + public: + template + __CG_QUALIFIER__ thread_block_tile(const thread_block_tile& g) + : impl::thread_block_tile_internal(g) {} +}; + +template class thread_block_tile; + +namespace impl { +template struct tiled_partition_internal; + +template +struct tiled_partition_internal : public thread_block_tile { + __CG_QUALIFIER__ tiled_partition_internal(const thread_block& g) + : thread_block_tile(g) {} +}; + +} // namespace impl + +/** \brief User exposed API to partition groups. + * + * \details This constructs a templated class derieved from thread_group. + * The template defines tile size of the new thread group at compile time. + */ +template +__CG_QUALIFIER__ thread_block_tile tiled_partition(const ParentCGTy& g) { + static_assert(is_valid_tile_size::value, + "Tiled partition with size > wavefront size. 
Currently not supported "); + return impl::tiled_partition_internal(g); +} +} // namespace cooperative_groups +#pragma clang diagnostic pop +#endif // __cplusplus +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h new file mode 100644 index 0000000000..edac461eb0 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_fp16.h @@ -0,0 +1,1778 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H + +#if defined(__HIPCC_RTC__) + #define __HOST_DEVICE__ __device__ +#else + #define __HOST_DEVICE__ __host__ __device__ + #include + #include "hip/amd_detail/host_defines.h" + #include + #if defined(__cplusplus) + #include + #include + #include +#endif +#endif // !defined(__HIPCC_RTC__) + +#if defined(__clang__) && defined(__HIP__) + typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2))); + + struct __half_raw { + union { + static_assert(sizeof(_Float16) == sizeof(unsigned short), ""); + + _Float16 data; + unsigned short x; + }; + }; + + struct __half2_raw { + union { + static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), ""); + + _Float16_2 data; + struct { + unsigned short x; + unsigned short y; + }; + }; + }; + + #if defined(__cplusplus) + #if !defined(__HIPCC_RTC__) + #include "hip_fp16_math_fwd.h" + #include "amd_hip_vector_types.h" + #include "host_defines.h" + #include "amd_device_functions.h" + #include "amd_warp_functions.h" + #endif + namespace std + { + template<> struct is_floating_point<_Float16> : std::true_type {}; + } + + template + using Enable_if_t = typename std::enable_if::type; + + // BEGIN STRUCT __HALF + struct __half { + protected: + union { + static_assert(sizeof(_Float16) == sizeof(unsigned short), ""); + + _Float16 data; + unsigned short __x; + }; + public: + // CREATORS + __HOST_DEVICE__ + __half() = default; + __HOST_DEVICE__ + __half(const __half_raw& x) : data{x.data} {} + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + __HOST_DEVICE__ + __half(decltype(data) x) : data{x} {} + template< + typename T, + Enable_if_t{}>* = nullptr> + __HOST_DEVICE__ + __half(T x) : data{static_cast<_Float16>(x)} {} + #endif + __HOST_DEVICE__ + __half(const __half&) = default; + __HOST_DEVICE__ + __half(__half&&) = default; + __HOST_DEVICE__ + ~__half() = default; + + // CREATORS - DEVICE ONLY + #if 
!defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, Enable_if_t{}>* = nullptr> + __HOST_DEVICE__ + __half(T x) : data{static_cast<_Float16>(x)} {} + #endif + + // MANIPULATORS + __HOST_DEVICE__ + __half& operator=(const __half&) = default; + __HOST_DEVICE__ + __half& operator=(__half&&) = default; + __HOST_DEVICE__ + __half& operator=(const __half_raw& x) + { + data = x.data; + return *this; + } + __HOST_DEVICE__ + volatile __half& operator=(const __half_raw& x) volatile + { + data = x.data; + return *this; + } + volatile __half& operator=(const volatile __half_raw& x) volatile + { + data = x.data; + return *this; + } + __half& operator=(__half_raw&& x) + { + data = x.data; + return *this; + } + volatile __half& operator=(__half_raw&& x) volatile + { + data = x.data; + return *this; + } + volatile __half& operator=(volatile __half_raw&& x) volatile + { + data = x.data; + return *this; + } + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, + Enable_if_t{}>* = nullptr> + __HOST_DEVICE__ + __half& operator=(T x) + { + data = static_cast<_Float16>(x); + return *this; + } + #endif + + // MANIPULATORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, Enable_if_t{}>* = nullptr> + __device__ + __half& operator=(T x) + { + data = static_cast<_Float16>(x); + return *this; + } + #endif + + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half& operator+=(const __half& x) + { + data += x.data; + return *this; + } + __device__ + __half& operator-=(const __half& x) + { + data -= x.data; + return *this; + } + __device__ + __half& operator*=(const __half& x) + { + data *= x.data; + return *this; + } + __device__ + __half& operator/=(const __half& x) + { + data /= x.data; + return *this; + } + __device__ + __half& operator++() { ++data; return *this; } + __device__ + __half operator++(int) + { + __half tmp{*this}; + ++*this; + return tmp; + } + __device__ + __half& operator--() { --data; return *this; } + 
__device__ + __half operator--(int) + { + __half tmp{*this}; + --*this; + return tmp; + } + #endif + + // ACCESSORS + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, + Enable_if_t{}>* = nullptr> + __HOST_DEVICE__ + operator T() const { return data; } + #endif + __HOST_DEVICE__ + operator __half_raw() const { return __half_raw{data}; } + __HOST_DEVICE__ + operator __half_raw() const volatile + { + return __half_raw{data}; + } + + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + template< + typename T, Enable_if_t{}>* = nullptr> + __HOST_DEVICE__ + operator T() const { return data; } + #endif + + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half operator+() const { return *this; } + __device__ + __half operator-() const + { + __half tmp{*this}; + tmp.data = -tmp.data; + return tmp; + } + #endif + + // FRIENDS + #if !defined(__HIP_NO_HALF_OPERATORS__) + friend + inline + __device__ + __half operator+(const __half& x, const __half& y) + { + return __half{x} += y; + } + friend + inline + __device__ + __half operator-(const __half& x, const __half& y) + { + return __half{x} -= y; + } + friend + inline + __device__ + __half operator*(const __half& x, const __half& y) + { + return __half{x} *= y; + } + friend + inline + __device__ + __half operator/(const __half& x, const __half& y) + { + return __half{x} /= y; + } + friend + inline + __device__ + bool operator==(const __half& x, const __half& y) + { + return x.data == y.data; + } + friend + inline + __device__ + bool operator!=(const __half& x, const __half& y) + { + return !(x == y); + } + friend + inline + __device__ + bool operator<(const __half& x, const __half& y) + { + return x.data < y.data; + } + friend + inline + __device__ + bool operator>(const __half& x, const __half& y) + { + return y.data < x.data; + } + friend + inline + __device__ + bool operator<=(const __half& x, const __half& y) + { + return !(y < x); + } + friend + inline + __device__ + bool operator>=(const __half& x, const 
__half& y) + { + return !(x < y); + } + #endif // !defined(__HIP_NO_HALF_OPERATORS__) + }; + // END STRUCT __HALF + + // BEGIN STRUCT __HALF2 + struct __half2 { + public: + union { + static_assert( + sizeof(_Float16_2) == sizeof(unsigned short[2]), ""); + + _Float16_2 data; + struct { + unsigned short x; + unsigned short y; + }; + }; + + // CREATORS + __HOST_DEVICE__ + __half2() = default; + __HOST_DEVICE__ + __half2(const __half2_raw& x) : data{x.data} {} + __HOST_DEVICE__ + __half2(decltype(data) x) : data{x} {} + __HOST_DEVICE__ + __half2(const __half& x, const __half& y) + : + data{ + static_cast<__half_raw>(x).data, + static_cast<__half_raw>(y).data} + {} + __HOST_DEVICE__ + __half2(const __half2&) = default; + __HOST_DEVICE__ + __half2(__half2&&) = default; + __HOST_DEVICE__ + ~__half2() = default; + + // MANIPULATORS + __HOST_DEVICE__ + __half2& operator=(const __half2&) = default; + __HOST_DEVICE__ + __half2& operator=(__half2&&) = default; + __HOST_DEVICE__ + __half2& operator=(const __half2_raw& x) + { + data = x.data; + return *this; + } + + // MANIPULATORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half2& operator+=(const __half2& x) + { + data += x.data; + return *this; + } + __device__ + __half2& operator-=(const __half2& x) + { + data -= x.data; + return *this; + } + __device__ + __half2& operator*=(const __half2& x) + { + data *= x.data; + return *this; + } + __device__ + __half2& operator/=(const __half2& x) + { + data /= x.data; + return *this; + } + __device__ + __half2& operator++() { return *this += _Float16_2{1, 1}; } + __device__ + __half2 operator++(int) + { + __half2 tmp{*this}; + ++*this; + return tmp; + } + __device__ + __half2& operator--() { return *this -= _Float16_2{1, 1}; } + __device__ + __half2 operator--(int) + { + __half2 tmp{*this}; + --*this; + return tmp; + } + #endif + + // ACCESSORS + __HOST_DEVICE__ + operator decltype(data)() const { return data; } + __HOST_DEVICE__ + operator __half2_raw() 
const { return __half2_raw{data}; } + + // ACCESSORS - DEVICE ONLY + #if !defined(__HIP_NO_HALF_OPERATORS__) + __device__ + __half2 operator+() const { return *this; } + __device__ + __half2 operator-() const + { + __half2 tmp{*this}; + tmp.data = -tmp.data; + return tmp; + } + #endif + + // FRIENDS + #if !defined(__HIP_NO_HALF_OPERATORS__) + friend + inline + __device__ + __half2 operator+(const __half2& x, const __half2& y) + { + return __half2{x} += y; + } + friend + inline + __device__ + __half2 operator-(const __half2& x, const __half2& y) + { + return __half2{x} -= y; + } + friend + inline + __device__ + __half2 operator*(const __half2& x, const __half2& y) + { + return __half2{x} *= y; + } + friend + inline + __device__ + __half2 operator/(const __half2& x, const __half2& y) + { + return __half2{x} /= y; + } + friend + inline + __device__ + bool operator==(const __half2& x, const __half2& y) + { + auto r = x.data == y.data; + return r.x != 0 && r.y != 0; + } + friend + inline + __device__ + bool operator!=(const __half2& x, const __half2& y) + { + return !(x == y); + } + friend + inline + __device__ + bool operator<(const __half2& x, const __half2& y) + { + auto r = x.data < y.data; + return r.x != 0 && r.y != 0; + } + friend + inline + __device__ + bool operator>(const __half2& x, const __half2& y) + { + return y < x; + } + friend + inline + __device__ + bool operator<=(const __half2& x, const __half2& y) + { + return !(y < x); + } + friend + inline + __device__ + bool operator>=(const __half2& x, const __half2& y) + { + return !(x < y); + } + #endif // !defined(__HIP_NO_HALF_OPERATORS__) + }; + // END STRUCT __HALF2 + + namespace + { + inline + __HOST_DEVICE__ + __half2 make_half2(__half x, __half y) + { + return __half2{x, y}; + } + + inline + __HOST_DEVICE__ + __half __low2half(__half2 x) + { + return __half{__half_raw{static_cast<__half2_raw>(x).data.x}}; + } + + inline + __HOST_DEVICE__ + __half __high2half(__half2 x) + { + return 
__half{__half_raw{static_cast<__half2_raw>(x).data.y}}; + } + + inline + __HOST_DEVICE__ + __half2 __half2half2(__half x) + { + return __half2{x, x}; + } + + inline + __HOST_DEVICE__ + __half2 __halves2half2(__half x, __half y) + { + return __half2{x, y}; + } + + inline + __HOST_DEVICE__ + __half2 __low2half2(__half2 x) + { + return __half2{ + _Float16_2{ + static_cast<__half2_raw>(x).data.x, + static_cast<__half2_raw>(x).data.x}}; + } + + inline + __HOST_DEVICE__ + __half2 __high2half2(__half2 x) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.y, + static_cast<__half2_raw>(x).data.y}}; + } + + inline + __HOST_DEVICE__ + __half2 __lows2half2(__half2 x, __half2 y) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.x, + static_cast<__half2_raw>(y).data.x}}; + } + + inline + __HOST_DEVICE__ + __half2 __highs2half2(__half2 x, __half2 y) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.y, + static_cast<__half2_raw>(y).data.y}}; + } + + inline + __HOST_DEVICE__ + __half2 __lowhigh2highlow(__half2 x) + { + return __half2_raw{ + _Float16_2{ + static_cast<__half2_raw>(x).data.y, + static_cast<__half2_raw>(x).data.x}}; + } + + // Bitcasts + inline + __device__ + short __half_as_short(__half x) + { + return static_cast<__half_raw>(x).x; + } + + inline + __device__ + unsigned short __half_as_ushort(__half x) + { + return static_cast<__half_raw>(x).x; + } + + inline + __device__ + __half __short_as_half(short x) + { + __half_raw r; r.x = x; + return r; + } + + inline + __device__ + __half __ushort_as_half(unsigned short x) + { + __half_raw r; r.x = x; + return r; + } + + // float -> half | half2 + inline + __HOST_DEVICE__ + __half __float2half(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __HOST_DEVICE__ + __half __float2half_rn(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + #if !defined(__HIPCC_RTC__) + // TODO: rounding behaviour is not correct for host 
functions. + inline + __host__ + __half __float2half_rz(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __host__ + __half __float2half_rd(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __host__ + __half __float2half_ru(float x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + #endif + inline + __device__ + __half __float2half_rz(float x) + { + return __half_raw{__ocml_cvtrtz_f16_f32(x)}; + } + inline + __device__ + __half __float2half_rd(float x) + { + return __half_raw{__ocml_cvtrtn_f16_f32(x)}; + } + inline + __device__ + __half __float2half_ru(float x) + { + return __half_raw{__ocml_cvtrtp_f16_f32(x)}; + } + inline + __HOST_DEVICE__ + __half2 __float2half2_rn(float x) + { + return __half2_raw{ + _Float16_2{ + static_cast<_Float16>(x), static_cast<_Float16>(x)}}; + } + inline + __HOST_DEVICE__ + __half2 __floats2half2_rn(float x, float y) + { + return __half2_raw{_Float16_2{ + static_cast<_Float16>(x), static_cast<_Float16>(y)}}; + } + inline + __HOST_DEVICE__ + __half2 __float22half2_rn(float2 x) + { + return __floats2half2_rn(x.x, x.y); + } + + // half | half2 -> float + inline + __HOST_DEVICE__ + float __half2float(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __HOST_DEVICE__ + float __low2float(__half2 x) + { + return static_cast<__half2_raw>(x).data.x; + } + inline + __HOST_DEVICE__ + float __high2float(__half2 x) + { + return static_cast<__half2_raw>(x).data.y; + } + inline + __HOST_DEVICE__ + float2 __half22float2(__half2 x) + { + return make_float2( + static_cast<__half2_raw>(x).data.x, + static_cast<__half2_raw>(x).data.y); + } + + // half -> int + inline + __device__ + int __half2int_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + int __half2int_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + int __half2int_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ 
+ int __half2int_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // int -> half + inline + __device__ + __half __int2half_rn(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __int2half_rz(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __int2half_rd(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __int2half_ru(int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> short + inline + __device__ + short __half2short_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + short __half2short_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + short __half2short_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + short __half2short_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // short -> half + inline + __device__ + __half __short2half_rn(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __short2half_rz(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __short2half_rd(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __short2half_ru(short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> long long + inline + __device__ + long long __half2ll_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + long long __half2ll_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + long long __half2ll_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + long long __half2ll_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // long long -> half + inline + __device__ + __half __ll2half_rn(long long x) + { + return 
__half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ll2half_rz(long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ll2half_rd(long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ll2half_ru(long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> unsigned int + inline + __device__ + unsigned int __half2uint_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned int __half2uint_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned int __half2uint_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned int __half2uint_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // unsigned int -> half + inline + __device__ + __half __uint2half_rn(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __uint2half_rz(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __uint2half_rd(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __uint2half_ru(unsigned int x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> unsigned short + inline + __device__ + unsigned short __half2ushort_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned short __half2ushort_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned short __half2ushort_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned short __half2ushort_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // unsigned short -> half + inline + __device__ + __half __ushort2half_rn(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + 
inline + __device__ + __half __ushort2half_rz(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ushort2half_rd(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ushort2half_ru(unsigned short x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // half -> unsigned long long + inline + __device__ + unsigned long long __half2ull_rn(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned long long __half2ull_rz(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned long long __half2ull_rd(__half x) + { + return static_cast<__half_raw>(x).data; + } + inline + __device__ + unsigned long long __half2ull_ru(__half x) + { + return static_cast<__half_raw>(x).data; + } + + // unsigned long long -> half + inline + __device__ + __half __ull2half_rn(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ull2half_rz(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ull2half_rd(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + inline + __device__ + __half __ull2half_ru(unsigned long long x) + { + return __half_raw{static_cast<_Float16>(x)}; + } + + // Load primitives + inline + __device__ + __half __ldg(const __half* ptr) { return *ptr; } + inline + __device__ + __half __ldcg(const __half* ptr) { return *ptr; } + inline + __device__ + __half __ldca(const __half* ptr) { return *ptr; } + inline + __device__ + __half __ldcs(const __half* ptr) { return *ptr; } + + inline + __HOST_DEVICE__ + __half2 __ldg(const __half2* ptr) { return *ptr; } + inline + __HOST_DEVICE__ + __half2 __ldcg(const __half2* ptr) { return *ptr; } + inline + __HOST_DEVICE__ + __half2 __ldca(const __half2* ptr) { return *ptr; } + inline + __HOST_DEVICE__ + __half2 __ldcs(const 
__half2* ptr) { return *ptr; } + + // Relations + inline + __device__ + bool __heq(__half x, __half y) + { + return static_cast<__half_raw>(x).data == + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hne(__half x, __half y) + { + return static_cast<__half_raw>(x).data != + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hle(__half x, __half y) + { + return static_cast<__half_raw>(x).data <= + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hge(__half x, __half y) + { + return static_cast<__half_raw>(x).data >= + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hlt(__half x, __half y) + { + return static_cast<__half_raw>(x).data < + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hgt(__half x, __half y) + { + return static_cast<__half_raw>(x).data > + static_cast<__half_raw>(y).data; + } + inline + __device__ + bool __hequ(__half x, __half y) { return __heq(x, y); } + inline + __device__ + bool __hneu(__half x, __half y) { return __hne(x, y); } + inline + __device__ + bool __hleu(__half x, __half y) { return __hle(x, y); } + inline + __device__ + bool __hgeu(__half x, __half y) { return __hge(x, y); } + inline + __device__ + bool __hltu(__half x, __half y) { return __hlt(x, y); } + inline + __device__ + bool __hgtu(__half x, __half y) { return __hgt(x, y); } + + inline + __HOST_DEVICE__ + __half2 __heq2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data == + static_cast<__half2_raw>(y).data; + return __builtin_convertvector(-r, _Float16_2); + } + inline + __HOST_DEVICE__ + __half2 __hne2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data != + static_cast<__half2_raw>(y).data; + return __builtin_convertvector(-r, _Float16_2); + } + inline + __HOST_DEVICE__ + __half2 __hle2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data <= + static_cast<__half2_raw>(y).data; + return __builtin_convertvector(-r, _Float16_2); + 
} + inline + __HOST_DEVICE__ + __half2 __hge2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data >= + static_cast<__half2_raw>(y).data; + return __builtin_convertvector(-r, _Float16_2); + } + inline + __HOST_DEVICE__ + __half2 __hlt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data < + static_cast<__half2_raw>(y).data; + return __builtin_convertvector(-r, _Float16_2); + } + inline + __HOST_DEVICE__ + __half2 __hgt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(x).data > + static_cast<__half2_raw>(y).data; + return __builtin_convertvector(-r, _Float16_2); + } + inline + __HOST_DEVICE__ + __half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); } + inline + __HOST_DEVICE__ + __half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); } + inline + __HOST_DEVICE__ + __half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); } + inline + __HOST_DEVICE__ + __half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); } + inline + __HOST_DEVICE__ + __half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); } + inline + __HOST_DEVICE__ + __half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); } + + inline + __HOST_DEVICE__ + bool __hbeq2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__heq2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __HOST_DEVICE__ + bool __hbne2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hne2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __HOST_DEVICE__ + bool __hble2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hle2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __HOST_DEVICE__ + bool __hbge2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hge2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __HOST_DEVICE__ + bool __hblt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hlt2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + 
inline + __HOST_DEVICE__ + bool __hbgt2(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hgt2(x, y)); + return r.data.x != 0 && r.data.y != 0; + } + inline + __HOST_DEVICE__ + bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); } + inline + __HOST_DEVICE__ + bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); } + inline + __HOST_DEVICE__ + bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); } + inline + __HOST_DEVICE__ + bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); } + inline + __HOST_DEVICE__ + bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); } + inline + __HOST_DEVICE__ + bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); } + inline + __device__ + __half __hmax(const __half x, const __half y) { + return __half_raw{__ocml_fmax_f16(static_cast<__half_raw>(x).data, + static_cast<__half_raw>(y).data)}; + } + inline + __device__ + __half __hmax_nan(const __half x, const __half y) { + if(__ocml_isnan_f16(static_cast<__half_raw>(x).data)) { + return x; + } else if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) { + return y; + } + return __hmax(x, y); + } + inline + __device__ + __half __hmin(const __half x, const __half y) { + return __half_raw{__ocml_fmin_f16(static_cast<__half_raw>(x).data, + static_cast<__half_raw>(y).data)}; + } + inline + __device__ + __half __hmin_nan(const __half x, const __half y) { + if(__ocml_isnan_f16(static_cast<__half_raw>(x).data)) { + return x; + } else if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) { + return y; + } + return __hmin(x, y); + } + + // Arithmetic + inline + __device__ + __half __clamp_01(__half x) + { + auto r = static_cast<__half_raw>(x); + + if (__hlt(x, __half_raw{0})) return __half_raw{0}; + if (__hlt(__half_raw{1}, x)) return __half_raw{1}; + return r; + } + + inline + __device__ + __half __hadd(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data + + static_cast<__half_raw>(y).data}; + } + inline + 
__device__ + __half __habs(__half x) + { + return __half_raw{ + __ocml_fabs_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half __hsub(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data - + static_cast<__half_raw>(y).data}; + } + inline + __device__ + __half __hmul(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data * + static_cast<__half_raw>(y).data}; + } + inline + __device__ + __half __hadd_sat(__half x, __half y) + { + return __clamp_01(__hadd(x, y)); + } + inline + __device__ + __half __hsub_sat(__half x, __half y) + { + return __clamp_01(__hsub(x, y)); + } + inline + __device__ + __half __hmul_sat(__half x, __half y) + { + return __clamp_01(__hmul(x, y)); + } + inline + __device__ + __half __hfma(__half x, __half y, __half z) + { + return __half_raw{__ocml_fma_f16( + static_cast<__half_raw>(x).data, + static_cast<__half_raw>(y).data, + static_cast<__half_raw>(z).data)}; + } + inline + __device__ + __half __hfma_sat(__half x, __half y, __half z) + { + return __clamp_01(__hfma(x, y, z)); + } + inline + __device__ + __half __hdiv(__half x, __half y) + { + return __half_raw{ + static_cast<__half_raw>(x).data / + static_cast<__half_raw>(y).data}; + } + + inline + __HOST_DEVICE__ + __half2 __hadd2(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data + + static_cast<__half2_raw>(y).data}; + } + inline + __HOST_DEVICE__ + __half2 __habs2(__half2 x) + { + return __half2_raw{ + __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)}; + } + inline + __HOST_DEVICE__ + __half2 __hsub2(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data - + static_cast<__half2_raw>(y).data}; + } + inline + __HOST_DEVICE__ + __half2 __hmul2(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data * + static_cast<__half2_raw>(y).data}; + } + inline + __HOST_DEVICE__ + __half2 __hadd2_sat(__half2 x, __half2 y) + { + auto r = 
static_cast<__half2_raw>(__hadd2(x, y)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __HOST_DEVICE__ + __half2 __hsub2_sat(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hsub2(x, y)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __HOST_DEVICE__ + __half2 __hmul2_sat(__half2 x, __half2 y) + { + auto r = static_cast<__half2_raw>(__hmul2(x, y)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __HOST_DEVICE__ + __half2 __hfma2(__half2 x, __half2 y, __half2 z) + { + return __half2_raw{__ocml_fma_2f16(x, y, z)}; + } + inline + __HOST_DEVICE__ + __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z) + { + auto r = static_cast<__half2_raw>(__hfma2(x, y, z)); + return __half2{ + __clamp_01(__half_raw{r.data.x}), + __clamp_01(__half_raw{r.data.y})}; + } + inline + __HOST_DEVICE__ + __half2 __h2div(__half2 x, __half2 y) + { + return __half2_raw{ + static_cast<__half2_raw>(x).data / + static_cast<__half2_raw>(y).data}; + } + + // Math functions + #if defined(__clang__) && defined(__HIP__) + inline + __device__ + float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) { + return __ockl_fdot2(static_cast<__half2_raw>(a).data, + static_cast<__half2_raw>(b).data, + c, saturate); + } + #endif + inline + __device__ + __half htrunc(__half x) + { + return __half_raw{ + __ocml_trunc_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hceil(__half x) + { + return __half_raw{ + __ocml_ceil_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hfloor(__half x) + { + return __half_raw{ + __ocml_floor_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hrint(__half x) + { + return __half_raw{ + __ocml_rint_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hsin(__half x) + { + return __half_raw{ + 
__ocml_sin_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hcos(__half x) + { + return __half_raw{ + __ocml_cos_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hexp(__half x) + { + return __half_raw{ + __ocml_exp_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hexp2(__half x) + { + return __half_raw{ + __ocml_exp2_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hexp10(__half x) + { + return __half_raw{ + __ocml_exp10_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hlog2(__half x) + { + return __half_raw{ + __ocml_log2_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hlog(__half x) + { + return __half_raw{ + __ocml_log_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hlog10(__half x) + { + return __half_raw{ + __ocml_log10_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hrcp(__half x) + { + return __half_raw{ + static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half_raw>(x).data))}; + } + inline + __device__ + __half hrsqrt(__half x) + { + return __half_raw{ + __ocml_rsqrt_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + __half hsqrt(__half x) + { + return __half_raw{ + __ocml_sqrt_f16(static_cast<__half_raw>(x).data)}; + } + inline + __device__ + bool __hisinf(__half x) + { + return __ocml_isinf_f16(static_cast<__half_raw>(x).data); + } + inline + __device__ + bool __hisnan(__half x) + { + return __ocml_isnan_f16(static_cast<__half_raw>(x).data); + } + inline + __device__ + __half __hneg(__half x) + { + return __half_raw{-static_cast<__half_raw>(x).data}; + } + + inline + __HOST_DEVICE__ + __half2 h2trunc(__half2 x) + { + return __half2_raw{__ocml_trunc_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2ceil(__half2 x) + { + return __half2_raw{__ocml_ceil_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2floor(__half2 x) + { + return 
__half2_raw{__ocml_floor_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2rint(__half2 x) + { + return __half2_raw{__ocml_rint_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2sin(__half2 x) + { + return __half2_raw{__ocml_sin_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2cos(__half2 x) + { + return __half2_raw{__ocml_cos_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2exp(__half2 x) + { + return __half2_raw{__ocml_exp_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2exp2(__half2 x) + { + return __half2_raw{__ocml_exp2_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2exp10(__half2 x) + { + return __half2_raw{__ocml_exp10_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2log2(__half2 x) + { + return __half2_raw{__ocml_log2_2f16(x)}; + } + inline + __HOST_DEVICE__ + __half2 h2log(__half2 x) { return __ocml_log_2f16(x); } + inline + __HOST_DEVICE__ + __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); } + inline + __HOST_DEVICE__ + __half2 h2rcp(__half2 x) { + return _Float16_2{static_cast<_Float16>(__builtin_amdgcn_rcph(x.x)), + static_cast<_Float16>(__builtin_amdgcn_rcph(x.y))}; + } + inline + __HOST_DEVICE__ + __half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); } + inline + __HOST_DEVICE__ + __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); } + inline + __HOST_DEVICE__ + __half2 __hisinf2(__half2 x) + { + auto r = __ocml_isinf_2f16(x); + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __HOST_DEVICE__ + __half2 __hisnan2(__half2 x) + { + auto r = __ocml_isnan_2f16(x); + return __half2_raw{_Float16_2{ + static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; + } + inline + __HOST_DEVICE__ + __half2 __hneg2(__half2 x) + { + return __half2_raw{-static_cast<__half2_raw>(x).data}; + } + } // Anonymous namespace. 
+
+    #if !defined(HIP_NO_HALF)  // unprefixed aliases can be suppressed by defining HIP_NO_HALF
+        using half = __half;
+        using half2 = __half2;
+    #endif
+        __device__
+        inline
+        __half __shfl(__half var, int src_lane, int width = warpSize) {  // Pattern for all overloads below: store var in a union with an int, call the shuffle with tmp.i (an int argument), read the half member back.
+           union { int i; __half h; } tmp; tmp.h = var;  // NOTE(review): only h is written before tmp.i is read; if __half is narrower than int the remaining bytes are indeterminate - presumably harmless because only tmp.h is returned, confirm
+           tmp.i = __shfl(tmp.i, src_lane, width);
+           return tmp.h;
+        }
+        __device__
+        inline
+        __half2 __shfl(__half2 var, int src_lane, int width = warpSize) {  // __half2 variant of __shfl
+           union { int i; __half2 h; } tmp; tmp.h = var;
+           tmp.i = __shfl(tmp.i, src_lane, width);
+           return tmp.h;
+        }
+        __device__
+        inline
+        __half __shfl_up(__half var, unsigned int lane_delta, int width = warpSize) {  // __half variant of __shfl_up
+           union { int i; __half h; } tmp; tmp.h = var;
+           tmp.i = __shfl_up(tmp.i, lane_delta, width);
+           return tmp.h;
+        }
+        __device__
+        inline
+        __half2 __shfl_up(__half2 var, unsigned int lane_delta, int width = warpSize) {  // __half2 variant of __shfl_up
+           union { int i; __half2 h; } tmp; tmp.h = var;
+           tmp.i = __shfl_up(tmp.i, lane_delta, width);
+           return tmp.h;
+        }
+        __device__
+        inline
+        __half __shfl_down(__half var, unsigned int lane_delta, int width = warpSize) {  // __half variant of __shfl_down
+           union { int i; __half h; } tmp; tmp.h = var;
+           tmp.i = __shfl_down(tmp.i, lane_delta, width);
+           return tmp.h;
+        }
+        __device__
+        inline
+        __half2 __shfl_down(__half2 var, unsigned int lane_delta, int width = warpSize) {  // __half2 variant of __shfl_down
+           union { int i; __half2 h; } tmp; tmp.h = var;
+           tmp.i = __shfl_down(tmp.i, lane_delta, width);
+           return tmp.h;
+        }
+        __device__
+        inline
+        __half __shfl_xor(__half var, int lane_mask, int width = warpSize) {  // __half variant of __shfl_xor
+           union { int i; __half h; } tmp; tmp.h = var;
+           tmp.i = __shfl_xor(tmp.i, lane_mask, width);
+           return tmp.h;
+        }
+        __device__
+        inline
+        __half2 __shfl_xor(__half2 var, int lane_mask, int width = warpSize) {  // __half2 variant of __shfl_xor
+           union { int i; __half2 h; } tmp; tmp.h = var;
+           tmp.i = __shfl_xor(tmp.i, lane_mask, width);
+           return tmp.h;
+        }
+    #endif // defined(__cplusplus)
+#elif defined(__GNUC__)
+    #if !defined(__HIPCC_RTC__)
+        #include "hip_fp16_gcc.h"
+    #endif
+#endif // !defined(__clang__) && defined(__GNUC__)
+
+#endif // 
HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_math_constants.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_math_constants.h new file mode 100644 index 0000000000..53883ae9e9 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_math_constants.h @@ -0,0 +1,59 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#ifndef AMD_HIP_MATH_CONSTANTS_H +#define AMD_HIP_MATH_CONSTANTS_H +#define HIP_INF_F __int_as_float(0x7f800000U) +#define HIP_NAN_F __int_as_float(0x7fffffffU) +#define HIP_MIN_DENORM_F __int_as_float(0x00000001U) +#define HIP_MAX_NORMAL_F __int_as_float(0x7f7fffffU) +#define HIP_NEG_ZERO_F __int_as_float(0x80000000U) +#define HIP_ZERO_F 0.0F +#define HIP_ONE_F 1.0F +#define HIP_SQRT_HALF_F 0.707106781F +#define HIP_SQRT_HALF_HI_F 0.707106781F +#define HIP_SQRT_HALF_LO_F 1.210161749e-08F +#define HIP_SQRT_TWO_F 1.414213562F +#define HIP_THIRD_F 0.333333333F +#define HIP_PIO4_F 0.785398163F +#define HIP_PIO2_F 1.570796327F +#define HIP_3PIO4_F 2.356194490F +#define HIP_2_OVER_PI_F 0.636619772F +#define HIP_SQRT_2_OVER_PI_F 0.797884561F +#define HIP_PI_F 3.141592654F +#define HIP_L2E_F 1.442695041F +#define HIP_L2T_F 3.321928094F +#define HIP_LG2_F 0.301029996F +#define HIP_LGE_F 0.434294482F +#define HIP_LN2_F 0.693147181F +#define HIP_LNT_F 2.302585093F +#define HIP_LNPI_F 1.144729886F +#define HIP_TWO_TO_M126_F 1.175494351e-38F +#define HIP_TWO_TO_126_F 8.507059173e37F +#define HIP_NORM_HUGE_F 3.402823466e38F +#define HIP_TWO_TO_23_F 8388608.0F +#define HIP_TWO_TO_24_F 16777216.0F +#define HIP_TWO_TO_31_F 2147483648.0F +#define HIP_TWO_TO_32_F 4294967296.0F +#define HIP_REMQUO_BITS_F 3U +#define HIP_REMQUO_MASK_F (~((~0U)< + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Query the installed library build name. + * + * This function can be used even when the library is not initialized. + * + * @returns Returns a string describing the build version of the library. The + * string is owned by the library. + */ +const char* amd_dbgapi_get_build_name(); + +/** + * @brief Query the installed library git hash. + * + * This function can be used even when the library is not initialized. + * + * @returns Returns git hash of the library. + */ +const char* amd_dbgapi_get_git_hash(); + +/** + * @brief Query the installed library build ID. 
+ * + * This function can be used even when the library is not initialized. + * + * @returns Returns build ID of the library. + */ +size_t amd_dbgapi_get_build_id(); + +#ifdef __cplusplus +} /* extern "c" */ +#endif + +//--- +// Top part of file can be compiled with any compiler + +#if !defined(__HIPCC_RTC__) +//#include +#if __cplusplus +#include +#include +#else +#include +#include +#include +#endif // __cplusplus +#else +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +typedef signed int int32_t; +typedef signed long long int64_t; +namespace std { +using ::uint32_t; +using ::uint64_t; +using ::int32_t; +using ::int64_t; +} +#endif // !defined(__HIPCC_RTC__) + +#if __HIP_CLANG_ONLY__ + +#if !defined(__align__) +#define __align__(x) __attribute__((aligned(x))) +#endif + +#define CUDA_SUCCESS hipSuccess + +#if !defined(__HIPCC_RTC__) +#include +extern int HIP_TRACE_API; +#endif // !defined(__HIPCC_RTC__) + +#ifdef __cplusplus +#include +#endif +#include +#include +#include +#include +#include +#include + +// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. 
+#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) +#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__ +#endif + +// Feature tests: +#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__ +// Device compile and not host compile: + +// 32-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1) + +// 64-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1) +#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (1) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (1) + +// warp cross-lane operations: +#define __HIP_ARCH_HAS_WARP_VOTE__ (1) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (1) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1) +#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) + +// sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) + +// misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0) +#define __HIP_ARCH_HAS_3DGRID__ (1) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0) + +#endif /* Device feature flags */ + + +#define launch_bounds_impl0(requiredMaxThreadsPerBlock) \ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) +#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \ + amdgpu_waves_per_eu(minBlocksPerMultiprocessor))) +#define select_impl_(_1, _2, impl_, ...) impl_ +#define __launch_bounds__(...) 
\ + select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0, )(__VA_ARGS__) + +#if !defined(__HIPCC_RTC__) +__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; } +#endif // !defined(__HIPCC_RTC__) + +// End doxygen API: +/** + * @} + */ + +// +// hip-clang functions +// +#if !defined(__HIPCC_RTC__) +#define HIP_KERNEL_NAME(...) __VA_ARGS__ +#define HIP_SYMBOL(X) X + +typedef int hipLaunchParm; + +template ::type* = nullptr> +void pArgs(const std::tuple&, void*) {} + +template ::type* = nullptr> +void pArgs(const std::tuple& formals, void** _vargs) { + using T = typename std::tuple_element >::type; + + static_assert(!std::is_reference{}, + "A __global__ function cannot have a reference as one of its " + "arguments."); +#if defined(HIP_STRICT) + static_assert(std::is_trivially_copyable{}, + "Only TriviallyCopyable types can be arguments to a __global__ " + "function"); +#endif + _vargs[n] = const_cast(reinterpret_cast(&std::get(formals))); + return pArgs(formals, _vargs); +} + +template +std::tuple validateArgsCountType(void (*kernel)(Formals...), std::tuple(actuals)) { + static_assert(sizeof...(Formals) == sizeof...(Actuals), "Argument Count Mismatch"); + std::tuple to_formals{std::move(actuals)}; + return to_formals; +} + +#if defined(HIP_TEMPLATE_KERNEL_LAUNCH) +template +void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) { + constexpr size_t count = sizeof...(Args); + auto tup_ = std::tuple{args...}; + auto tup = validateArgsCountType(kernel, tup_); + void* _Args[count]; + pArgs<0>(tup, _Args); + + auto k = reinterpret_cast(kernel); + hipLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream); +} +#else +#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) 
\
+    do { \
+        kernelName<<<(numBlocks), (numThreads), (memPerBlock), (streamId)>>>(__VA_ARGS__); \
+    } while (0)
+
+#define hipLaunchKernelGGL(kernelName, ...)  hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__)
+#endif
+
+#include  // NOTE(review): the header name is missing after #include in this copy - restore from the original patch
+#endif // !defined(__HIPCC_RTC__)
+
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
+struct __HIP_BlockIdx {  // stateless functor: x selects the dimension; the size_t from __ockl_get_group_id is returned as std::uint32_t
+  __device__
+  std::uint32_t operator()(std::uint32_t x) const noexcept { return __ockl_get_group_id(x); }
+};
+struct __HIP_BlockDim {  // same shape, backed by __ockl_get_local_size
+  __device__
+  std::uint32_t operator()(std::uint32_t x) const noexcept {
+    return __ockl_get_local_size(x);
+  }
+};
+struct __HIP_GridDim {  // same shape, backed by __ockl_get_num_groups
+  __device__
+  std::uint32_t operator()(std::uint32_t x) const noexcept {
+    return __ockl_get_num_groups(x);
+  }
+};
+struct __HIP_ThreadIdx {  // same shape, backed by __ockl_get_local_id
+  __device__
+  std::uint32_t operator()(std::uint32_t x) const noexcept {
+    return __ockl_get_local_id(x);
+  }
+};
+
+#if defined(__HIPCC_RTC__)  // dim3 is declared here only for RTC builds
+typedef struct dim3 {
+    uint32_t x;  ///< x
+    uint32_t y;  ///< y
+    uint32_t z;  ///< z
+#ifdef __cplusplus
+    constexpr __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
+#endif
+} dim3;
+#endif // defined(__HIPCC_RTC__)
+
+template  // NOTE(review): the template parameter list is missing in this copy (F is used below) - restore from the original patch
+struct __HIP_Coordinates {
+  using R = decltype(F{}(0));  // R is whatever calling a default-constructed F with an index yields
+
+  struct __X {  // proxy for dimension 0: converting to R calls F{}(0)
+    __device__ operator R() const noexcept { return F{}(0); }
+    __device__ R operator+=(const R& rhs) { return F{}(0) + rhs; }  // stores nothing; only returns the sum
+  };
+  struct __Y {  // proxy for dimension 1
+    __device__ operator R() const noexcept { return F{}(1); }
+    __device__ R operator+=(const R& rhs) { return F{}(1) + rhs; }
+  };
+  struct __Z {  // proxy for dimension 2
+    __device__ operator R() const noexcept { return F{}(2); }
+    __device__ R operator+=(const R& rhs) { return F{}(2) + rhs; }
+  };
+
+  static constexpr __X x{};
+  static 
constexpr __Y y{}; + static constexpr __Z z{}; +#ifdef __cplusplus + __device__ operator dim3() const { return dim3(x, y, z); } +#endif + +}; +template +#if !defined(_MSC_VER) +__attribute__((weak)) +#endif +constexpr typename __HIP_Coordinates::__X __HIP_Coordinates::x; +template +#if !defined(_MSC_VER) +__attribute__((weak)) +#endif +constexpr typename __HIP_Coordinates::__Y __HIP_Coordinates::y; +template +#if !defined(_MSC_VER) +__attribute__((weak)) +#endif +constexpr typename __HIP_Coordinates::__Z __HIP_Coordinates::z; + +extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(uint); +inline +__device__ +std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::__X, + __HIP_Coordinates<__HIP_BlockDim>::__X) noexcept { + return __ockl_get_global_size(0); +} +inline +__device__ +std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::__X, + __HIP_Coordinates<__HIP_GridDim>::__X) noexcept { + return __ockl_get_global_size(0); +} +inline +__device__ +std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::__Y, + __HIP_Coordinates<__HIP_BlockDim>::__Y) noexcept { + return __ockl_get_global_size(1); +} +inline +__device__ +std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::__Y, + __HIP_Coordinates<__HIP_GridDim>::__Y) noexcept { + return __ockl_get_global_size(1); +} +inline +__device__ +std::uint32_t operator*(__HIP_Coordinates<__HIP_GridDim>::__Z, + __HIP_Coordinates<__HIP_BlockDim>::__Z) noexcept { + return __ockl_get_global_size(2); +} +inline +__device__ +std::uint32_t operator*(__HIP_Coordinates<__HIP_BlockDim>::__Z, + __HIP_Coordinates<__HIP_GridDim>::__Z) noexcept { + return __ockl_get_global_size(2); +} + +static constexpr __HIP_Coordinates<__HIP_BlockDim> blockDim{}; +static constexpr __HIP_Coordinates<__HIP_BlockIdx> blockIdx{}; +static constexpr __HIP_Coordinates<__HIP_GridDim> gridDim{}; +static constexpr __HIP_Coordinates<__HIP_ThreadIdx> threadIdx{}; + +extern "C" __device__ __attribute__((const)) size_t 
__ockl_get_local_id(uint); +#define hipThreadIdx_x (__ockl_get_local_id(0)) +#define hipThreadIdx_y (__ockl_get_local_id(1)) +#define hipThreadIdx_z (__ockl_get_local_id(2)) + +extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint); +#define hipBlockIdx_x (__ockl_get_group_id(0)) +#define hipBlockIdx_y (__ockl_get_group_id(1)) +#define hipBlockIdx_z (__ockl_get_group_id(2)) + +extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint); +#define hipBlockDim_x (__ockl_get_local_size(0)) +#define hipBlockDim_y (__ockl_get_local_size(1)) +#define hipBlockDim_z (__ockl_get_local_size(2)) + +extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint); +#define hipGridDim_x (__ockl_get_num_groups(0)) +#define hipGridDim_y (__ockl_get_num_groups(1)) +#define hipGridDim_z (__ockl_get_num_groups(2)) + +#include + +#if __HIP_HCC_COMPAT_MODE__ +// Define HCC work item functions in terms of HIP builtin variables. +#pragma push_macro("__DEFINE_HCC_FUNC") +#define __DEFINE_HCC_FUNC(hc_fun,hip_var) \ +inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \ + if (i==0) \ + return hip_var.x; \ + else if(i==1) \ + return hip_var.y; \ + else \ + return hip_var.z; \ +} + +__DEFINE_HCC_FUNC(workitem_id, threadIdx) +__DEFINE_HCC_FUNC(group_id, blockIdx) +__DEFINE_HCC_FUNC(group_size, blockDim) +__DEFINE_HCC_FUNC(num_groups, gridDim) +#pragma pop_macro("__DEFINE_HCC_FUNC") + +extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint); +inline __device__ __attribute__((always_inline)) uint +hc_get_workitem_absolute_id(int dim) +{ + return (uint)__ockl_get_global_id(dim); +} + +#endif + +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +#if !defined(__HIPCC_RTC__) +// Support std::complex. 
+#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ +#pragma push_macro("__CUDA__") +#define __CUDA__ +#include <__clang_cuda_math_forward_declares.h> +#include <__clang_cuda_complex_builtins.h> +// Workaround for using libc++ with HIP-Clang. +// The following headers requires clang include path before standard C++ include path. +// However libc++ include path requires to be before clang include path. +// To workaround this, we pass -isystem with the parent directory of clang include +// path instead of the clang include path itself. +#include +#include +#include +#undef __CUDA__ +#pragma pop_macro("__CUDA__") +#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ +#endif // !defined(__HIPCC_RTC__) +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +#endif // __HIP_CLANG_ONLY__ + +#endif // HIP_AMD_DETAIL_RUNTIME_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h new file mode 100644 index 0000000000..beb12ba2f6 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h @@ -0,0 +1,194 @@ +/* +Copyright (c) 2022 - Present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H +#define HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H + +#if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) + +/// hipStreamPerThread implementation +#if defined(HIP_API_PER_THREAD_DEFAULT_STREAM) + #define __HIP_STREAM_PER_THREAD + #define __HIP_API_SPT(api) api ## _spt +#else + #define __HIP_API_SPT(api) api +#endif + +#if defined(__HIP_STREAM_PER_THREAD) + // Memory APIs + #define hipMemcpy __HIP_API_SPT(hipMemcpy) + #define hipMemcpyToSymbol __HIP_API_SPT(hipMemcpyToSymbol) + #define hipMemcpyFromSymbol __HIP_API_SPT(hipMemcpyFromSymbol) + #define hipMemcpy2D __HIP_API_SPT(hipMemcpy2D) + #define hipMemcpy2DFromArray __HIP_API_SPT(hipMemcpy2DFromArray) + #define hipMemcpy3D __HIP_API_SPT(hipMemcpy3D) + #define hipMemset __HIP_API_SPT(hipMemset) + #define hipMemset2D __HIP_API_SPT(hipMemset2D) + #define hipMemset3D __HIP_API_SPT(hipMemset3D) + #define hipMemcpyAsync __HIP_API_SPT(hipMemcpyAsync) + #define hipMemset3DAsync __HIP_API_SPT(hipMemset3DAsync) + #define hipMemset2DAsync __HIP_API_SPT(hipMemset2DAsync) + #define hipMemsetAsync __HIP_API_SPT(hipMemsetAsync) + #define hipMemcpy3DAsync __HIP_API_SPT(hipMemcpy3DAsync) + #define hipMemcpy2DAsync __HIP_API_SPT(hipMemcpy2DAsync) + #define hipMemcpyFromSymbolAsync __HIP_API_SPT(hipMemcpyFromSymbolAsync) + #define hipMemcpyToSymbolAsync __HIP_API_SPT(hipMemcpyToSymbolAsync) + #define hipMemcpyFromArray 
__HIP_API_SPT(hipMemcpyFromArray) + #define hipMemcpy2DToArray __HIP_API_SPT(hipMemcpy2DToArray) + #define hipMemcpy2DFromArrayAsync __HIP_API_SPT(hipMemcpy2DFromArrayAsync) + #define hipMemcpy2DToArrayAsync __HIP_API_SPT(hipMemcpy2DToArrayAsync) + + // Stream APIs + #define hipStreamSynchronize __HIP_API_SPT(hipStreamSynchronize) + #define hipStreamQuery __HIP_API_SPT(hipStreamQuery) + #define hipStreamGetFlags __HIP_API_SPT(hipStreamGetFlags) + #define hipStreamGetPriority __HIP_API_SPT(hipStreamGetPriority) + #define hipStreamWaitEvent __HIP_API_SPT(hipStreamWaitEvent) + #define hipStreamAddCallback __HIP_API_SPT(hipStreamAddCallback) + #define hipLaunchHostFunc __HIP_API_SPT(hipLaunchHostFunc) + + // Event APIs + #define hipEventRecord __HIP_API_SPT(hipEventRecord) + + // Launch APIs + #define hipLaunchKernel __HIP_API_SPT(hipLaunchKernel) + #define hipLaunchCooperativeKernel __HIP_API_SPT(hipLaunchCooperativeKernel) + + // Graph APIs + #define hipGraphLaunch __HIP_API_SPT(hipGraphLaunch) + #define hipStreamBeginCapture __HIP_API_SPT(hipStreamBeginCapture) + #define hipStreamEndCapture __HIP_API_SPT(hipStreamEndCapture) + #define hipStreamIsCapturing __HIP_API_SPT(hipStreamIsCapturing) + #define hipStreamGetCaptureInfo __HIP_API_SPT(hipStreamGetCaptureInfo) + #define hipStreamGetCaptureInfo_v2 __HIP_API_SPT(hipStreamGetCaptureInfo_v2) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +hipError_t hipMemcpy_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); + +hipError_t hipMemcpyToSymbol_spt(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind); + +hipError_t hipMemcpyFromSymbol_spt(void* dst, const void* symbol,size_t sizeBytes, + size_t offset, hipMemcpyKind kind); + +hipError_t hipMemcpy2D_spt(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); + +hipError_t hipMemcpy2DFromArray_spt( void* dst, size_t dpitch, hipArray_const_t src, size_t 
wOffset, + size_t hOffset, size_t width, size_t height, hipMemcpyKind kind); + +hipError_t hipMemcpy3D_spt(const struct hipMemcpy3DParms* p); + +hipError_t hipMemset_spt(void* dst, int value, size_t sizeBytes); + +hipError_t hipMemsetAsync_spt(void* dst, int value, size_t sizeBytes, hipStream_t stream); + +hipError_t hipMemset2D_spt(void* dst, size_t pitch, int value, size_t width, size_t height); + +hipError_t hipMemset2DAsync_spt(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream); + +hipError_t hipMemset3DAsync_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream); + +hipError_t hipMemset3D_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ); + +hipError_t hipMemcpyAsync_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream); + +hipError_t hipMemcpy3DAsync_spt(const hipMemcpy3DParms* p, hipStream_t stream); + +hipError_t hipMemcpy2DAsync_spt(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream); + +hipError_t hipMemcpyFromSymbolAsync_spt(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream); + +hipError_t hipMemcpyToSymbolAsync_spt(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream); + +hipError_t hipMemcpyFromArray_spt(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, + size_t count, hipMemcpyKind kind); + +hipError_t hipMemcpy2DToArray_spt(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); + +hipError_t hipMemcpy2DFromArrayAsync_spt(void* dst, size_t dpitch, hipArray_const_t src, + size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, + hipMemcpyKind kind, hipStream_t stream); + +hipError_t hipMemcpy2DToArrayAsync_spt(hipArray* dst, size_t wOffset, size_t 
hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream); + +hipError_t hipStreamQuery_spt(hipStream_t stream); + +hipError_t hipStreamSynchronize_spt(hipStream_t stream); + +hipError_t hipStreamGetPriority_spt(hipStream_t stream, int* priority); + +hipError_t hipStreamWaitEvent_spt(hipStream_t stream, hipEvent_t event, unsigned int flags); + +hipError_t hipStreamGetFlags_spt(hipStream_t stream, unsigned int* flags); + +hipError_t hipStreamAddCallback_spt(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags); +#ifdef __cplusplus +hipError_t hipEventRecord_spt(hipEvent_t event, hipStream_t stream = NULL); +#else +hipError_t hipEventRecord_spt(hipEvent_t event, hipStream_t stream); +#endif + +hipError_t hipLaunchCooperativeKernel_spt(const void* f, + dim3 gridDim, dim3 blockDim, + void **kernelParams, uint32_t sharedMemBytes, hipStream_t hStream); + +hipError_t hipLaunchKernel_spt(const void* function_address, + dim3 numBlocks, + dim3 dimBlocks, + void** args, + size_t sharedMemBytes, hipStream_t stream); + +hipError_t hipGraphLaunch_spt(hipGraphExec_t graphExec, hipStream_t stream); +hipError_t hipStreamBeginCapture_spt(hipStream_t stream, hipStreamCaptureMode mode); +hipError_t hipStreamEndCapture_spt(hipStream_t stream, hipGraph_t* pGraph); +hipError_t hipStreamIsCapturing_spt(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus); +hipError_t hipStreamGetCaptureInfo_spt(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId); +hipError_t hipStreamGetCaptureInfo_v2_spt(hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out, hipGraph_t* graph_out, + const hipGraphNode_t** dependencies_out, + size_t* numDependencies_out); +hipError_t hipLaunchHostFunc_spt(hipStream_t stream, hipHostFn_t fn, void* userData); + + +#ifdef __cplusplus +} +#endif // extern "C" + +#endif //(defined(__HIP_PLATFORM_HCC__) || 
defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) +#endif //HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h new file mode 100644 index 0000000000..0100e99e71 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h @@ -0,0 +1,570 @@ +/* +Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#ifdef __cplusplus +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" + +/** + * @brief Unsafe floating point rmw atomic add. + * + * Performs a relaxed read-modify-write floating point atomic add with + * device memory scope. 
The original value at \p addr is returned and + * the value at \p addr is updated to the original value plus \p value. + * + * @note This operation currently only performs different operations for + * the gfx90a and gfx940 targets. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * the following conditions are met: + * + * - \p addr is at least 4-byte aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. + * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and is not supported. + * + * @param [in,out] addr Pointer to the value to be incremented by \p value. + * @param [in] value Value by which \p addr is to be incremented. + * @return Original value contained in \p addr. 
+ */ +__device__ inline float unsafeAtomicAdd(float* addr, float value) { +#if defined(__gfx940__) && \ + __has_builtin(__builtin_amdgcn_flat_atomic_fadd_f32) + return __builtin_amdgcn_flat_atomic_fadd_f32(addr, value); +#elif defined(__gfx90a__) && \ + __has_builtin(__builtin_amdgcn_is_shared) && \ + __has_builtin(__builtin_amdgcn_is_private) && \ + __has_builtin(__builtin_amdgcn_ds_atomic_fadd_f32) && \ + __has_builtin(__builtin_amdgcn_global_atomic_fadd_f32) + if (__builtin_amdgcn_is_shared( + (const __attribute__((address_space(0))) void*)addr)) + return __builtin_amdgcn_ds_atomic_fadd_f32(addr, value); + else if (__builtin_amdgcn_is_private( + (const __attribute__((address_space(0))) void*)addr)) { + float temp = *addr; + *addr = temp + value; + return temp; + } + else + return __builtin_amdgcn_global_atomic_fadd_f32(addr, value); +#elif __has_builtin(__hip_atomic_fetch_add) + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else + return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); +#endif +} + +/** + * @brief Unsafe floating point rmw atomic max. + * + * Performs a relaxed read-modify-write floating point atomic max with + * device memory scope. The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if greater. + * + * @note This operation is currently identical to that performed by + * atomicMax and is included for completeness. + * + * @param [in,out] addr Pointer to value to be updated + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. 
+ */ +__device__ inline float unsafeAtomicMax(float* addr, float val) { + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned int *uaddr = (unsigned int *)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __uint_as_float(value); + #endif +} + +/** + * @brief Unsafe floating point rmw atomic min. + * + * Performs a relaxed read-modify-write floating point atomic min with + * device memory scope. The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if lesser. + * + * @note This operation is currently identical to that performed by + * atomicMin and is included for completeness. + * + * @param [in,out] addr Pointer to value to be updated + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. 
+ */ +__device__ inline float unsafeAtomicMin(float* addr, float val) { + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned int *uaddr = (unsigned int *)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __uint_as_float(value); + #endif +} + +/** + * @brief Unsafe double precision rmw atomic add. + * + * Performs a relaxed read-modify-write double precision atomic add with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated to have the original value plus \p value + * + * @note This operation currently only performs different operations for + * the gfx90a target. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * following condition are met: + * + * - \p addr is at least 8 byte aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. + * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and are not supported. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] value Value by \p addr is to be incremented. + * @return Original value contained in \p addr. 
+ */ +__device__ inline double unsafeAtomicAdd(double* addr, double value) { +#if (defined(__gfx90a__) || defined(__gfx940__)) && \ + __has_builtin(__builtin_amdgcn_flat_atomic_fadd_f64) + return __builtin_amdgcn_flat_atomic_fadd_f64(addr, value); +#elif defined (__hip_atomic_fetch_add) + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else + return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); +#endif +} + +/** + * @brief Unsafe double precision rmw atomic max. + * + * Performs a relaxed read-modify-write double precision atomic max with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if greater. + * + * @note This operation currently only performs different operations for + * the gfx90a target. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * following condition are met: + * + * - \p addr is at least 8 byte aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. + * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and are not supported. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to updated the contents at \p addr + * @return Original value contained at \p addr. 
+ */ +__device__ inline double unsafeAtomicMax(double* addr, double val) { +#if (defined(__gfx90a__) || defined(__gfx940__)) && \ + __has_builtin(__builtin_amdgcn_flat_atomic_fmax_f64) + return __builtin_amdgcn_flat_atomic_fmax_f64(addr, val); +#else + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { // retry until the exchange succeeds or value is no longer less than val + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned long long *uaddr = (unsigned long long *)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); + #endif +#endif +} + +/** + * @brief Unsafe double precision rmw atomic min. + * + * Performs a relaxed read-modify-write double precision atomic min with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if lesser. + * + * @note This operation currently only performs different operations for + * the gfx90a and gfx940 targets. Other devices continue to use safe atomics. + * + * It can be used to generate code that uses fast hardware floating point atomic + * operations which may handle rounding and subnormal values differently than + * non-atomic floating point operations. + * + * The operation is not always safe and can have undefined behavior unless + * the following conditions are met: + * + * - \p addr is at least 8 byte aligned + * - If \p addr is a global segment address, it is in a coarse grain allocation. 
+ * Passing in global segment addresses in fine grain allocations will result in + * undefined behavior and is not supported. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to update the contents at \p addr. + * @return Original value contained at \p addr. + */ +__device__ inline double unsafeAtomicMin(double* addr, double val) { +#if (defined(__gfx90a__) || defined(__gfx940__)) && \ + __has_builtin(__builtin_amdgcn_flat_atomic_fmin_f64) + return __builtin_amdgcn_flat_atomic_fmin_f64(addr, val); +#else + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned long long *uaddr = (unsigned long long *)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); + #endif +#endif +} + +/** + * @brief Safe floating point rmw atomic add. + * + * Performs a relaxed read-modify-write floating point atomic add with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated to have the original value plus \p value. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be incremented by \p value. + * @param [in] value Value by which \p addr is to be incremented. + * @return Original value contained in \p addr. 
+ */ +__device__ inline float safeAtomicAdd(float* addr, float value) { +#if defined(__gfx908__) || \ + (defined(__gfx90a__) && !__has_builtin(__hip_atomic_fetch_add)) + // On gfx908, we can generate unsafe FP32 atomic add that does not follow all + // IEEE rules when -munsafe-fp-atomics is passed. Do a CAS loop emulation instead. + // On gfx90a, if we do not have the __hip_atomic_fetch_add builtin, we need to + // force a CAS loop here. + float old_val; +#if __has_builtin(__hip_atomic_load) + old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else // !__has_builtin(__hip_atomic_load) + old_val = __uint_as_float(__atomic_load_n(reinterpret_cast<unsigned int*>(addr), __ATOMIC_RELAXED)); +#endif // __has_builtin(__hip_atomic_load) + float expected, temp; + do { + temp = expected = old_val; +#if __has_builtin(__hip_atomic_compare_exchange_strong) + __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else // !__has_builtin(__hip_atomic_compare_exchange_strong) + __atomic_compare_exchange_n(addr, &expected, old_val + value, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); // NOTE(review): float* operands on an _n builtin; confirm the compiler accepts this +#endif // __has_builtin(__hip_atomic_compare_exchange_strong) + old_val = expected; + } while (__float_as_uint(temp) != __float_as_uint(old_val)); // bitwise compare: unchanged `expected` means the exchange succeeded + return old_val; +#elif defined(__gfx90a__) + // On gfx90a, with the __hip_atomic_fetch_add builtin, relaxed system-scope + // atomics will produce safe CAS loops, but are otherwise not different than + // agent-scope atomics. This logic is only applicable for gfx90a, and should + // not be assumed on other architectures. 
+ return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#elif __has_builtin(__hip_atomic_fetch_add) + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else + return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); +#endif +} + +/** + * @brief Safe floating point rmw atomic max. + * + * Performs a relaxed read-modify-write floating point atomic max with + * device memory scope. The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if greater. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. + */ +__device__ inline float safeAtomicMax(float* addr, float val) { + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned int *uaddr = (unsigned int *)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __uint_as_float(value); + #endif +} + +/** + * @brief Safe floating point rmw atomic min. + * + * Performs a relaxed read-modify-write floating point atomic min with + * device memory scope. The original value at \p addr is returned and + * the value at \p addr is replaced by \p val if lesser. 
+ * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to update the value at \p addr. + * @return Original value contained in \p addr. + */ +__device__ inline float safeAtomicMin(float* addr, float val) { + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned int *uaddr = (unsigned int *)addr; + unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __uint_as_float(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __uint_as_float(value); + #endif +} + +/** + * @brief Safe double precision rmw atomic add. + * + * Performs a relaxed read-modify-write double precision atomic add with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated to have the original value plus \p value. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be incremented by \p value. + * @param [in] value Value by which \p addr is to be incremented. + * @return Original value contained in \p addr. 
+ */ +__device__ inline double safeAtomicAdd(double* addr, double value) { +#if (defined(__gfx90a__) || defined(__gfx940__)) && \ + __has_builtin(__hip_atomic_fetch_add) + // On gfx90a, with the __hip_atomic_fetch_add builtin, relaxed system-scope + // atomics will produce safe CAS loops, but are otherwise not different than + // agent-scope atomics. This logic is only applicable for gfx90a, and should + // not be assumed on other architectures. + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#elif defined(__gfx90a__) + // On gfx90a, if we do not have the __hip_atomic_fetch_add builtin, we need to + // force a CAS loop here. + double old_val; +#if __has_builtin(__hip_atomic_load) + old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else // !__has_builtin(__hip_atomic_load) + old_val = __longlong_as_double(__atomic_load_n(reinterpret_cast<unsigned long long*>(addr), __ATOMIC_RELAXED)); +#endif // __has_builtin(__hip_atomic_load) + double expected, temp; + do { + temp = expected = old_val; +#if __has_builtin(__hip_atomic_compare_exchange_strong) + __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED, + __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else // !__has_builtin(__hip_atomic_compare_exchange_strong) + __atomic_compare_exchange_n(addr, &expected, old_val + value, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); // NOTE(review): double* operands on an _n builtin; confirm the compiler accepts this +#endif // __has_builtin(__hip_atomic_compare_exchange_strong) + old_val = expected; + } while (__double_as_longlong(temp) != __double_as_longlong(old_val)); // bitwise compare: unchanged `expected` means the exchange succeeded + return old_val; +#else // !defined(__gfx90a__) +#if __has_builtin(__hip_atomic_fetch_add) + return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#else // !__has_builtin(__hip_atomic_fetch_add) + return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); +#endif // __has_builtin(__hip_atomic_fetch_add) +#endif +} + +/** + * @brief Safe double precision rmw atomic max. 
+ * + * Performs a relaxed read-modify-write double precision atomic max with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if greater. + * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to update the contents at \p addr. + * @return Original value contained at \p addr. + */ +__device__ inline double safeAtomicMax(double* addr, double val) { + #if __has_builtin(__builtin_amdgcn_is_private) + if (__builtin_amdgcn_is_private( + (const __attribute__((address_space(0))) void*)addr)) { + double old = *addr; // address reported as private: plain load/store is used instead of an atomic + *addr = __builtin_fmax(old, val); + return old; + } else { + #endif + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value < val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned long long *uaddr = (unsigned long long *)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) < val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); + #endif + #if __has_builtin(__builtin_amdgcn_is_private) + } + #endif +} + +/** + * @brief Safe double precision rmw atomic min. + * + * Performs a relaxed read-modify-write double precision atomic min with + * device memory scope. Original value at \p addr is returned and + * the value of \p addr is updated with \p val if lesser. 
+ * + * @note This operation ensures that, on all targets, we produce safe atomics. + * This will be the case even when -munsafe-fp-atomics is passed into the compiler. + * + * @param [in,out] addr Pointer to value to be updated. + * @param [in] val Value used to update the contents at \p addr. + * @return Original value contained at \p addr. + */ +__device__ inline double safeAtomicMin(double* addr, double val) { + #if __has_builtin(__builtin_amdgcn_is_private) + if (__builtin_amdgcn_is_private( + (const __attribute__((address_space(0))) void*)addr)) { + double old = *addr; // address reported as private: plain load/store is used instead of an atomic + *addr = __builtin_fmin(old, val); + return old; + } else { + #endif + #if __has_builtin(__hip_atomic_load) && \ + __has_builtin(__hip_atomic_compare_exchange_strong) + double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + bool done = false; + while (!done && value > val) { + done = __hip_atomic_compare_exchange_strong(addr, &value, val, + __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + } + return value; + #else + unsigned long long *uaddr = (unsigned long long *)addr; + unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + bool done = false; + while (!done && __longlong_as_double(value) > val) { + done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } + return __longlong_as_double(value); + #endif + #if __has_builtin(__builtin_amdgcn_is_private) + } + #endif +} + +#pragma clang diagnostic pop +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h new file mode 100644 index 0000000000..8215fb02e2 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h @@ -0,0 +1,2241 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/amd_hip_vector_types.h + * @brief Defines the different vector types for the HIP runtime. 
+ */ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H + +#include "hip/amd_detail/host_defines.h" + +#if defined(__HIPCC_RTC__) + #define __HOST_DEVICE__ __device__ +#else + #define __HOST_DEVICE__ __host__ __device__ +#endif + +#if defined(__has_attribute) + #if __has_attribute(ext_vector_type) + #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n))) + #else + #define __NATIVE_VECTOR__(n, T) T[n] + #endif + +#if defined(__cplusplus) +#if !defined(__HIPCC_RTC__) + #include + #include + #include +#else +namespace std { +using ::size_t; + +template struct integral_constant { + static constexpr const _Tp value = __v; + typedef _Tp value_type; + typedef integral_constant type; + constexpr operator value_type() const { return value; } + constexpr value_type operator()() const { return value; } +}; +template constexpr const _Tp integral_constant<_Tp, __v>::value; + +typedef integral_constant true_type; +typedef integral_constant false_type; + +template using bool_constant = integral_constant; +typedef bool_constant true_type; +typedef bool_constant false_type; + +template struct enable_if {}; +template struct enable_if { typedef __T type; }; + +template struct true_or_false_type : public false_type {}; +template<> struct true_or_false_type : public true_type {}; + +template struct is_integral : public false_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : 
public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; + +template struct is_arithmetic : public false_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; + +template struct is_floating_point : public false_type {}; +template<> struct is_floating_point : public true_type {}; +template<> struct is_floating_point : public true_type {}; +template<> struct is_floating_point : public true_type {}; + +template struct is_same : public false_type {}; +template struct is_same<__T, __T> : public true_type {}; + +template::value> + struct is_signed : public false_type {}; +template + struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> {}; + +template struct is_convertible + : public true_or_false_type<__is_convertible_to(_T1, _T2)> {}; + +template struct char_traits; +template> class basic_istream; +template> class basic_ostream; +typedef basic_istream istream; +typedef basic_ostream ostream; + +template struct is_scalar : public integral_constant {}; +} // Namespace std. 
+#endif // defined(__HIPCC_RTC__) + + namespace hip_impl { + inline + constexpr + unsigned int next_pot(unsigned int x) { + // Precondition: x > 1. + return 1u << (32u - __builtin_clz(x - 1u)); + } + } // Namespace hip_impl. + + template struct HIP_vector_base; + + template + struct HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(1, T); + + union { + Native_vec_ data; + struct { + T x; + }; + }; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + explicit + constexpr + HIP_vector_base(T x_) noexcept : data{x_} {} + __HOST_DEVICE__ + constexpr + HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr + HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; + }; + + template + struct HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(2, T); + + union + #if !__has_attribute(ext_vector_type) + alignas(hip_impl::next_pot(2 * sizeof(T))) + #endif + { + Native_vec_ data; + struct { + T x; + T y; + }; + }; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + explicit + constexpr + HIP_vector_base(T x_) noexcept : data{x_, x_} {} + __HOST_DEVICE__ + constexpr + HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {} + __HOST_DEVICE__ + constexpr + HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr + HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; + }; + + template + struct HIP_vector_base { + struct Native_vec_ { + T d[3]; + + __HOST_DEVICE__ + Native_vec_() = default; + + __HOST_DEVICE__ + explicit + constexpr + Native_vec_(T x_) noexcept : d{x_, x_, x_} {} + __HOST_DEVICE__ + constexpr + Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {} + __HOST_DEVICE__ + 
constexpr + Native_vec_(const Native_vec_&) = default; + __HOST_DEVICE__ + constexpr + Native_vec_(Native_vec_&&) = default; + __HOST_DEVICE__ + ~Native_vec_() = default; + + __HOST_DEVICE__ + Native_vec_& operator=(const Native_vec_&) = default; + __HOST_DEVICE__ + Native_vec_& operator=(Native_vec_&&) = default; + + __HOST_DEVICE__ + T& operator[](unsigned int idx) noexcept { return d[idx]; } + __HOST_DEVICE__ + T operator[](unsigned int idx) const noexcept { return d[idx]; } + + __HOST_DEVICE__ + Native_vec_& operator+=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i]; + return *this; + } + __HOST_DEVICE__ + Native_vec_& operator-=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i]; + return *this; + } + + __HOST_DEVICE__ + Native_vec_& operator*=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i]; + return *this; + } + __HOST_DEVICE__ + Native_vec_& operator/=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i]; + return *this; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_ operator-() const noexcept + { + auto r{*this}; + for (auto&& x : r.d) x = -x; + return r; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_ operator~() const noexcept + { + auto r{*this}; + for (auto&& x : r.d) x = ~x; + return r; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_& operator%=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i]; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_& operator^=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i]; + return *this; + } + template< + typename U = T, + typename 
std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_& operator|=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i]; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_& operator&=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i]; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_& operator>>=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i]; + return *this; + } + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + Native_vec_& operator<<=(const Native_vec_& x_) noexcept + { + for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i]; + return *this; + } +#if defined (__INTEL_COMPILER) + typedef struct { + int values[4]; + } _Vec3_cmp; + using Vec3_cmp = _Vec3_cmp; +#else + using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int)))); +#endif //INTEL + __HOST_DEVICE__ + Vec3_cmp operator==(const Native_vec_& x_) const noexcept + { + return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]}; + } + }; + + union { + Native_vec_ data; + struct { + T x; + T y; + T z; + }; + }; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + explicit + constexpr + HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {} + __HOST_DEVICE__ + constexpr + HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {} + __HOST_DEVICE__ + constexpr + HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr + HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(HIP_vector_base&&) = default; + }; + + template + struct 
HIP_vector_base { + using Native_vec_ = __NATIVE_VECTOR__(4, T); + + union + #if !__has_attribute(ext_vector_type) + alignas(hip_impl::next_pot(4 * sizeof(T))) + #endif + { + Native_vec_ data; + struct { + T x; + T y; + T z; + T w; + }; + }; + + using value_type = T; + + __HOST_DEVICE__ + HIP_vector_base() = default; + __HOST_DEVICE__ + explicit + constexpr + HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {} + __HOST_DEVICE__ + constexpr + HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {} + __HOST_DEVICE__ + constexpr + HIP_vector_base(const HIP_vector_base&) = default; + __HOST_DEVICE__ + constexpr + HIP_vector_base(HIP_vector_base&&) = default; + __HOST_DEVICE__ + ~HIP_vector_base() = default; + __HOST_DEVICE__ + HIP_vector_base& operator=(const HIP_vector_base&) = default; + }; + + template + struct HIP_vector_type : public HIP_vector_base { + using HIP_vector_base::data; + using typename HIP_vector_base::Native_vec_; + + __HOST_DEVICE__ + HIP_vector_type() = default; + template< + typename U, + typename std::enable_if< + std::is_convertible::value>::type* = nullptr> + __HOST_DEVICE__ + explicit + constexpr + HIP_vector_type(U x_) noexcept + : HIP_vector_base{static_cast(x_)} + {} + template< // TODO: constrain based on type as well. + typename... Us, + typename std::enable_if< + (rank > 1) && sizeof...(Us) == rank>::type* = nullptr> + __HOST_DEVICE__ + constexpr + HIP_vector_type(Us... 
xs) noexcept + : HIP_vector_base{static_cast(xs)...} + {} + __HOST_DEVICE__ + constexpr + HIP_vector_type(const HIP_vector_type&) = default; + __HOST_DEVICE__ + constexpr + HIP_vector_type(HIP_vector_type&&) = default; + __HOST_DEVICE__ + ~HIP_vector_type() = default; + + __HOST_DEVICE__ + HIP_vector_type& operator=(const HIP_vector_type&) = default; + __HOST_DEVICE__ + HIP_vector_type& operator=(HIP_vector_type&&) = default; + + // Operators + __HOST_DEVICE__ + HIP_vector_type& operator++() noexcept + { + return *this += HIP_vector_type{1}; + } + __HOST_DEVICE__ + HIP_vector_type operator++(int) noexcept + { + auto tmp(*this); + ++*this; + return tmp; + } + + __HOST_DEVICE__ + HIP_vector_type& operator--() noexcept + { + return *this -= HIP_vector_type{1}; + } + __HOST_DEVICE__ + HIP_vector_type operator--(int) noexcept + { + auto tmp(*this); + --*this; + return tmp; + } + + __HOST_DEVICE__ + HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept + { + data += x.data; + return *this; + } + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator+=(U x) noexcept + { + return *this += HIP_vector_type{x}; + } + + __HOST_DEVICE__ + HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept + { + data -= x.data; + return *this; + } + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator-=(U x) noexcept + { + return *this -= HIP_vector_type{x}; + } + + __HOST_DEVICE__ + HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept + { + data *= x.data; + return *this; + } + + friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator*( + HIP_vector_type x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{ x } *= y; + } + + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator*=(U x) 
noexcept + { + return *this *= HIP_vector_type{x}; + } + + friend __HOST_DEVICE__ inline constexpr HIP_vector_type operator/( + HIP_vector_type x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{ x } /= y; + } + + __HOST_DEVICE__ + HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept + { + data /= x.data; + return *this; + } + template< + typename U, + typename std::enable_if< + std::is_convertible{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator/=(U x) noexcept + { + return *this /= HIP_vector_type{x}; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type operator-() const noexcept + { + auto tmp(*this); + tmp.data = -tmp.data; + return tmp; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type operator~() const noexcept + { + HIP_vector_type r{*this}; + r.data = ~r.data; + return r; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept + { + data %= x.data; + return *this; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept + { + data ^= x.data; + return *this; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept + { + data |= x.data; + return *this; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept + { + data &= x.data; + return *this; + } + + template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept + { + data >>= x.data; + return *this; + } + + 
template< + typename U = T, + typename std::enable_if{}>::type* = nullptr> + __HOST_DEVICE__ + HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept + { + data <<= x.data; + return *this; + } + }; + + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator+( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} += y; + } + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator+( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} += HIP_vector_type{y}; + } + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator+( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} += y; + } + + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator-( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} -= y; + } + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator-( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} -= HIP_vector_type{y}; + } + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator-( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} -= y; + } + + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator*( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} *= HIP_vector_type{y}; + } + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator*( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} *= y; + } + + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator/( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} /= HIP_vector_type{y}; + } + template + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator/( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} /= y; + } + + template + __HOST_DEVICE__ + inline + 
constexpr + bool _hip_any_zero(const V& x, int n) noexcept + { + return + (n == -1) ? true : ((x[n] == 0) ? false : _hip_any_zero(x, n - 1)); + } + + template + __HOST_DEVICE__ + inline + constexpr + bool operator==( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return _hip_any_zero(x.data == y.data, n - 1); + } + template + __HOST_DEVICE__ + inline + constexpr + bool operator==(const HIP_vector_type& x, U y) noexcept + { + return x == HIP_vector_type{y}; + } + template + __HOST_DEVICE__ + inline + constexpr + bool operator==(U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} == y; + } + + template + __HOST_DEVICE__ + inline + constexpr + bool operator!=( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return !(x == y); + } + template + __HOST_DEVICE__ + inline + constexpr + bool operator!=(const HIP_vector_type& x, U y) noexcept + { + return !(x == y); + } + template + __HOST_DEVICE__ + inline + constexpr + bool operator!=(U x, const HIP_vector_type& y) noexcept + { + return !(x == y); + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator%( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} %= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator%( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} %= HIP_vector_type{y}; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator%( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} %= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type 
operator^( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} ^= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator^( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} ^= HIP_vector_type{y}; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator^( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} ^= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator|( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} |= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator|( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} |= HIP_vector_type{y}; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator|( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} |= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator&( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} &= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator&( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} &= HIP_vector_type{y}; + } + template< + typename T, + unsigned int n, + typename U, 
+ typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator&( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} &= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator>>( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} >>= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator>>( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} >>= HIP_vector_type{y}; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator>>( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} >>= y; + } + + template< + typename T, + unsigned int n, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator<<( + const HIP_vector_type& x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} <<= y; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator<<( + const HIP_vector_type& x, U y) noexcept + { + return HIP_vector_type{x} <<= HIP_vector_type{y}; + } + template< + typename T, + unsigned int n, + typename U, + typename std::enable_if::value>::type, + typename std::enable_if{}>* = nullptr> + __HOST_DEVICE__ + inline + constexpr + HIP_vector_type operator<<( + U x, const HIP_vector_type& y) noexcept + { + return HIP_vector_type{x} <<= y; + } + + #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + using CUDA_name##1 = HIP_vector_type;\ + using CUDA_name##2 = HIP_vector_type;\ + using CUDA_name##3 = HIP_vector_type;\ + using CUDA_name##4 = 
HIP_vector_type; +#else + #define __MAKE_VECTOR_TYPE__(CUDA_name, T) \ + typedef struct {\ + T x;\ + } CUDA_name##1;\ + typedef struct {\ + T x;\ + T y;\ + } CUDA_name##2;\ + typedef struct {\ + T x;\ + T y;\ + T z;\ + } CUDA_name##3;\ + typedef struct {\ + T x;\ + T y;\ + T z;\ + T w;\ + } CUDA_name##4; +#endif + +__MAKE_VECTOR_TYPE__(uchar, unsigned char); +__MAKE_VECTOR_TYPE__(char, char); +__MAKE_VECTOR_TYPE__(ushort, unsigned short); +__MAKE_VECTOR_TYPE__(short, short); +__MAKE_VECTOR_TYPE__(uint, unsigned int); +__MAKE_VECTOR_TYPE__(int, int); +__MAKE_VECTOR_TYPE__(ulong, unsigned long); +__MAKE_VECTOR_TYPE__(long, long); +__MAKE_VECTOR_TYPE__(ulonglong, unsigned long long); +__MAKE_VECTOR_TYPE__(longlong, long long); +__MAKE_VECTOR_TYPE__(float, float); +__MAKE_VECTOR_TYPE__(double, double); + +#else // !defined(__has_attribute) + +#if defined(_MSC_VER) +#include +#include +#include +#include + +/* +this is for compatibility with CUDA as CUDA allows accessing vector components +in C++ program with MSVC +*/ +typedef union { + struct { + char x; + }; + char data; +} char1; +typedef union { + struct { + char x; + char y; + }; + char data[2]; +} char2; +typedef union { + struct { + char x; + char y; + char z; + char w; + }; + char data[4]; +} char4; +typedef union { + struct { + char x; + char y; + char z; + }; + char data[3]; +} char3; +typedef union { + __m64 data; +} char8; +typedef union { + __m128i data; +} char16; + +typedef union { + struct { + unsigned char x; + }; + unsigned char data; +} uchar1; +typedef union { + struct { + unsigned char x; + unsigned char y; + }; + unsigned char data[2]; +} uchar2; +typedef union { + struct { + unsigned char x; + unsigned char y; + unsigned char z; + unsigned char w; + }; + unsigned char data[4]; +} uchar4; +typedef union { + struct { + unsigned char x; + unsigned char y; + unsigned char z; + }; + unsigned char data[3]; +} uchar3; +typedef union { + __m64 data; +} uchar8; +typedef union { + __m128i data; +} uchar16; 
+ +typedef union { + struct { + short x; + }; + short data; +} short1; +typedef union { + struct { + short x; + short y; + }; + short data[2]; +} short2; +typedef union { + struct { + short x; + short y; + short z; + short w; + }; + __m64 data; +} short4; +typedef union { + struct { + short x; + short y; + short z; + }; + short data[3]; +} short3; +typedef union { + __m128i data; +} short8; +typedef union { + __m128i data[2]; +} short16; + +typedef union { + struct { + unsigned short x; + }; + unsigned short data; +} ushort1; +typedef union { + struct { + unsigned short x; + unsigned short y; + }; + unsigned short data[2]; +} ushort2; +typedef union { + struct { + unsigned short x; + unsigned short y; + unsigned short z; + unsigned short w; + }; + __m64 data; +} ushort4; +typedef union { + struct { + unsigned short x; + unsigned short y; + unsigned short z; + }; + unsigned short data[3]; +} ushort3; +typedef union { + __m128i data; +} ushort8; +typedef union { + __m128i data[2]; +} ushort16; + +typedef union { + struct { + int x; + }; + int data; +} int1; +typedef union { + struct { + int x; + int y; + }; + __m64 data; +} int2; +typedef union { + struct { + int x; + int y; + int z; + int w; + }; + __m128i data; +} int4; +typedef union { + struct { + int x; + int y; + int z; + }; + int data[3]; +} int3; +typedef union { + __m128i data[2]; +} int8; +typedef union { + __m128i data[4]; +} int16; + +typedef union { + struct { + unsigned int x; + }; + unsigned int data; +} uint1; +typedef union { + struct { + unsigned int x; + unsigned int y; + }; + __m64 data; +} uint2; +typedef union { + struct { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; + __m128i data; +} uint4; +typedef union { + struct { + unsigned int x; + unsigned int y; + unsigned int z; + }; + unsigned int data[3]; +} uint3; +typedef union { + __m128i data[2]; +} uint8; +typedef union { + __m128i data[4]; +} uint16; + +typedef union { + struct { + int x; + }; + int data; +} 
long1; +typedef union { + struct { + int x; + int y; + }; + __m64 data; +} long2; +typedef union { + struct { + int x; + int y; + int z; + int w; + }; + __m128i data; +} long4; +typedef union { + struct { + int x; + int y; + int z; + }; + int data[3]; +} long3; +typedef union { + __m128i data[2]; +} long8; +typedef union { + __m128i data[4]; +} long16; + +typedef union { + struct { + unsigned int x; + }; + unsigned int data; +} ulong1; +typedef union { + struct { + unsigned int x; + unsigned int y; + }; + __m64 data; +} ulong2; +typedef union { + struct { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; + __m128i data; +} ulong4; +typedef union { + struct { + unsigned int x; + unsigned int y; + unsigned int z; + }; + unsigned int data[3]; +} ulong3; +typedef union { + __m128i data[2]; +} ulong8; +typedef union { + __m128i data[4]; +} ulong16; + +typedef union { + struct { + long long x; + }; + __m64 data; +} longlong1; +typedef union { + struct { + long long x; + long long y; + }; + __m128i data; +} longlong2; +typedef union { + struct { + long long x; + long long y; + long long z; + long long w; + }; + __m128i data[2]; +} longlong4; +typedef union { + struct { + long long x; + long long y; + long long z; + }; + __m64 data[3]; +} longlong3; +typedef union { + __m128i data[4]; +} longlong8; +typedef union { + __m128i data[8]; +} longlong16; + +typedef union { + struct { + __m64 x; + }; + __m64 data; +} ulonglong1; +typedef union { + struct { + __m64 x; + __m64 y; + }; + __m128i data; +} ulonglong2; +typedef union { + struct { + __m64 x; + __m64 y; + __m64 z; + __m64 w; + }; + __m128i data[2]; +} ulonglong4; +typedef union { + struct { + __m64 x; + __m64 y; + __m64 z; + }; + __m64 data[3]; +} ulonglong3; +typedef union { + __m128i data[4]; +} ulonglong8; +typedef union { + __m128i data[8]; +} ulonglong16; + +typedef union { + struct { + float x; + }; + float data; +} float1; +typedef union { + struct { + float x; + float y; + }; + __m64 
data; +} float2; +typedef union { + struct { + float x; + float y; + float z; + float w; + }; + __m128 data; +} float4; +typedef union { + struct { + float x; + float y; + float z; + }; + float data[3]; +} float3; +typedef union { + __m256 data; +} float8; +typedef union { + __m256 data[2]; +} float16; + +typedef union { + struct { + double x; + }; + double data; +} double1; +typedef union { + struct { + double x; + double y; + }; + __m128d data; +} double2; +typedef union { + struct { + double x; + double y; + double z; + double w; + }; + __m256d data; +} double4; +typedef union { + struct { + double x; + double y; + double z; + }; + double data[3]; +} double3; +typedef union { + __m256d data[2]; +} double8; +typedef union { + __m256d data[4]; +} double16; + +#else // !defined(_MSC_VER) + +/* +this is for compatibility with CUDA as CUDA allows accessing vector components +in C++ program with MSVC +*/ +typedef union { + struct { + char x; + }; + char data; +} char1; +typedef union { + struct { + char x; + char y; + }; + char data[2]; +} char2; +typedef union { + struct { + char x; + char y; + char z; + char w; + }; + char data[4]; +} char4; +typedef union { + char data[8]; +} char8; +typedef union { + char data[16]; +} char16; +typedef union { + struct { + char x; + char y; + char z; + }; + char data[3]; +} char3; + +typedef union { + struct { + unsigned char x; + }; + unsigned char data; +} uchar1; +typedef union { + struct { + unsigned char x; + unsigned char y; + }; + unsigned char data[2]; +} uchar2; +typedef union { + struct { + unsigned char x; + unsigned char y; + unsigned char z; + unsigned char w; + }; + unsigned char data[4]; +} uchar4; +typedef union { + unsigned char data[8]; +} uchar8; +typedef union { + unsigned char data[16]; +} uchar16; +typedef union { + struct { + unsigned char x; + unsigned char y; + unsigned char z; + }; + unsigned char data[3]; +} uchar3; + +typedef union { + struct { + short x; + }; + short data; +} short1; +typedef union { + 
struct { + short x; + short y; + }; + short data[2]; +} short2; +typedef union { + struct { + short x; + short y; + short z; + short w; + }; + short data[4]; +} short4; +typedef union { + short data[8]; +} short8; +typedef union { + short data[16]; +} short16; +typedef union { + struct { + short x; + short y; + short z; + }; + short data[3]; +} short3; + +typedef union { + struct { + unsigned short x; + }; + unsigned short data; +} ushort1; +typedef union { + struct { + unsigned short x; + unsigned short y; + }; + unsigned short data[2]; +} ushort2; +typedef union { + struct { + unsigned short x; + unsigned short y; + unsigned short z; + unsigned short w; + }; + unsigned short data[4]; +} ushort4; +typedef union { + unsigned short data[8]; +} ushort8; +typedef union { + unsigned short data[16]; +} ushort16; +typedef union { + struct { + unsigned short x; + unsigned short y; + unsigned short z; + }; + unsigned short data[3]; +} ushort3; + +typedef union { + struct { + int x; + }; + int data; +} int1; +typedef union { + struct { + int x; + int y; + }; + int data[2]; +} int2; +typedef union { + struct { + int x; + int y; + int z; + int w; + }; + int data[4]; +} int4; +typedef union { + int data[8]; +} int8; +typedef union { + int data[16]; +} int16; +typedef union { + struct { + int x; + int y; + int z; + }; + int data[3]; +} int3; + +typedef union { + struct { + unsigned int x; + }; + unsigned int data; +} uint1; +typedef union { + struct { + unsigned int x; + unsigned int y; + }; + unsigned int data[2]; +} uint2; +typedef union { + struct { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; + unsigned int data[4]; +} uint4; +typedef union { + unsigned int data[8]; +} uint8; +typedef union { + unsigned int data[16]; +} uint16; +typedef union { + struct { + unsigned int x; + unsigned int y; + unsigned int z; + }; + unsigned int data[3]; +} uint3; + +typedef union { + struct { + long x; + }; + long data; +} long1; +typedef union { + struct { + 
long x; + long y; + }; + long data[2]; +} long2; +typedef union { + struct { + long x; + long y; + long z; + long w; + }; + long data[4]; +} long4; +typedef union { + long data[8]; +} long8; +typedef union { + long data[16]; +} long16; +typedef union { + struct { + long x; + long y; + long z; + }; + long data[3]; +} long3; + +typedef union { + struct { + unsigned long x; + }; + unsigned long data; +} ulong1; +typedef union { + struct { + unsigned long x; + unsigned long y; + }; + unsigned long data[2]; +} ulong2; +typedef union { + struct { + unsigned long x; + unsigned long y; + unsigned long z; + unsigned long w; + }; + unsigned long data[4]; +} ulong4; +typedef union { + unsigned long data[8]; +} ulong8; +typedef union { + unsigned long data[16]; +} ulong16; +typedef union { + struct { + unsigned long x; + unsigned long y; + unsigned long z; + }; + unsigned long data[3]; +} ulong3; + +typedef union { + struct { + long long x; + }; + long long data; +} longlong1; +typedef union { + struct { + long long x; + long long y; + }; + long long data[2]; +} longlong2; +typedef union { + struct { + long long x; + long long y; + long long z; + long long w; + }; + long long data[4]; +} longlong4; +typedef union { + long long data[8]; +} longlong8; +typedef union { + long long data[16]; +} longlong16; +typedef union { + struct { + long long x; + long long y; + long long z; + }; + long long data[3]; +} longlong3; + +typedef union { + struct { + unsigned long long x; + }; + unsigned long long data; +} ulonglong1; +typedef union { + struct { + unsigned long long x; + unsigned long long y; + }; + unsigned long long data[2]; +} ulonglong2; +typedef union { + struct { + unsigned long long x; + unsigned long long y; + unsigned long long z; + unsigned long long w; + }; + unsigned long long data[4]; +} ulonglong4; +typedef union { + unsigned long long data[8]; +} ulonglong8; +typedef union { + unsigned long long data[16]; +} ulonglong16; +typedef union { + struct { + unsigned long 
long x; + unsigned long long y; + unsigned long long z; + }; + unsigned long long data[3]; +} ulonglong3; + +typedef union { + struct { + float x; + }; + float data; +} float1; +typedef union { + struct { + float x; + float y; + }; + float data[2]; +} float2; +typedef union { + struct { + float x; + float y; + float z; + float w; + }; + float data[4]; +} float4; +typedef union { + float data[8]; +} float8; +typedef union { + float data[16]; +} float16; +typedef union { + struct { + float x; + float y; + float z; + }; + float data[3]; +} float3; + +typedef union { + struct { + double x; + }; + double data; +} double1; +typedef union { + struct { + double x; + double y; + }; + double data[2]; +} double2; +typedef union { + struct { + double x; + double y; + double z; + double w; + }; + double data[4]; +} double4; +typedef union { + double data[8]; +} double8; +typedef union { + double data[16]; +} double16; +typedef union { + struct { + double x; + double y; + double z; + }; + double data[3]; +} double3; + +#endif // defined(_MSC_VER) +#endif // defined(__has_attribute) + +#ifdef __cplusplus +#define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x) { \ + type r{x}; \ + return r; \ + } + +#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \ + type r{x, y}; \ + return r; \ + } + +#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \ + type r{x, y, z}; \ + return r; \ + } + +#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \ + type r{x, y, z, w}; \ + return r; \ + } + +template +__HOST_DEVICE__ +__forceinline__ +typename std::enable_if< + sizeof(T) / sizeof(typename T::value_type) == 1 && + sizeof(U) / sizeof(typename U::value_type) >= 1, T>::type +mapElem(const U &u) { + T t; + t.x = static_cast(u.x); + 
return t; +} + +template +__HOST_DEVICE__ +__forceinline__ +typename std::enable_if< + sizeof(T) / sizeof(typename T::value_type) == 2 && + sizeof(U) / sizeof(typename U::value_type) >= 2, T>::type +mapElem(const U &u) { + T t; + t.x = static_cast(u.x); + t.y = static_cast(u.y); + return t; +} + +template +__HOST_DEVICE__ +__forceinline__ +typename std::enable_if< + sizeof(T) / sizeof(typename T::value_type) == 3 && + sizeof(U) / sizeof(typename U::value_type) >= 3, T>::type +mapElem(const U &u) { + T t; + t.x = static_cast(u.x); + t.y = static_cast(u.y); + t.z = static_cast(u.z); + return t; +} + +template +__HOST_DEVICE__ +__forceinline__ +typename std::enable_if< + sizeof(T) / sizeof(typename T::value_type) == 4 && + sizeof(U) / sizeof(typename U::value_type) >= 4, T>::type +mapElem(const U &u) { + T t; + t.x = static_cast(u.x); + t.y = static_cast(u.y); + t.z = static_cast(u.z); + t.w = static_cast(u.w); + return t; +} + +template +__HOST_DEVICE__ +__forceinline__ +typename std::enable_if< + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, const T>::type +mapFrom(const U &u) { + union { + U u; + T t; + } d = { u }; + return d.t; +} + +template +__HOST_DEVICE__ +__forceinline__ +typename std::enable_if< + (sizeof(T) == sizeof(typename T::value_type)) || + std::is_same::value || + std::is_same::value || + std::is_same::value, const T>::type +mapFrom(const U &u) { + union { + U u; + T t; + } d = { u }; + return d.t; +} + +template +__HOST_DEVICE__ +__forceinline__ +typename std::enable_if< + (sizeof(T) > sizeof(typename T::value_type)) && ( + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value), const T>::type +mapFrom(const U &u) { + union { + U u; + int4 i4; + uint4 u4; + } d = { u }; + if(std::is_signed::value) { + return mapElem(d.i4) ; + } else { + return mapElem(d.u4); + } +} + +#else +#define 
DECLOP_MAKE_ONE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x) { \ + type r; \ + r.x = x; \ + return r; \ + } + +#define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y) { \ + type r; \ + r.x = x; \ + r.y = y; \ + return r; \ + } + +#define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z) { \ + type r; \ + r.x = x; \ + r.y = y; \ + r.z = z; \ + return r; \ + } + +#define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ + static inline __HOST_DEVICE__ type make_##type(comp x, comp y, comp z, comp w) { \ + type r; \ + r.x = x; \ + r.y = y; \ + r.z = z; \ + r.w = w; \ + return r; \ + } +#endif + +DECLOP_MAKE_ONE_COMPONENT(unsigned char, uchar1); +DECLOP_MAKE_TWO_COMPONENT(unsigned char, uchar2); +DECLOP_MAKE_THREE_COMPONENT(unsigned char, uchar3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned char, uchar4); + +DECLOP_MAKE_ONE_COMPONENT(signed char, char1); +DECLOP_MAKE_TWO_COMPONENT(signed char, char2); +DECLOP_MAKE_THREE_COMPONENT(signed char, char3); +DECLOP_MAKE_FOUR_COMPONENT(signed char, char4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned short, ushort1); +DECLOP_MAKE_TWO_COMPONENT(unsigned short, ushort2); +DECLOP_MAKE_THREE_COMPONENT(unsigned short, ushort3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned short, ushort4); + +DECLOP_MAKE_ONE_COMPONENT(signed short, short1); +DECLOP_MAKE_TWO_COMPONENT(signed short, short2); +DECLOP_MAKE_THREE_COMPONENT(signed short, short3); +DECLOP_MAKE_FOUR_COMPONENT(signed short, short4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned int, uint1); +DECLOP_MAKE_TWO_COMPONENT(unsigned int, uint2); +DECLOP_MAKE_THREE_COMPONENT(unsigned int, uint3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned int, uint4); + +DECLOP_MAKE_ONE_COMPONENT(signed int, int1); +DECLOP_MAKE_TWO_COMPONENT(signed int, int2); +DECLOP_MAKE_THREE_COMPONENT(signed int, int3); +DECLOP_MAKE_FOUR_COMPONENT(signed int, int4); + +DECLOP_MAKE_ONE_COMPONENT(float, 
float1); +DECLOP_MAKE_TWO_COMPONENT(float, float2); +DECLOP_MAKE_THREE_COMPONENT(float, float3); +DECLOP_MAKE_FOUR_COMPONENT(float, float4); + +DECLOP_MAKE_ONE_COMPONENT(double, double1); +DECLOP_MAKE_TWO_COMPONENT(double, double2); +DECLOP_MAKE_THREE_COMPONENT(double, double3); +DECLOP_MAKE_FOUR_COMPONENT(double, double4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned long, ulong1); +DECLOP_MAKE_TWO_COMPONENT(unsigned long, ulong2); +DECLOP_MAKE_THREE_COMPONENT(unsigned long, ulong3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned long, ulong4); + +DECLOP_MAKE_ONE_COMPONENT(signed long, long1); +DECLOP_MAKE_TWO_COMPONENT(signed long, long2); +DECLOP_MAKE_THREE_COMPONENT(signed long, long3); +DECLOP_MAKE_FOUR_COMPONENT(signed long, long4); + +DECLOP_MAKE_ONE_COMPONENT(unsigned long long, ulonglong1); +DECLOP_MAKE_TWO_COMPONENT(unsigned long long, ulonglong2); +DECLOP_MAKE_THREE_COMPONENT(unsigned long long, ulonglong3); +DECLOP_MAKE_FOUR_COMPONENT(unsigned long long, ulonglong4); + +DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1); +DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2); +DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3); +DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4); + +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h new file mode 100644 index 0000000000..3c17d298ea --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h @@ -0,0 +1,1502 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "hip_fp16_math_fwd.h" +#include "amd_hip_vector_types.h" +#include "math_fwd.h" + +#include + +#if !defined(__HIPCC_RTC__) +#include +// assert.h is only for the host version of assert. +// The device version of assert is implemented in hip/amd_detail/hip_runtime.h. +// Users should include hip_runtime.h for the device version of assert. 
+#if !__HIP_DEVICE_COMPILE__ +#include +#endif +#include +#include +#include +#endif // !defined(__HIPCC_RTC__) + +#if _LIBCPP_VERSION && __HIP__ +namespace std { +template <> +struct __numeric_type<_Float16> +{ + static _Float16 __test(_Float16); + + typedef _Float16 type; + static const bool value = true; +}; +} +#endif // _LIBCPP_VERSION + +#pragma push_macro("__DEVICE__") +#pragma push_macro("__RETURN_TYPE") + +#define __DEVICE__ static __device__ +#define __RETURN_TYPE bool + +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +__DEVICE__ +inline +uint64_t __make_mantissa_base8(const char* tagp) /* Parses a NUL-terminated string of octal digits; returns 0 if any character is not an octal digit. tagp is dereferenced unchecked and must be non-null. */ +{ + uint64_t r = 0; + while (*tagp != '\0') { /* stop at the terminator; testing the pointer itself made every call end in the `return 0` branch */ + char tmp = *tagp; + + if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0'; + else return 0; + + ++tagp; + } + + return r; +} + +__DEVICE__ +inline +uint64_t __make_mantissa_base10(const char* tagp) /* Parses a NUL-terminated string of decimal digits; returns 0 if any character is not a decimal digit. tagp is dereferenced unchecked and must be non-null. */ +{ + uint64_t r = 0; + while (*tagp != '\0') { /* stop at the terminator; testing the pointer itself made every call end in the `return 0` branch */ + char tmp = *tagp; + + if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0'; + else return 0; + + ++tagp; + } + + return r; +} + +__DEVICE__ +inline +uint64_t __make_mantissa_base16(const char* tagp) /* Parses a NUL-terminated string of hex digits (either case, no 0x prefix); returns 0 if any character is not a hex digit. tagp is dereferenced unchecked and must be non-null. */ +{ + uint64_t r = 0; + while (*tagp != '\0') { /* stop at the terminator; testing the pointer itself made every call end in the `return 0` branch */ + char tmp = *tagp; + + if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0'; + else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10; + else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10; + else return 0; + + ++tagp; + } + + return r; +} + +__DEVICE__ +inline +uint64_t __make_mantissa(const char* tagp) /* Dispatches on the prefix: "0x"/"0X" -> hex, leading "0" -> octal, otherwise decimal. Returns 0 for a null tagp or for any string the selected parser rejects. */ +{ + if (!tagp) return 0u; + + if (*tagp == '0') { + ++tagp; + + if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp + 1); /* skip the 'x'/'X' itself; the hex parser rejects it as a non-digit */ + else return __make_mantissa_base8(tagp); + } + + return __make_mantissa_base10(tagp); +} +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ + +// DOT FUNCTIONS +#if __HIP_CLANG_ONLY__ +__DEVICE__ +inline +int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) { + return __ockl_sdot2(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool
saturate) { + return __ockl_udot2(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) { + return __ockl_sdot4(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) { + return __ockl_udot4(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +int amd_mixed_dot(int a, int b, int c, bool saturate) { + return __ockl_sdot8(a, b, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) { + return __ockl_udot8(a, b, c, saturate); +} +#endif + +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +// BEGIN FLOAT +__DEVICE__ +inline +float abs(float x) { return __ocml_fabs_f32(x); } +__DEVICE__ +inline +float acosf(float x) { return __ocml_acos_f32(x); } +__DEVICE__ +inline +float acoshf(float x) { return __ocml_acosh_f32(x); } +__DEVICE__ +inline +float asinf(float x) { return __ocml_asin_f32(x); } +__DEVICE__ +inline +float asinhf(float x) { return __ocml_asinh_f32(x); } +__DEVICE__ +inline +float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); } +__DEVICE__ +inline +float atanf(float x) { return __ocml_atan_f32(x); } +__DEVICE__ +inline +float atanhf(float x) { return __ocml_atanh_f32(x); } +__DEVICE__ +inline +float cbrtf(float x) { return __ocml_cbrt_f32(x); } +__DEVICE__ +inline +float ceilf(float x) { return __ocml_ceil_f32(x); } +__DEVICE__ +inline +float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); } +__DEVICE__ +inline +float cosf(float x) { return __ocml_cos_f32(x); } +__DEVICE__ +inline +float coshf(float x) { return __ocml_cosh_f32(x); } +__DEVICE__ +inline +float cospif(float x) { return __ocml_cospi_f32(x); } +__DEVICE__ +inline +float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); } +__DEVICE__ +inline +float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); } +__DEVICE__ +inline +float erfcf(float x) { return __ocml_erfc_f32(x); } +__DEVICE__ +inline +float erfcinvf(float x) 
{ return __ocml_erfcinv_f32(x); } +__DEVICE__ +inline +float erfcxf(float x) { return __ocml_erfcx_f32(x); } +__DEVICE__ +inline +float erff(float x) { return __ocml_erf_f32(x); } +__DEVICE__ +inline +float erfinvf(float x) { return __ocml_erfinv_f32(x); } +__DEVICE__ +inline +float exp10f(float x) { return __ocml_exp10_f32(x); } +__DEVICE__ +inline +float exp2f(float x) { return __ocml_exp2_f32(x); } +__DEVICE__ +inline +float expf(float x) { return __ocml_exp_f32(x); } +__DEVICE__ +inline +float expm1f(float x) { return __ocml_expm1_f32(x); } +__DEVICE__ +inline +float fabsf(float x) { return __ocml_fabs_f32(x); } +__DEVICE__ +inline +float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); } +__DEVICE__ +inline +float fdividef(float x, float y) { return x / y; } +__DEVICE__ +inline +float floorf(float x) { return __ocml_floor_f32(x); } +__DEVICE__ +inline +float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); } +__DEVICE__ +inline +float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); } +__DEVICE__ +inline +float fminf(float x, float y) { return __ocml_fmin_f32(x, y); } +__DEVICE__ +inline +float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); } +__DEVICE__ +inline +float frexpf(float x, int* nptr) +{ + int tmp; + float r = + __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp); + *nptr = tmp; + + return r; +} +__DEVICE__ +inline +float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); } +__DEVICE__ +inline +int ilogbf(float x) { return __ocml_ilogb_f32(x); } +__DEVICE__ +inline +__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); } +__DEVICE__ +inline +__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); } +__DEVICE__ +inline +__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); } +__DEVICE__ +inline +float j0f(float x) { return __ocml_j0_f32(x); } +__DEVICE__ +inline +float j1f(float x) { return __ocml_j1_f32(x); } +__DEVICE__ +inline +float jnf(int n, float x) +{ // TODO: we 
could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. + if (n == 0) return j0f(x); + if (n == 1) return j1f(x); + + float x0 = j0f(x); + float x1 = j1f(x); + for (int i = 1; i < n; ++i) { + float x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} +__DEVICE__ +inline +float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); } +__DEVICE__ +inline +float lgammaf(float x) { return __ocml_lgamma_f32(x); } +__DEVICE__ +inline +long long int llrintf(float x) { return __ocml_rint_f32(x); } +__DEVICE__ +inline +long long int llroundf(float x) { return __ocml_round_f32(x); } +__DEVICE__ +inline +float log10f(float x) { return __ocml_log10_f32(x); } +__DEVICE__ +inline +float log1pf(float x) { return __ocml_log1p_f32(x); } +__DEVICE__ +inline +float log2f(float x) { return __ocml_log2_f32(x); } +__DEVICE__ +inline +float logbf(float x) { return __ocml_logb_f32(x); } +__DEVICE__ +inline +float logf(float x) { return __ocml_log_f32(x); } +__DEVICE__ +inline +long int lrintf(float x) { return __ocml_rint_f32(x); } +__DEVICE__ +inline +long int lroundf(float x) { return __ocml_round_f32(x); } +__DEVICE__ +inline +float modff(float x, float* iptr) +{ + float tmp; + float r = + __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *iptr = tmp; + + return r; +} +__DEVICE__ +inline +float nanf(const char* tagp) +{ + union { + float val; + struct ieee_float { + uint32_t mantissa : 22; + uint32_t quiet : 1; + uint32_t exponent : 8; + uint32_t sign : 1; + } bits; + + static_assert(sizeof(float) == sizeof(ieee_float), ""); + } tmp; + + tmp.bits.sign = 0u; + tmp.bits.exponent = ~0u; + tmp.bits.quiet = 1u; + tmp.bits.mantissa = __make_mantissa(tagp); + + return tmp.val; +} +__DEVICE__ +inline +float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__DEVICE__ +inline +float nextafterf(float x, float y) { return 
__ocml_nextafter_f32(x, y); } +__DEVICE__ +inline +float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); } +__DEVICE__ +inline +float norm4df(float x, float y, float z, float w) +{ + return __ocml_len4_f32(x, y, z, w); +} +__DEVICE__ +inline +float normcdff(float x) { return __ocml_ncdf_f32(x); } +__DEVICE__ +inline +float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); } +__DEVICE__ +inline +float normf(int dim, const float* a) +{ // TODO: placeholder until OCML adds support. + float r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_sqrt_f32(r); +} +__DEVICE__ +inline +float powf(float x, float y) { return __ocml_pow_f32(x, y); } +__DEVICE__ +inline +float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); } +__DEVICE__ +inline +float rcbrtf(float x) { return __ocml_rcbrt_f32(x); } +__DEVICE__ +inline +float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); } +__DEVICE__ +inline +float remquof(float x, float y, int* quo) +{ + int tmp; + float r = + __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp); + *quo = tmp; + + return r; +} +__DEVICE__ +inline +float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); } +__DEVICE__ +inline +float rintf(float x) { return __ocml_rint_f32(x); } +__DEVICE__ +inline +float rnorm3df(float x, float y, float z) +{ + return __ocml_rlen3_f32(x, y, z); +} + +__DEVICE__ +inline +float rnorm4df(float x, float y, float z, float w) +{ + return __ocml_rlen4_f32(x, y, z, w); +} +__DEVICE__ +inline +float rnormf(int dim, const float* a) +{ // TODO: placeholder until OCML adds support. + float r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_rsqrt_f32(r); +} +__DEVICE__ +inline +float roundf(float x) { return __ocml_round_f32(x); } +__DEVICE__ +inline +float rsqrtf(float x) { return __ocml_rsqrt_f32(x); } +__DEVICE__ +inline +float scalblnf(float x, long int n) +{ + return (n < INT_MAX) ? 
__ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n); +} +__DEVICE__ +inline +float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); } +__DEVICE__ +inline +__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); } +__DEVICE__ +inline +void sincosf(float x, float* sptr, float* cptr) +{ + float tmp; + + *sptr = + __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +void sincospif(float x, float* sptr, float* cptr) +{ + float tmp; + + *sptr = + __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +float sinf(float x) { return __ocml_sin_f32(x); } +__DEVICE__ +inline +float sinhf(float x) { return __ocml_sinh_f32(x); } +__DEVICE__ +inline +float sinpif(float x) { return __ocml_sinpi_f32(x); } +__DEVICE__ +inline +float sqrtf(float x) { return __ocml_sqrt_f32(x); } +__DEVICE__ +inline +float tanf(float x) { return __ocml_tan_f32(x); } +__DEVICE__ +inline +float tanhf(float x) { return __ocml_tanh_f32(x); } +__DEVICE__ +inline +float tgammaf(float x) { return __ocml_tgamma_f32(x); } +__DEVICE__ +inline +float truncf(float x) { return __ocml_trunc_f32(x); } +__DEVICE__ +inline +float y0f(float x) { return __ocml_y0_f32(x); } +__DEVICE__ +inline +float y1f(float x) { return __ocml_y1_f32(x); } +__DEVICE__ +inline +float ynf(int n, float x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. Placeholder until OCML adds + // support. 
+ if (n == 0) return y0f(x); + if (n == 1) return y1f(x); + + float x0 = y0f(x); + float x1 = y1f(x); + for (int i = 1; i < n; ++i) { + float x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} + +// BEGIN INTRINSICS +__DEVICE__ +inline +float __cosf(float x) { return __ocml_native_cos_f32(x); } +__DEVICE__ +inline +float __exp10f(float x) { return __ocml_native_exp10_f32(x); } +__DEVICE__ +inline +float __expf(float x) { return __ocml_native_exp_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fadd_rn(float x, float y) { return x + y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } +__DEVICE__ +inline +float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fdiv_rn(float x, float y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } +__DEVICE__ +inline +float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } +#endif +__DEVICE__ +inline +float __fdividef(float x, float y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmaf_rd(float x, float y, float z) +{ + return __ocml_fma_rtn_f32(x, y, z); +} +#endif +__DEVICE__ +inline +float __fmaf_rn(float x, float y, float z) +{ + return __ocml_fma_f32(x, y, z); +} +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmaf_ru(float x, float y, float z) +{ + return __ocml_fma_rtp_f32(x, y, z); +} +__DEVICE__ +inline +float __fmaf_rz(float x, float y, float z) +{ + return __ocml_fma_rtz_f32(x, y, z); +} +__DEVICE__ +inline +float __fmul_rd(float x, float y) { return 
__ocml_mul_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fmul_rn(float x, float y) { return x * y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } +__DEVICE__ +inline +float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } +__DEVICE__ +inline +float __frcp_rd(float x) { return __builtin_amdgcn_rcpf(x); } +#endif +__DEVICE__ +inline +float __frcp_rn(float x) { return __builtin_amdgcn_rcpf(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __frcp_ru(float x) { return __builtin_amdgcn_rcpf(x); } +__DEVICE__ +inline +float __frcp_rz(float x) { return __builtin_amdgcn_rcpf(x); } +#endif +__DEVICE__ +inline +float __frsqrt_rn(float x) { return __builtin_amdgcn_rsqf(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } +#endif +__DEVICE__ +inline +float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } +__DEVICE__ +inline +float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } +__DEVICE__ +inline +float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } +#endif +__DEVICE__ +inline +float __fsub_rn(float x, float y) { return x - y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } +__DEVICE__ +inline +float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +#endif +__DEVICE__ +inline +float __log10f(float x) { return __ocml_native_log10_f32(x); } +__DEVICE__ +inline +float __log2f(float x) { return __ocml_native_log2_f32(x); } +__DEVICE__ +inline +float __logf(float x) { return __ocml_native_log_f32(x); } +__DEVICE__ +inline +float __powf(float x, float y) { return __ocml_pow_f32(x, y); } +__DEVICE__ +inline 
+float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); } +__DEVICE__ +inline +void __sincosf(float x, float* sptr, float* cptr) +{ + *sptr = __ocml_native_sin_f32(x); + *cptr = __ocml_native_cos_f32(x); +} +__DEVICE__ +inline +float __sinf(float x) { return __ocml_native_sin_f32(x); } +__DEVICE__ +inline +float __tanf(float x) { return __ocml_tan_f32(x); } +// END INTRINSICS +// END FLOAT + +// BEGIN DOUBLE +__DEVICE__ +inline +double abs(double x) { return __ocml_fabs_f64(x); } +__DEVICE__ +inline +double acos(double x) { return __ocml_acos_f64(x); } +__DEVICE__ +inline +double acosh(double x) { return __ocml_acosh_f64(x); } +__DEVICE__ +inline +double asin(double x) { return __ocml_asin_f64(x); } +__DEVICE__ +inline +double asinh(double x) { return __ocml_asinh_f64(x); } +__DEVICE__ +inline +double atan(double x) { return __ocml_atan_f64(x); } +__DEVICE__ +inline +double atan2(double x, double y) { return __ocml_atan2_f64(x, y); } +__DEVICE__ +inline +double atanh(double x) { return __ocml_atanh_f64(x); } +__DEVICE__ +inline +double cbrt(double x) { return __ocml_cbrt_f64(x); } +__DEVICE__ +inline +double ceil(double x) { return __ocml_ceil_f64(x); } +__DEVICE__ +inline +double copysign(double x, double y) { return __ocml_copysign_f64(x, y); } +__DEVICE__ +inline +double cos(double x) { return __ocml_cos_f64(x); } +__DEVICE__ +inline +double cosh(double x) { return __ocml_cosh_f64(x); } +__DEVICE__ +inline +double cospi(double x) { return __ocml_cospi_f64(x); } +__DEVICE__ +inline +double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); } +__DEVICE__ +inline +double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); } +__DEVICE__ +inline +double erf(double x) { return __ocml_erf_f64(x); } +__DEVICE__ +inline +double erfc(double x) { return __ocml_erfc_f64(x); } +__DEVICE__ +inline +double erfcinv(double x) { return __ocml_erfcinv_f64(x); } +__DEVICE__ +inline +double erfcx(double x) { return __ocml_erfcx_f64(x); } +__DEVICE__ +inline +double 
erfinv(double x) { return __ocml_erfinv_f64(x); } +__DEVICE__ +inline +double exp(double x) { return __ocml_exp_f64(x); } +__DEVICE__ +inline +double exp10(double x) { return __ocml_exp10_f64(x); } +__DEVICE__ +inline +double exp2(double x) { return __ocml_exp2_f64(x); } +__DEVICE__ +inline +double expm1(double x) { return __ocml_expm1_f64(x); } +__DEVICE__ +inline +double fabs(double x) { return __ocml_fabs_f64(x); } +__DEVICE__ +inline +double fdim(double x, double y) { return __ocml_fdim_f64(x, y); } +__DEVICE__ +inline +double floor(double x) { return __ocml_floor_f64(x); } +__DEVICE__ +inline +double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); } +__DEVICE__ +inline +double fmax(double x, double y) { return __ocml_fmax_f64(x, y); } +__DEVICE__ +inline +double fmin(double x, double y) { return __ocml_fmin_f64(x, y); } +__DEVICE__ +inline +double fmod(double x, double y) { return __ocml_fmod_f64(x, y); } +__DEVICE__ +inline +double frexp(double x, int* nptr) +{ + int tmp; + double r = + __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp); + *nptr = tmp; + + return r; +} +__DEVICE__ +inline +double hypot(double x, double y) { return __ocml_hypot_f64(x, y); } +__DEVICE__ +inline +int ilogb(double x) { return __ocml_ilogb_f64(x); } +__DEVICE__ +inline +__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); } +__DEVICE__ +inline +__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); } +__DEVICE__ +inline +__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); } +__DEVICE__ +inline +double j0(double x) { return __ocml_j0_f64(x); } +__DEVICE__ +inline +double j1(double x) { return __ocml_j1_f64(x); } +__DEVICE__ +inline +double jn(int n, double x) +{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. Placeholder until OCML adds + // support. 
+ if (n == 0) return j0(x); + if (n == 1) return j1(x); + + double x0 = j0(x); // f64 seeds keep the recurrence in double precision + double x1 = j1(x); + for (int i = 1; i < n; ++i) { + double x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} +__DEVICE__ +inline +double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); } +__DEVICE__ +inline +double lgamma(double x) { return __ocml_lgamma_f64(x); } +__DEVICE__ +inline +long long int llrint(double x) { return __ocml_rint_f64(x); } +__DEVICE__ +inline +long long int llround(double x) { return __ocml_round_f64(x); } +__DEVICE__ +inline +double log(double x) { return __ocml_log_f64(x); } +__DEVICE__ +inline +double log10(double x) { return __ocml_log10_f64(x); } +__DEVICE__ +inline +double log1p(double x) { return __ocml_log1p_f64(x); } +__DEVICE__ +inline +double log2(double x) { return __ocml_log2_f64(x); } +__DEVICE__ +inline +double logb(double x) { return __ocml_logb_f64(x); } +__DEVICE__ +inline +long int lrint(double x) { return __ocml_rint_f64(x); } +__DEVICE__ +inline +long int lround(double x) { return __ocml_round_f64(x); } +__DEVICE__ +inline +double modf(double x, double* iptr) +{ + double tmp; + double r = + __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp); + *iptr = tmp; + + return r; +} +__DEVICE__ +inline +double nan(const char* tagp) +{ +#if !_WIN32 + union { + double val; + struct ieee_double { + uint64_t mantissa : 51; + uint32_t quiet : 1; + uint32_t exponent : 11; + uint32_t sign : 1; + } bits; + static_assert(sizeof(double) == sizeof(ieee_double), ""); + } tmp; + + tmp.bits.sign = 0u; + tmp.bits.exponent = ~0u; + tmp.bits.quiet = 1u; + tmp.bits.mantissa = __make_mantissa(tagp); + + return tmp.val; +#else + static_assert(sizeof(uint64_t)==sizeof(double), ""); + uint64_t val = __make_mantissa(tagp); + val |= 0xFFFull << 51; // 64-bit literal: sets exponent bits 52-62 and quiet bit 51 + return *reinterpret_cast<double*>(&val); +#endif +} +__DEVICE__ +inline +double nearbyint(double x) { return __ocml_nearbyint_f64(x); } +__DEVICE__ +inline +double nextafter(double x, double 
y) { return __ocml_nextafter_f64(x, y); } +__DEVICE__ +inline +double norm(int dim, const double* a) +{ // TODO: placeholder until OCML adds support. + double r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_sqrt_f64(r); +} +__DEVICE__ +inline +double norm3d(double x, double y, double z) +{ + return __ocml_len3_f64(x, y, z); +} +__DEVICE__ +inline +double norm4d(double x, double y, double z, double w) +{ + return __ocml_len4_f64(x, y, z, w); +} +__DEVICE__ +inline +double normcdf(double x) { return __ocml_ncdf_f64(x); } +__DEVICE__ +inline +double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); } +__DEVICE__ +inline +double pow(double x, double y) { return __ocml_pow_f64(x, y); } +__DEVICE__ +inline +double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); } +__DEVICE__ +inline +double rcbrt(double x) { return __ocml_rcbrt_f64(x); } +__DEVICE__ +inline +double remainder(double x, double y) { return __ocml_remainder_f64(x, y); } +__DEVICE__ +inline +double remquo(double x, double y, int* quo) +{ + int tmp; + double r = + __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp); + *quo = tmp; + + return r; +} +__DEVICE__ +inline +double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); } +__DEVICE__ +inline +double rint(double x) { return __ocml_rint_f64(x); } +__DEVICE__ +inline +double rnorm(int dim, const double* a) +{ // TODO: placeholder until OCML adds support. 
+ double r = 0; + while (dim--) { r += a[0] * a[0]; ++a; } + + return __ocml_rsqrt_f64(r); +} +__DEVICE__ +inline +double rnorm3d(double x, double y, double z) +{ + return __ocml_rlen3_f64(x, y, z); +} +__DEVICE__ +inline +double rnorm4d(double x, double y, double z, double w) +{ + return __ocml_rlen4_f64(x, y, z, w); +} +__DEVICE__ +inline +double round(double x) { return __ocml_round_f64(x); } +__DEVICE__ +inline +double rsqrt(double x) { return __ocml_rsqrt_f64(x); } +__DEVICE__ +inline +double scalbln(double x, long int n) +{ + return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n); +} +__DEVICE__ +inline +double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); } +__DEVICE__ +inline +__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); } +__DEVICE__ +inline +double sin(double x) { return __ocml_sin_f64(x); } +__DEVICE__ +inline +void sincos(double x, double* sptr, double* cptr) +{ + double tmp; + *sptr = + __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +void sincospi(double x, double* sptr, double* cptr) +{ + double tmp; + *sptr = __ocml_sincospi_f64( + x, (__attribute__((address_space(5))) double*) &tmp); + *cptr = tmp; +} +__DEVICE__ +inline +double sinh(double x) { return __ocml_sinh_f64(x); } +__DEVICE__ +inline +double sinpi(double x) { return __ocml_sinpi_f64(x); } +__DEVICE__ +inline +double sqrt(double x) { return __ocml_sqrt_f64(x); } +__DEVICE__ +inline +double tan(double x) { return __ocml_tan_f64(x); } +__DEVICE__ +inline +double tanh(double x) { return __ocml_tanh_f64(x); } +__DEVICE__ +inline +double tgamma(double x) { return __ocml_tgamma_f64(x); } +__DEVICE__ +inline +double trunc(double x) { return __ocml_trunc_f64(x); } +__DEVICE__ +inline +double y0(double x) { return __ocml_y0_f64(x); } +__DEVICE__ +inline +double y1(double x) { return __ocml_y1_f64(x); } +__DEVICE__ +inline +double yn(int n, double x) +{ // TODO: we could use Ahmes 
multiplication and the Miller & Brown algorithm + // for linear recurrences to get O(log n) steps, but it's unclear if + // it'd be beneficial in this case. Placeholder until OCML adds + // support. + if (n == 0) return y0(x); + if (n == 1) return y1(x); + + double x0 = y0(x); // seed with the second-kind Bessel functions Y0/Y1 in double precision + double x1 = y1(x); + for (int i = 1; i < n; ++i) { + double x2 = (2 * i) / x * x1 - x0; + x0 = x1; + x1 = x2; + } + + return x1; +} + +// BEGIN INTRINSICS +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __dadd_rn(double x, double y) { return x + y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } +__DEVICE__ +inline +double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __ddiv_rn(double x, double y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } +__DEVICE__ +inline +double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } +__DEVICE__ +inline +double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __dmul_rn(double x, double y) { return x * y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } +__DEVICE__ +inline +double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } +__DEVICE__ +inline +double __drcp_rd(double x) { return __builtin_amdgcn_rcp(x); } +#endif +__DEVICE__ +inline +double __drcp_rn(double x) { return __builtin_amdgcn_rcp(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __drcp_ru(double x) { return 
__builtin_amdgcn_rcp(x); } +__DEVICE__ +inline +double __drcp_rz(double x) { return __builtin_amdgcn_rcp(x); } +__DEVICE__ +inline +double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } +#endif +__DEVICE__ +inline +double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } +__DEVICE__ +inline +double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } +__DEVICE__ +inline +double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } +#endif +__DEVICE__ +inline +double __dsub_rn(double x, double y) { return x - y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } +__DEVICE__ +inline +double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } +__DEVICE__ +inline +double __fma_rd(double x, double y, double z) +{ + return __ocml_fma_rtn_f64(x, y, z); +} +#endif +__DEVICE__ +inline +double __fma_rn(double x, double y, double z) +{ + return __ocml_fma_f64(x, y, z); +} +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +inline +double __fma_ru(double x, double y, double z) +{ + return __ocml_fma_rtp_f64(x, y, z); +} +__DEVICE__ +inline +double __fma_rz(double x, double y, double z) +{ + return __ocml_fma_rtz_f64(x, y, z); +} +#endif +// END INTRINSICS +// END DOUBLE + +// BEGIN INTEGER +__DEVICE__ +inline +int abs(int x) +{ + int sgn = x >> (sizeof(int) * CHAR_BIT - 1); + return (x ^ sgn) - sgn; +} +__DEVICE__ +inline +long labs(long x) +{ + long sgn = x >> (sizeof(long) * CHAR_BIT - 1); + return (x ^ sgn) - sgn; +} +__DEVICE__ +inline +long long llabs(long long x) +{ + long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1); + return (x ^ sgn) - sgn; +} + +#if defined(__cplusplus) + __DEVICE__ + inline + long abs(long x) { return labs(x); } + __DEVICE__ + inline + long long abs(long long x) { return llabs(x); } 
+#endif +// END INTEGER + +__DEVICE__ +inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) { + return __ocml_fma_f16(x, y, z); +} + +__DEVICE__ +inline float fma(float x, float y, float z) { + return fmaf(x, y, z); +} + +#pragma push_macro("__DEF_FLOAT_FUN") +#pragma push_macro("__DEF_FLOAT_FUN2") +#pragma push_macro("__DEF_FLOAT_FUN2I") +#pragma push_macro("__HIP_OVERLOAD") +#pragma push_macro("__HIP_OVERLOAD2") + +// __hip_enable_if::type is a type function which returns __T if __B is true. +template +struct __hip_enable_if {}; + +template struct __hip_enable_if { + typedef __T type; +}; + +// __HIP_OVERLOAD1 is used to resolve function calls with an integer argument to +// avoid compilation error due to ambiguity. e.g. floor(5) is resolved with +// floor(double). +#define __HIP_OVERLOAD1(__retty, __fn) \ + template \ + __DEVICE__ \ + typename __hip_enable_if::is_integer, \ + __retty>::type \ + __fn(__T __x) { \ + return ::__fn((double)__x); \ + } + +// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double +// or integer argument to avoid compilation error due to ambiguity. e.g. +// max(5.0f, 6.0) is resolved with max(double, double). +#define __HIP_OVERLOAD2(__retty, __fn) \ + template \ + __DEVICE__ typename __hip_enable_if< \ + std::numeric_limits<__T1>::is_specialized && \ + std::numeric_limits<__T2>::is_specialized, \ + __retty>::type \ + __fn(__T1 __x, __T2 __y) { \ + return __fn((double)__x, (double)__y); \ + } + +// Define cmath functions that take a float argument and return float. +#define __DEF_FUN1(retty, func) \ +__DEVICE__ \ +inline \ +float func(float x) \ +{ \ + return func##f(x); \ +} \ +__HIP_OVERLOAD1(retty, func) + +// Define cmath functions that take a float argument and return retty. +#define __DEF_FUNI(retty, func) \ +__DEVICE__ \ +inline \ +retty func(float x) \ +{ \ + return func##f(x); \ +} \ +__HIP_OVERLOAD1(retty, func) + +// Define cmath functions with two float arguments. 
+#define __DEF_FUN2(retty, func) \ +__DEVICE__ \ +inline \ +float func(float x, float y) \ +{ \ + return func##f(x, y); \ +} \ +__HIP_OVERLOAD2(retty, func) + +__DEF_FUN1(double, acos) +__DEF_FUN1(double, acosh) +__DEF_FUN1(double, asin) +__DEF_FUN1(double, asinh) +__DEF_FUN1(double, atan) +__DEF_FUN2(double, atan2); +__DEF_FUN1(double, atanh) +__DEF_FUN1(double, cbrt) +__DEF_FUN1(double, ceil) +__DEF_FUN2(double, copysign); +__DEF_FUN1(double, cos) +__DEF_FUN1(double, cosh) +__DEF_FUN1(double, erf) +__DEF_FUN1(double, erfc) +__DEF_FUN1(double, exp) +__DEF_FUN1(double, exp2) +__DEF_FUN1(double, expm1) +__DEF_FUN1(double, fabs) +__DEF_FUN2(double, fdim); +__DEF_FUN1(double, floor) +__DEF_FUN2(double, fmax); +__DEF_FUN2(double, fmin); +__DEF_FUN2(double, fmod); +//__HIP_OVERLOAD1(int, fpclassify) +__DEF_FUN2(double, hypot); +__DEF_FUNI(int, ilogb) +__HIP_OVERLOAD1(bool, isfinite) +__HIP_OVERLOAD2(bool, isgreater); +__HIP_OVERLOAD2(bool, isgreaterequal); +__HIP_OVERLOAD1(bool, isinf); +__HIP_OVERLOAD2(bool, isless); +__HIP_OVERLOAD2(bool, islessequal); +__HIP_OVERLOAD2(bool, islessgreater); +__HIP_OVERLOAD1(bool, isnan); +//__HIP_OVERLOAD1(bool, isnormal) +__HIP_OVERLOAD2(bool, isunordered); +__DEF_FUN1(double, lgamma) +__DEF_FUN1(double, log) +__DEF_FUN1(double, log10) +__DEF_FUN1(double, log1p) +__DEF_FUN1(double, log2) +__DEF_FUN1(double, logb) +__DEF_FUNI(long long, llrint) +__DEF_FUNI(long long, llround) +__DEF_FUNI(long, lrint) +__DEF_FUNI(long, lround) +__DEF_FUN1(double, nearbyint); +__DEF_FUN2(double, nextafter); +__DEF_FUN2(double, pow); +__DEF_FUN2(double, remainder); +__DEF_FUN1(double, rint); +__DEF_FUN1(double, round); +__HIP_OVERLOAD1(bool, signbit) +__DEF_FUN1(double, sin) +__DEF_FUN1(double, sinh) +__DEF_FUN1(double, sqrt) +__DEF_FUN1(double, tan) +__DEF_FUN1(double, tanh) +__DEF_FUN1(double, tgamma) +__DEF_FUN1(double, trunc); + +// define cmath functions with a float and an integer argument. 
+#define __DEF_FLOAT_FUN2I(func) \ +__DEVICE__ \ +inline \ +float func(float x, int y) \ +{ \ + return func##f(x, y); \ +} +__DEF_FLOAT_FUN2I(scalbn) +__DEF_FLOAT_FUN2I(ldexp) + +template +__DEVICE__ inline T min(T arg1, T arg2) { + return (arg1 < arg2) ? arg1 : arg2; +} + +template +__DEVICE__ inline T max(T arg1, T arg2) { + return (arg1 > arg2) ? arg1 : arg2; +} + +__DEVICE__ inline int min(int arg1, int arg2) { + return (arg1 < arg2) ? arg1 : arg2; +} +__DEVICE__ inline int max(int arg1, int arg2) { + return (arg1 > arg2) ? arg1 : arg2; +} + +__DEVICE__ inline int min(uint32_t arg1, int arg2) { + return (arg1 < arg2) ? arg1 : arg2; +} +__DEVICE__ inline int max(uint32_t arg1, int arg2) { + return (arg1 > arg2) ? arg1 : arg2; +} + +__DEVICE__ +inline +float max(float x, float y) { + return fmaxf(x, y); +} + +__DEVICE__ +inline +double max(double x, double y) { + return fmax(x, y); +} + +__DEVICE__ +inline +float min(float x, float y) { + return fminf(x, y); +} + +__DEVICE__ +inline +double min(double x, double y) { + return fmin(x, y); +} + +__HIP_OVERLOAD2(double, max) +__HIP_OVERLOAD2(double, min) + +#if !defined(__HIPCC_RTC__) +__host__ inline static int min(int arg1, int arg2) { + return std::min(arg1, arg2); +} + +__host__ inline static int max(int arg1, int arg2) { + return std::max(arg1, arg2); +} +#endif // !defined(__HIPCC_RTC__) + +__DEVICE__ +inline float pow(float base, int iexp) { + return powif(base, iexp); +} + +__DEVICE__ +inline double pow(double base, int iexp) { + return powi(base, iexp); +} + +__DEVICE__ +inline _Float16 pow(_Float16 base, int iexp) { + return __ocml_pown_f16(base, iexp); +} + +#pragma pop_macro("__DEF_FLOAT_FUN") +#pragma pop_macro("__DEF_FLOAT_FUN2") +#pragma pop_macro("__DEF_FLOAT_FUN2I") +#pragma pop_macro("__HIP_OVERLOAD") +#pragma pop_macro("__HIP_OVERLOAD2") + +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ + +#pragma pop_macro("__DEVICE__") +#pragma pop_macro("__RETURN_TYPE") + +// For backward compatibility. 
+// There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros +// defined after including math_functions.h. +#include diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h new file mode 100644 index 0000000000..bae218ee14 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h @@ -0,0 +1,362 @@ +/* +Copyright (c) 2018 - 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H + +#if defined(__cplusplus) + +#include +#include +#include + +#define __HIP_SURFACE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)surfObj; + +template +struct __hip_is_isurf_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_isurf_channel_type> +{ + static constexpr bool value = + __hip_is_isurf_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 3) || + (rank == 4)); +}; + +// CUDA is using byte address, need map to pixel address for HIP +static __HOST_DEVICE__ __forceinline__ int __hipGetPixelAddr(int x, int format, int order) { + /* + * use below format index to generate format LUT + typedef enum { + HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0, + HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9, + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12, + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13, + HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14, + HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15 + } hsa_ext_image_channel_type_t; + */ + static const int FormatLUT[] = { 0, 1, 0, 1, 3, 1, 1, 1, 0, 1, 2, 0, 1, 2, 1, 2 }; + x = FormatLUT[format] == 3 ? 
x / FormatLUT[format] : x >> FormatLUT[format]; + + /* + * use below order index to generate order LUT + typedef enum { + HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0, + HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1, + HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2, + HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4, + HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7, + HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8, + HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9, + HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10, + HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13, + HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14, + HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15, + HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16, + HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17, + HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18, + HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19 + } hsa_ext_image_channel_order_t; + */ + static const int OrderLUT[] = { 0, 0, 1, 1, 3, 1, 3, 2, 2, 2, 2, 2, 3, 2, 2, 2, 0, 0, 0, 0 }; + return x = OrderLUT[order] == 3 ? 
x / OrderLUT[order] : x >> OrderLUT[order]; +} + +template < + typename T, + typename std::enable_if::value>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 1>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 2>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + tmp.y = static_cast(t.y); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 3>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + tmp.y = static_cast(t.y); + tmp.z = static_cast(t.z); + return tmp; +} + +template < + typename T, + typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 4>::type* = nullptr> +static __HOST_DEVICE__ __forceinline__ float4::Native_vec_ __hipMapToNativeFloat4(const T& t) { + float4::Native_vec_ tmp; + tmp.x = static_cast(t.x); + tmp.y = static_cast(t.y); + tmp.z = static_cast(t.z); + tmp.w = static_cast(t.w); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp = static_cast(u.x); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename 
T::value_type) == 1, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 2, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + tmp.y = static_cast(u.y); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 3, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + tmp.y = static_cast(u.y); + tmp.z = static_cast(u.z); + return tmp; +} + +template +static __HOST_DEVICE__ __forceinline__ +typename std::enable_if::value && sizeof(T) / sizeof(typename T::value_type) == 4, const T>::type +__hipMapFromNativeFloat4(const float4::Native_vec_& u) { + T tmp; + tmp.x = static_cast(u.x); + tmp.y = static_cast(u.y); + tmp.z = static_cast(u.z); + tmp.w = static_cast(u.w); + return tmp; +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1Dread(T* data, hipSurfaceObject_t surfObj, int x, + int boundaryMode = hipBoundaryModeZero) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __ockl_image_load_1D(i, x); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1Dwrite(T data, hipSurfaceObject_t surfObj, int x) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_1D(i, x, tmp); 
+} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_2D(i, int2(x, y).data); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_2D(i, int2(x, y).data, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, int z) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); + auto tmp = __ockl_image_load_3D(i, int4(x, y, z, 0).data); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int z) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_3D(i, int4(x, y, z, 0).data, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1DLayeredread(T* data, 
hipSurfaceObject_t surfObj, int x, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __ockl_image_load_lod_1D(i, x, layer); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf1DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_lod_1D(i, x, layer, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_lod_2D(i, int2(x, y).data, layer); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surf2DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_lod_2D(i, int2(x, y).data, layer, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, 
__ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_CM(i, int2(x, y).data, face); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int face) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_CM(i, int2(x, y).data, face, tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face, + int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __ockl_image_load_lod_CM(i, int2(x, y).data, face, layer); + *data = __hipMapFromNativeFloat4(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_isurf_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfaceObject_t surfObj, int x, int y, int face, + int layer) { + __HIP_SURFACE_OBJECT_PARAMETERS_INIT + x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); + auto tmp = __hipMapToNativeFloat4(data); + __ockl_image_store_lod_CM(i, int2(x, y).data, face, layer, tmp); +} + +#endif +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h new file mode 100644 index 0000000000..fb6065b187 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h @@ -0,0 +1,503 @@ +/* +Copyright (c) 2022 - 2023 
Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" + +__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { + union { int i; unsigned u; float f; } tmp; tmp.u = src; + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_bpermutef(int index, float src) { + union { int i; unsigned u; float f; } tmp; tmp.f = src; + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); + return tmp.f; +} + +__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { + union { int i; unsigned u; float f; } tmp; tmp.u = src; + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_permutef(int index, float src) { + union { int i; unsigned u; float f; } tmp; tmp.f = src; + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); + return tmp.f; +} + +#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src)) +#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src)) + +template +__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) { + union { int i; unsigned u; float f; } tmp; tmp.u = src; + tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.u; +} + +template +__device__ static inline float __hip_ds_swizzlef_N(float src) { + union { int i; unsigned u; float f; } tmp; tmp.f = src; + tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.f; +} + +#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \ + 
__hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src)) + +template +__device__ static inline int __hip_move_dpp_N(int src) { + return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, + bound_ctrl); +} + +static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE; + +__device__ +inline +int __shfl(int var, int src_lane, int width = warpSize) { + int self = __lane_id(); + int index = src_lane + (self & ~(width-1)); + return __builtin_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.u = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.u; +} +__device__ +inline +float __shfl(float var, int src_lane, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.f = var; + tmp.i = __shfl(tmp.i, src_lane, width); + return tmp.f; +} +__device__ +inline +double __shfl(double var, int src_lane, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ +inline +long __shfl(long var, int src_lane, int width = warpSize) +{ + #ifndef _MSC_VER + static_assert(sizeof(long) == 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; + #else + static_assert(sizeof(long) == sizeof(int), ""); + return 
static_cast(__shfl(static_cast(var), src_lane, width)); + #endif +} +__device__ +inline +unsigned long __shfl(unsigned long var, int src_lane, int width = warpSize) { + #ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long) == sizeof(uint64_t), ""); + + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; + #else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl(static_cast(var), src_lane, width)); + #endif +} +__device__ +inline +long long __shfl(long long var, int src_lane, int width = warpSize) +{ + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ +inline +unsigned long long __shfl(unsigned long long var, int src_lane, int width = warpSize) { + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); + + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl(tmp[0], src_lane, width); + tmp[1] = __shfl(tmp[1], src_lane, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ +inline +int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) { + int self = __lane_id(); + 
int index = self - lane_delta; + index = (index < (self & ~(width-1)))?self:index; + return __builtin_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.u = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return tmp.u; +} +__device__ +inline +float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.f = var; + tmp.i = __shfl_up(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ +inline +double __shfl_up(double var, unsigned int lane_delta, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ +inline +long __shfl_up(long var, unsigned int lane_delta, int width = warpSize) +{ + #ifndef _MSC_VER + static_assert(sizeof(long) == 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; + #else + static_assert(sizeof(long) == sizeof(int), ""); + return static_cast(__shfl_up(static_cast(var), lane_delta, width)); + #endif +} + +__device__ +inline +unsigned long __shfl_up(unsigned long var, unsigned int lane_delta, int width = warpSize) +{ + #ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + 
static_assert(sizeof(unsigned long) == sizeof(uint64_t), ""); + + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; + #else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl_up(static_cast(var), lane_delta, width)); + #endif +} + +__device__ +inline +long long __shfl_up(long long var, unsigned int lane_delta, int width = warpSize) +{ + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(uint64_t), ""); + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ +inline +unsigned long long __shfl_up(unsigned long long var, unsigned int lane_delta, int width = warpSize) +{ + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_up(tmp[0], lane_delta, width); + tmp[1] = __shfl_up(tmp[1], lane_delta, width); + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ +inline +int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) { + int self = __lane_id(); + int index = self + lane_delta; + index = (int)((self&(width-1))+lane_delta) >= width?self:index; + return __builtin_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int 
__shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.u = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.u; +} +__device__ +inline +float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.f = var; + tmp.i = __shfl_down(tmp.i, lane_delta, width); + return tmp.f; +} +__device__ +inline +double __shfl_down(double var, unsigned int lane_delta, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ +inline +long __shfl_down(long var, unsigned int lane_delta, int width = warpSize) +{ + #ifndef _MSC_VER + static_assert(sizeof(long) == 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; + #else + static_assert(sizeof(long) == sizeof(int), ""); + return static_cast(__shfl_down(static_cast(var), lane_delta, width)); + #endif +} +__device__ +inline +unsigned long __shfl_down(unsigned long var, unsigned int lane_delta, int width = warpSize) +{ + #ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long) == sizeof(uint64_t), ""); + + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = 
__shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; + #else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl_down(static_cast(var), lane_delta, width)); + #endif +} +__device__ +inline +long long __shfl_down(long long var, unsigned int lane_delta, int width = warpSize) +{ + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(uint64_t), ""); + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ +inline +unsigned long long __shfl_down(unsigned long long var, unsigned int lane_delta, int width = warpSize) +{ + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_down(tmp[0], lane_delta, width); + tmp[1] = __shfl_down(tmp[1], lane_delta, width); + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +__device__ +inline +int __shfl_xor(int var, int lane_mask, int width = warpSize) { + int self = __lane_id(); + int index = self^lane_mask; + index = index >= ((self+width)&~(width-1))?self:index; + return __builtin_amdgcn_ds_bpermute(index<<2, var); +} +__device__ +inline +unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.u = var; + tmp.i = __shfl_xor(tmp.i, 
lane_mask, width); + return tmp.u; +} +__device__ +inline +float __shfl_xor(float var, int lane_mask, int width = warpSize) { + union { int i; unsigned u; float f; } tmp; tmp.f = var; + tmp.i = __shfl_xor(tmp.i, lane_mask, width); + return tmp.f; +} +__device__ +inline +double __shfl_xor(double var, int lane_mask, int width = warpSize) { + static_assert(sizeof(double) == 2 * sizeof(int), ""); + static_assert(sizeof(double) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + double tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ +inline +long __shfl_xor(long var, int lane_mask, int width = warpSize) +{ + #ifndef _MSC_VER + static_assert(sizeof(long) == 2 * sizeof(int), ""); + static_assert(sizeof(long) == sizeof(uint64_t), ""); + + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; + #else + static_assert(sizeof(long) == sizeof(int), ""); + return static_cast(__shfl_xor(static_cast(var), lane_mask, width)); + #endif +} +__device__ +inline +unsigned long __shfl_xor(unsigned long var, int lane_mask, int width = warpSize) +{ + #ifndef _MSC_VER + static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long) == sizeof(uint64_t), ""); + + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return 
tmp1; + #else + static_assert(sizeof(unsigned long) == sizeof(unsigned int), ""); + return static_cast(__shfl_xor(static_cast(var), lane_mask, width)); + #endif +} +__device__ +inline +long long __shfl_xor(long long var, int lane_mask, int width = warpSize) +{ + static_assert(sizeof(long long) == 2 * sizeof(int), ""); + static_assert(sizeof(long long) == sizeof(uint64_t), ""); + int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} +__device__ +inline +unsigned long long __shfl_xor(unsigned long long var, int lane_mask, int width = warpSize) +{ + static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), ""); + static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); + unsigned int tmp[2]; __builtin_memcpy(tmp, &var, sizeof(tmp)); + tmp[0] = __shfl_xor(tmp[0], lane_mask, width); + tmp[1] = __shfl_xor(tmp[1], lane_mask, width); + uint64_t tmp0 = (static_cast(tmp[1]) << 32ull) | static_cast(tmp[0]); + unsigned long long tmp1; __builtin_memcpy(&tmp1, &tmp0, sizeof(tmp0)); + return tmp1; +} + +#pragma clang diagnostic pop +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/concepts.hpp b/projects/clr/hipamd/include/hip/amd_detail/concepts.hpp new file mode 100644 index 0000000000..6aa8d56c2b --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/concepts.hpp @@ -0,0 +1,30 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +namespace hip_impl // Documentation only. +{ +#define requires(...) + +#define FunctionalProcedure typename +} // namespace hip_impl diff --git a/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h new file mode 100644 index 0000000000..0222870590 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h @@ -0,0 +1,131 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/device_library_decls.h + * @brief Contains declarations for types and functions in device library. + * Uses int64_t and uint64_t instead of long, long long, unsigned + * long and unsigned long long types for device library API + * declarations. 
+ */ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H + +#include "hip/amd_detail/host_defines.h" + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long ullong; + +extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int); +extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int); +extern "C" __device__ uint __ockl_activelane_u32(void); + +extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); +extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); +extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint); + +extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar); +extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort); +extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint); +extern "C" __device__ __attribute__((const)) uint64_t __ockl_clz_u64(uint64_t); + +extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float); +extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float); + +extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); +extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); + +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_f64(double); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_f64(double); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_f64(double); + +extern "C" 
__device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); +extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); +extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); + +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u32(uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u32(uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u32(uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s64(int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s64(int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s64(int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u64(uint64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u64(uint64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u64(uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_s64(int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_s64(int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_s64(int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_u64(uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_u64(uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_u64(uint64_t); + +extern "C" __device__ __attribute__((convergent)) void __ockl_gws_init(uint nwm1, uint rid); +extern "C" __device__ __attribute__((convergent)) void __ockl_gws_barrier(uint nwm1, uint rid); + +extern "C" __device__ 
__attribute__((const)) uint32_t __ockl_lane_u32(); +extern "C" __device__ __attribute__((const)) int __ockl_grid_is_valid(void); +extern "C" __device__ __attribute__((convergent)) void __ockl_grid_sync(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_num_grids(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_grid_rank(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_size(void); +extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank(void); +extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void); +extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void); + +extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float); + +extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_add_i32(int a); +extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_and_i32(int a); +extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_or_i32(int a); + +extern "C" __device__ uint64_t __ockl_fprintf_stderr_begin(); +extern "C" __device__ uint64_t __ockl_fprintf_append_args(uint64_t msg_desc, uint32_t num_args, + uint64_t value0, uint64_t value1, + uint64_t value2, uint64_t value3, + uint64_t value4, uint64_t value5, + uint64_t value6, uint32_t is_last); +extern "C" __device__ uint64_t __ockl_fprintf_append_string_n(uint64_t msg_desc, const char* data, + uint64_t length, uint32_t is_last); + +// Introduce local address space +#define __local __attribute__((address_space(3))) + +#ifdef __HIP_DEVICE_COMPILE__ +__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } +#endif //__HIP_DEVICE_COMPILE__ + +// Using hip.amdgcn.bc - sync threads +#define __CLK_LOCAL_MEM_FENCE 0x01 +typedef unsigned __cl_mem_fence_flags; + +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/functional_grid_launch.hpp 
b/projects/clr/hipamd/include/hip/amd_detail/functional_grid_launch.hpp new file mode 100644 index 0000000000..6f2857de46 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/functional_grid_launch.hpp @@ -0,0 +1,218 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "concepts.hpp" +#include "helpers.hpp" +#include "program_state.hpp" +#include "hip_runtime_api.h" + +#include +#include +#include +#include +#include +#include + +hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags, hip_impl::program_state& ps); + +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, + dim3 blockDim, void** args, + size_t sharedMem, hipStream_t stream, + hip_impl::program_state& ps); + +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags, + hip_impl::program_state& ps); + +#pragma GCC visibility push(hidden) + +namespace hip_impl { +template {}>::type* = nullptr> +inline T round_up_to_next_multiple_nonnegative(T x, T y) { + T tmp = x + y - 1; + return tmp - tmp % y; +} + +template < + std::size_t n, + typename... Ts, + typename std::enable_if::type* = nullptr> +inline hip_impl::kernarg make_kernarg( + const std::tuple&, + const kernargs_size_align&, + hip_impl::kernarg kernarg) { + return kernarg; +} + +template < + std::size_t n, + typename... 
Ts, + typename std::enable_if::type* = nullptr> +inline hip_impl::kernarg make_kernarg( + const std::tuple& formals, + const kernargs_size_align& size_align, + hip_impl::kernarg kernarg) { + using T = typename std::tuple_element>::type; + + static_assert( + !std::is_reference{}, + "A __global__ function cannot have a reference as one of its " + "arguments."); + #if defined(HIP_STRICT) + static_assert( + std::is_trivially_copyable{}, + "Only TriviallyCopyable types can be arguments to a __global__ " + "function"); + #endif + + kernarg.resize(round_up_to_next_multiple_nonnegative( + kernarg.size(), size_align.alignment(n)) + size_align.size(n)); + + std::memcpy( + kernarg.data() + kernarg.size() - size_align.size(n), + &std::get(formals), + size_align.size(n)); + return make_kernarg(formals, size_align, std::move(kernarg)); +} + +template +inline hip_impl::kernarg make_kernarg( + void (*kernel)(Formals...), std::tuple actuals) { + static_assert(sizeof...(Formals) == sizeof...(Actuals), + "The count of formal arguments must match the count of actuals."); + + if (sizeof...(Formals) == 0) return {}; + + std::tuple to_formals{std::move(actuals)}; + hip_impl::kernarg kernarg; + kernarg.reserve(sizeof(to_formals)); + + auto& ps = hip_impl::get_program_state(); + return make_kernarg<0>(to_formals, + ps.get_kernargs_size_align( + reinterpret_cast(kernel)), + std::move(kernarg)); +} + + +HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(hipStream_t stream); + +inline +__attribute__((visibility("hidden"))) +void hipLaunchKernelGGLImpl( + std::uintptr_t function_address, + const dim3& numBlocks, + const dim3& dimBlocks, + std::uint32_t sharedMemBytes, + hipStream_t stream, + void** kernarg) { + + const auto& kd = hip_impl::get_program_state().kernel_descriptor(function_address, + target_agent(stream)); + + hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z, + dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, + stream, nullptr, kernarg); +} +} // Namespace 
hip_impl. + + +template +inline +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + + using namespace hip_impl; + + hip_impl::hip_init(); + auto f = get_program_state().kernel_descriptor(reinterpret_cast(kernel), + target_agent(0)); + + return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, + dynSharedMemPerBlk, blockSizeLimit); +} + +template +inline +hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) { + + using namespace hip_impl; + + hip_impl::hip_init(); + if(flags != hipOccupancyDefault) return hipErrorNotSupported; + auto f = get_program_state().kernel_descriptor(reinterpret_cast(kernel), + target_agent(0)); + + return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, + dynSharedMemPerBlk, blockSizeLimit); +} + +template +inline +void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, + Args... 
args) { + hip_impl::hip_init(); + auto kernarg = hip_impl::make_kernarg(kernel, std::tuple{std::move(args)...}); + std::size_t kernarg_size = kernarg.size(); + + void* config[]{ + HIP_LAUNCH_PARAM_BUFFER_POINTER, + kernarg.data(), + HIP_LAUNCH_PARAM_BUFFER_SIZE, + &kernarg_size, + HIP_LAUNCH_PARAM_END}; + + hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast(kernel), + numBlocks, dimBlocks, sharedMemBytes, + stream, &config[0]); +} + +template +inline +__attribute__((visibility("hidden"))) +hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMem, + hipStream_t stream) { + hip_impl::hip_init(); + auto& ps = hip_impl::get_program_state(); + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, + blockDim, args, sharedMem, stream, ps); +} + +inline +__attribute__((visibility("hidden"))) +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags) { + + hip_impl::hip_init(); + auto& ps = hip_impl::get_program_state(); + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps); +} + +#pragma GCC visibility pop diff --git a/projects/clr/hipamd/include/hip/amd_detail/grid_launch.h b/projects/clr/hipamd/include/hip/amd_detail/grid_launch.h new file mode 100644 index 0000000000..22841a5657 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/grid_launch.h @@ -0,0 +1,67 @@ +#pragma once + +#include + +#include + +#define GRID_LAUNCH_VERSION 20 + +// Extern definitions +namespace hc{ +class completion_future; +class accelerator_view; +} + + +// 3 dim structure for groups and grids. +typedef struct gl_dim3 +{ + int x,y,z; + gl_dim3(uint32_t _x=1, uint32_t _y=1, uint32_t _z=1) : x(_x), y(_y), z(_z) {}; +} gl_dim3; + +typedef enum gl_barrier_bit { + barrier_bit_queue_default, + barrier_bit_none, + barrier_bit_wait, +} gl_barrier_bit; + + +// grid_launch_parm contains information used to launch the kernel. 
+typedef struct grid_launch_parm +{ + //! Grid dimensions + gl_dim3 grid_dim; + + //! Group dimensions + gl_dim3 group_dim; + + //! Amount of dynamic group memory to use with the kernel launch. + //! This memory is in addition to the amount used statically in the kernel. + unsigned int dynamic_group_mem_bytes; + + //! Control setting of barrier bit on per-packet basis: + //! See gl_barrier_bit description. + //! Placeholder, is not used to control packet dispatch yet + enum gl_barrier_bit barrier_bit; + + //! Value of packet fences to apply to launch. + //! The correspond to the value of bits 9:14 in the AQL packet, + //! see HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE and hsa_fence_scope_t. + unsigned int launch_fence; + + //! Pointer to the accelerator_view where the kernel should execute. + //! If NULL, the default view on the default accelerator is used. + hc::accelerator_view *av; + + //! Pointer to the completion_future used to track the status of the command. + //! If NULL, the command does not write status. In this case, + //! synchronization can be enforced with queue-level waits or + //! waiting on younger commands. 
+ hc::completion_future *cf; + + grid_launch_parm() = default; +} grid_launch_parm; + + +extern void init_grid_launch(grid_launch_parm *gl); diff --git a/projects/clr/hipamd/include/hip/amd_detail/grid_launch.hpp b/projects/clr/hipamd/include/hip/amd_detail/grid_launch.hpp new file mode 100644 index 0000000000..04ce7e0366 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/grid_launch.hpp @@ -0,0 +1,50 @@ +#pragma once + +#include "grid_launch.h" +#include "hc.hpp" + +class grid_launch_parm_cxx : public grid_launch_parm +{ +public: + grid_launch_parm_cxx() = default; + + // customized serialization: don't need av and cf in kernel + __attribute__((annotate("serialize"))) + void __cxxamp_serialize(Kalmar::Serialize& s) const { + s.Append(sizeof(int), &grid_dim.x); + s.Append(sizeof(int), &grid_dim.y); + s.Append(sizeof(int), &grid_dim.z); + s.Append(sizeof(int), &group_dim.x); + s.Append(sizeof(int), &group_dim.y); + s.Append(sizeof(int), &group_dim.z); + } + + __attribute__((annotate("user_deserialize"))) + grid_launch_parm_cxx(int grid_dim_x, int grid_dim_y, int grid_dim_z, + int group_dim_x, int group_dim_y, int group_dim_z) { + grid_dim.x = grid_dim_x; + grid_dim.y = grid_dim_y; + grid_dim.z = grid_dim_z; + group_dim.x = group_dim_x; + group_dim.y = group_dim_y; + group_dim.z = group_dim_z; + } +}; + + +extern inline void grid_launch_init(grid_launch_parm *lp) { + lp->grid_dim.x = lp->grid_dim.y = lp->grid_dim.z = 1; + + lp->group_dim.x = lp->group_dim.y = lp->group_dim.z = 1; + + lp->dynamic_group_mem_bytes = 0; + + lp->barrier_bit = barrier_bit_queue_default; + lp->launch_fence = -1; + + // TODO - set to NULL? 
+ static hc::accelerator_view av = hc::accelerator().get_default_view(); + lp->av = &av; + lp->cf = NULL; +} + diff --git a/projects/clr/hipamd/include/hip/amd_detail/grid_launch_GGL.hpp b/projects/clr/hipamd/include/hip/amd_detail/grid_launch_GGL.hpp new file mode 100644 index 0000000000..df5949d7d0 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/grid_launch_GGL.hpp @@ -0,0 +1,26 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#pragma once + +#if GENERIC_GRID_LAUNCH == 1 +#include "macro_based_grid_launch.hpp" +#endif // GENERIC_GRID_LAUNCH \ No newline at end of file diff --git a/projects/clr/hipamd/include/hip/amd_detail/helpers.hpp b/projects/clr/hipamd/include/hip/amd_detail/helpers.hpp new file mode 100644 index 0000000000..5f7935c1ec --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/helpers.hpp @@ -0,0 +1,137 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include "concepts.hpp" + +#include // For std::conditional, std::decay, std::enable_if, + // std::false_type, std result_of and std::true_type. +#include // For std::declval. 
+ +#ifdef __has_include // Check if __has_include is present +# if __has_include() // Check for version header +# include +# if defined(__cpp_lib_is_invocable) && !defined(HIP_HAS_INVOCABLE) +# define HIP_HAS_INVOCABLE __cpp_lib_is_invocable +# endif +# if defined(__cpp_lib_result_of_sfinae) && !defined(HIP_HAS_RESULT_OF_SFINAE) +# define HIP_HAS_RESULT_OF_SFINAE __cpp_lib_result_of_sfinae +# endif +# endif +#endif + +#ifndef HIP_HAS_INVOCABLE +#define HIP_HAS_INVOCABLE 0 +#endif + +#ifndef HIP_HAS_RESULT_OF_SFINAE +#define HIP_HAS_RESULT_OF_SFINAE 0 +#endif + +namespace std { // TODO: these should be removed as soon as possible. +#if (__cplusplus < 201406L) +#if (__cplusplus < 201402L) +template +using enable_if_t = typename enable_if::type; +template +using conditional_t = typename conditional::type; +template +using decay_t = typename decay::type; +template +using result_of_t = typename result_of::type; +template +using remove_reference_t = typename remove_reference::type; +#endif +#endif +} // namespace std + +namespace hip_impl { +template +using void_t_ = void; + +#if HIP_HAS_INVOCABLE +template +struct is_callable_impl; + +template +struct is_callable_impl : std::is_invocable {}; +#elif HIP_HAS_RESULT_OF_SFINAE +template +struct is_callable_impl : std::false_type {}; + +template +struct is_callable_impl::type > > : std::true_type {}; +#else +template +auto simple_invoke(T Base::*pmd, Derived&& ref) +-> decltype(static_cast(ref).*pmd); + +template +auto simple_invoke(PMD&& pmd, Pointer&& ptr) +-> decltype((*static_cast(ptr)).*static_cast(pmd)); + +template +auto simple_invoke(T Base::*pmd, const std::reference_wrapper& ref) +-> decltype(ref.get().*pmd); + +template +auto simple_invoke(T Base::*pmf, Derived&& ref, Args&&... args) +-> decltype((static_cast(ref).*pmf)(static_cast(args)...)); + +template +auto simple_invoke(PMF&& pmf, Pointer&& ptr, Args&&... 
args) +-> decltype(((*static_cast(ptr)).*static_cast(pmf))(static_cast(args)...)); + +template +auto simple_invoke(T Base::*pmf, const std::reference_wrapper& ref, Args&&... args) +-> decltype((ref.get().*pmf)(static_cast(args)...)); + +template +auto simple_invoke(F&& f, Ts&&... xs) +-> decltype(f(static_cast(xs)...)); + +template +struct is_callable_impl : std::false_type {}; + +template +struct is_callable_impl(), std::declval()...))> > + : std::true_type {}; + +#endif + +template +struct is_callable : is_callable_impl {}; + +#define count_macro_args_impl_hip_(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ + _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, \ + _26, _27, _28, _29, _30, _31, _n, ...) \ + _n +#define count_macro_args_hip_(...) \ + count_macro_args_impl_hip_(, ##__VA_ARGS__, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \ + 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, \ + 0) + +#define overloaded_macro_expand_hip_(macro, arg_cnt) macro##arg_cnt +#define overload_macro_impl_hip_(macro, arg_cnt) overloaded_macro_expand_hip_(macro, arg_cnt) +#define overload_macro_hip_(macro, ...) \ + overload_macro_impl_hip_(macro, count_macro_args_hip_(__VA_ARGS__))(__VA_ARGS__) +} // namespace hip_impl diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h b/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h new file mode 100644 index 0000000000..877c6a43b5 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h @@ -0,0 +1,222 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/hip_cooperative_groups_helper.h + * + * @brief Device side implementation of cooperative group feature. + * + * Defines helper constructs and APIs which aid the types and device API + * wrappers defined within `amd_detail/hip_cooperative_groups.h`. 
+ */ +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H + +#if __cplusplus +#if !defined(__HIPCC_RTC__) +#include +#endif +#if !defined(__align__) +#define __align__(x) __attribute__((aligned(x))) +#endif + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#pragma clang diagnostic ignored "-Wshorten-64-to-32" + +#if !defined(__CG_QUALIFIER__) +#define __CG_QUALIFIER__ __device__ __forceinline__ +#endif + +#if !defined(__CG_STATIC_QUALIFIER__) +#define __CG_STATIC_QUALIFIER__ __device__ static __forceinline__ +#endif + +#if !defined(_CG_STATIC_CONST_DECL_) +#define _CG_STATIC_CONST_DECL_ static constexpr +#endif + +#if __AMDGCN_WAVEFRONT_SIZE == 32 +using lane_mask = unsigned int; +#else +using lane_mask = unsigned long long int; +#endif + +namespace cooperative_groups { + +/* Global scope */ +template +using is_power_of_2 = std::integral_constant; + +template +using is_valid_wavefront = std::integral_constant; + +template +using is_valid_tile_size = + std::integral_constant::value && is_valid_wavefront::value>; + +template +using is_valid_type = + std::integral_constant::value || std::is_floating_point::value>; + +namespace internal { + +/** \brief Enums representing different cooperative group types + */ +typedef enum { + cg_invalid, + cg_multi_grid, + cg_grid, + cg_workgroup, + cg_tiled_group, + cg_coalesced_group +} group_type; + +/** + * Functionalities related to multi-grid cooperative group type + */ +namespace multi_grid { + +__CG_STATIC_QUALIFIER__ uint32_t num_grids() { + return static_cast(__ockl_multi_grid_num_grids()); } + +__CG_STATIC_QUALIFIER__ uint32_t grid_rank() { + return static_cast(__ockl_multi_grid_grid_rank()); } + +__CG_STATIC_QUALIFIER__ uint32_t size() { return static_cast(__ockl_multi_grid_size()); } 
+ +__CG_STATIC_QUALIFIER__ uint32_t thread_rank() { + return static_cast(__ockl_multi_grid_thread_rank()); } + +__CG_STATIC_QUALIFIER__ bool is_valid() { return static_cast(__ockl_multi_grid_is_valid()); } + +__CG_STATIC_QUALIFIER__ void sync() { __ockl_multi_grid_sync(); } + +} // namespace multi_grid + +/** + * Functionalities related to grid cooperative group type + */ +namespace grid { + +__CG_STATIC_QUALIFIER__ uint32_t size() { + return static_cast((blockDim.z * gridDim.z) * (blockDim.y * gridDim.y) * + (blockDim.x * gridDim.x)); +} + +__CG_STATIC_QUALIFIER__ uint32_t thread_rank() { + // Compute global id of the workgroup to which the current thread belongs to + uint32_t blkIdx = static_cast((blockIdx.z * gridDim.y * gridDim.x) + + (blockIdx.y * gridDim.x) + (blockIdx.x)); + + // Compute total number of threads being passed to reach current workgroup + // within grid + uint32_t num_threads_till_current_workgroup = + static_cast(blkIdx * (blockDim.x * blockDim.y * blockDim.z)); + + // Compute thread local rank within current workgroup + uint32_t local_thread_rank = static_cast((threadIdx.z * blockDim.y * blockDim.x) + + (threadIdx.y * blockDim.x) + (threadIdx.x)); + + return (num_threads_till_current_workgroup + local_thread_rank); +} + +__CG_STATIC_QUALIFIER__ bool is_valid() { return static_cast(__ockl_grid_is_valid()); } + +__CG_STATIC_QUALIFIER__ void sync() { __ockl_grid_sync(); } + +} // namespace grid + +/** + * Functionalities related to `workgroup` (thread_block in CUDA terminology) + * cooperative group type + */ +namespace workgroup { + +__CG_STATIC_QUALIFIER__ dim3 group_index() { + return (dim3(static_cast(blockIdx.x), static_cast(blockIdx.y), + static_cast(blockIdx.z))); +} + +__CG_STATIC_QUALIFIER__ dim3 thread_index() { + return (dim3(static_cast(threadIdx.x), static_cast(threadIdx.y), + static_cast(threadIdx.z))); +} + +__CG_STATIC_QUALIFIER__ uint32_t size() { + return (static_cast(blockDim.x * blockDim.y * blockDim.z)); +} + 
+__CG_STATIC_QUALIFIER__ uint32_t thread_rank() { + return (static_cast((threadIdx.z * blockDim.y * blockDim.x) + + (threadIdx.y * blockDim.x) + (threadIdx.x))); +} + +__CG_STATIC_QUALIFIER__ bool is_valid() { + // TODO(mahesha) any functionality need to be added here? I believe not + return true; +} + +__CG_STATIC_QUALIFIER__ void sync() { __syncthreads(); } + +} // namespace workgroup + +namespace tiled_group { + +// enforce ordering for memory intructions +__CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent"); } + +} // namespace tiled_group + +namespace coalesced_group { + +// enforce ordering for memory intructions +__CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent"); } + +// Masked bit count +// +// For each thread, this function returns the number of active threads which +// have i-th bit of x set and come before the current thread. +__CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) { + unsigned int counter=0; + #if __AMDGCN_WAVEFRONT_SIZE == 32 + counter = __builtin_amdgcn_mbcnt_lo(x, add); + #else + counter = __builtin_amdgcn_mbcnt_lo(static_cast(x), add); + counter = __builtin_amdgcn_mbcnt_hi(static_cast(x >> 32), counter); + #endif + + return counter; +} + +} // namespace coalesced_group + + +} // namespace internal + +} // namespace cooperative_groups + +#pragma clang diagnostic pop +#endif // __cplusplus +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_gcc.h b/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_gcc.h new file mode 100644 index 0000000000..e76a7fff3a --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_gcc.h @@ -0,0 +1,254 @@ +#pragma once + +#if defined(__cplusplus) + #include +#endif + +struct __half_raw { + unsigned short x; +}; + +struct __half2_raw { + unsigned short x; + unsigned short y; +}; + +#if defined(__cplusplus) 
+ struct __half; + + __half __float2half(float); + float __half2float(__half); + + // BEGIN STRUCT __HALF + struct __half { + protected: + unsigned short __x; + public: + // CREATORS + __half() = default; + __half(const __half_raw& x) : __x{x.x} {} + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + __half(float x) : __x{__float2half(x).__x} {} + __half(double x) : __x{__float2half(x).__x} {} + #endif + __half(const __half&) = default; + __half(__half&&) = default; + ~__half() = default; + + // MANIPULATORS + __half& operator=(const __half&) = default; + __half& operator=(__half&&) = default; + __half& operator=(const __half_raw& x) { __x = x.x; return *this; } + #if !defined(__HIP_NO_HALF_CONVERSIONS__) + __half& operator=(float x) + { + __x = __float2half(x).__x; + return *this; + } + __half& operator=(double x) + { + return *this = static_cast(x); + } + #endif + + // ACCESSORS + operator float() const { return __half2float(*this); } + operator __half_raw() const { return __half_raw{__x}; } + }; + // END STRUCT __HALF + + // BEGIN STRUCT __HALF2 + struct __half2 { + public: + __half x; + __half y; + + // CREATORS + __half2() = default; + __half2(const __half2_raw& ix) + : + x{reinterpret_cast(ix.x)}, + y{reinterpret_cast(ix.y)} + {} + __half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {} + __half2(const __half2&) = default; + __half2(__half2&&) = default; + ~__half2() = default; + + // MANIPULATORS + __half2& operator=(const __half2&) = default; + __half2& operator=(__half2&&) = default; + __half2& operator=(const __half2_raw& ix) + { + x = reinterpret_cast(ix.x); + y = reinterpret_cast(ix.y); + return *this; + } + + // ACCESSORS + operator __half2_raw() const + { + return __half2_raw{ + reinterpret_cast(x), + reinterpret_cast(y)}; + } + }; + // END STRUCT __HALF2 + + inline + unsigned short __internal_float2half( + float flt, unsigned int& sgn, unsigned int& rem) + { + unsigned int x{}; + std::memcpy(&x, &flt, sizeof(flt)); + + unsigned int u = (x & 
0x7fffffffU); + sgn = ((x >> 16) & 0x8000U); + + // NaN/+Inf/-Inf + if (u >= 0x7f800000U) { + rem = 0; + return static_cast( + (u == 0x7f800000U) ? (sgn | 0x7c00U) : 0x7fffU); + } + // Overflows + if (u > 0x477fefffU) { + rem = 0x80000000U; + return static_cast(sgn | 0x7bffU); + } + // Normal numbers + if (u >= 0x38800000U) { + rem = u << 19; + u -= 0x38000000U; + return static_cast(sgn | (u >> 13)); + } + // +0/-0 + if (u < 0x33000001U) { + rem = u; + return static_cast(sgn); + } + // Denormal numbers + unsigned int exponent = u >> 23; + unsigned int mantissa = (u & 0x7fffffU); + unsigned int shift = 0x7eU - exponent; + mantissa |= 0x800000U; + rem = mantissa << (32 - shift); + return static_cast(sgn | (mantissa >> shift)); + } + + inline + __half __float2half(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x; + + return r; + } + + inline + __half __float2half_rn(float x) { return __float2half(x); } + + inline + __half __float2half_rz(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + + return r; + } + + inline + __half __float2half_rd(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + if (rem && sgn) ++r.x; + + return r; + } + + inline + __half __float2half_ru(float x) + { + __half_raw r; + unsigned int sgn{}; + unsigned int rem{}; + r.x = __internal_float2half(x, sgn, rem); + if (rem && !sgn) ++r.x; + + return r; + } + + inline + __half2 __float2half2_rn(float x) + { + return __half2{__float2half_rn(x), __float2half_rn(x)}; + } + + inline + __half2 __floats2half2_rn(float x, float y) + { + return __half2{__float2half_rn(x), __float2half_rn(y)}; + } + + inline + float __internal_half2float(unsigned short x) + { + unsigned int sign = ((x >> 15) & 1); + unsigned int exponent = ((x >> 10) & 0x1f); 
+ unsigned int mantissa = ((x & 0x3ff) << 13); + + if (exponent == 0x1fU) { /* NaN or Inf */ + mantissa = (mantissa ? (sign = 0, 0x7fffffU) : 0); + exponent = 0xffU; + } else if (!exponent) { /* Denorm or Zero */ + if (mantissa) { + unsigned int msb; + exponent = 0x71U; + do { + msb = (mantissa & 0x400000U); + mantissa <<= 1; /* normalize */ + --exponent; + } while (!msb); + mantissa &= 0x7fffffU; /* 1.mantissa is implicit */ + } + } else { + exponent += 0x70U; + } + unsigned int u = ((sign << 31) | (exponent << 23) | mantissa); + float f; + memcpy(&f, &u, sizeof(u)); + + return f; + } + + inline + float __half2float(__half x) + { + return __internal_half2float(static_cast<__half_raw>(x).x); + } + + inline + float __low2float(__half2 x) + { + return __internal_half2float(static_cast<__half2_raw>(x).x); + } + + inline + float __high2float(__half2 x) + { + return __internal_half2float(static_cast<__half2_raw>(x).y); + } + + #if !defined(HIP_NO_HALF) + using half = __half; + using half2 = __half2; + #endif +#endif // defined(__cplusplus) diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h b/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h new file mode 100644 index 0000000000..a4ae27ca6f --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h @@ -0,0 +1,96 @@ +/* +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +// /* +// Half Math Functions +// */ +#if !defined(__HIPCC_RTC__) +#include "host_defines.h" +#endif +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +extern "C" +{ + __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); + __device__ _Float16 __ocml_cos_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); + __device__ __attribute__((const)) + _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); + __device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16); + __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); + __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16); + __device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int); + __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); + __device__ _Float16 __ocml_sin_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16); + __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16); + 
__device__ __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16); + __device__ __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16); + + typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); + typedef short __2i16 __attribute__((ext_vector_type(2))); + + #if defined(__clang__) && defined(__HIP__) + __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s); + #endif + + __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); + __device__ __2f16 __ocml_cos_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16); + __device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16); + __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); + __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); + __device__ __2f16 __ocml_sin_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); + + __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); + __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); + __device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); + +} +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +//TODO: remove these after they get into clang header 
__clang_hip_libdevice_declares.h' +extern "C" { + __device__ __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16); + __device__ __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16); + __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); + __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); + __device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); +} diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_ldg.h b/projects/clr/hipamd/include/hip/amd_detail/hip_ldg.h new file mode 100644 index 0000000000..ce1fb51f46 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_ldg.h @@ -0,0 +1,100 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H + +#if __HIP_CLANG_ONLY__ +#include "amd_hip_vector_types.h" +#include "host_defines.h" + +__device__ inline static char __ldg(const char* ptr) { return *ptr; } + +__device__ inline static char2 __ldg(const char2* ptr) { return *ptr; } + +__device__ inline static char4 __ldg(const char4* ptr) { return *ptr; } + +__device__ inline static signed char __ldg(const signed char* ptr) { return ptr[0]; } + +__device__ inline static unsigned char __ldg(const unsigned char* ptr) { return ptr[0]; } + + +__device__ inline static short __ldg(const short* ptr) { return ptr[0]; } + +__device__ inline static short2 __ldg(const short2* ptr) { return ptr[0]; } + +__device__ inline static short4 __ldg(const short4* ptr) { return ptr[0]; } + +__device__ inline static unsigned short __ldg(const unsigned short* ptr) { return ptr[0]; } + + +__device__ inline static int __ldg(const int* ptr) { return ptr[0]; } + +__device__ inline static int2 __ldg(const int2* ptr) { return ptr[0]; } + +__device__ inline static int4 __ldg(const int4* ptr) { return ptr[0]; } + +__device__ inline static unsigned int __ldg(const unsigned int* ptr) { return ptr[0]; } + + +__device__ inline static long __ldg(const long* ptr) { return ptr[0]; } + +__device__ inline static unsigned long __ldg(const unsigned long* ptr) { return ptr[0]; } + + +__device__ inline static long long __ldg(const long long* ptr) { return ptr[0]; } + +__device__ inline static longlong2 __ldg(const longlong2* ptr) { return ptr[0]; } + +__device__ inline static unsigned long long __ldg(const unsigned long long* ptr) { return ptr[0]; } + + +__device__ inline static uchar2 __ldg(const uchar2* ptr) { return ptr[0]; } + +__device__ inline static uchar4 __ldg(const uchar4* ptr) { return ptr[0]; } + + +__device__ inline static ushort2 __ldg(const ushort2* ptr) { return ptr[0]; } + + +__device__ inline static uint2 __ldg(const uint2* ptr) { return 
ptr[0]; } + +__device__ inline static uint4 __ldg(const uint4* ptr) { return ptr[0]; } + + +__device__ inline static ulonglong2 __ldg(const ulonglong2* ptr) { return ptr[0]; } + + +__device__ inline static float __ldg(const float* ptr) { return ptr[0]; } + +__device__ inline static float2 __ldg(const float2* ptr) { return ptr[0]; } + +__device__ inline static float4 __ldg(const float4* ptr) { return ptr[0]; } + + +__device__ inline static double __ldg(const double* ptr) { return ptr[0]; } + +__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; } + +#endif // __HIP_CLANG_ONLY__ + +#endif // HIP_LDG_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h new file mode 100644 index 0000000000..d0b24d01c0 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -0,0 +1,9784 @@ +// Generated file. DO NOT EDIT. +// +// This file is automatically generated by the hip_prof_gen.py script. +// If changes are required, run the script and commit the updated file. 
+ +#ifndef _HIP_PROF_STR_H +#define _HIP_PROF_STR_H +#define HIP_PROF_VER 1 + +// HIP API callbacks ID enumeration +enum hip_api_id_t { + HIP_API_ID_NONE = 0, + HIP_API_ID_FIRST = 1, + HIP_API_ID___hipPopCallConfiguration = 1, + HIP_API_ID___hipPushCallConfiguration = 2, + HIP_API_ID_hipArray3DCreate = 3, + HIP_API_ID_hipArrayCreate = 4, + HIP_API_ID_hipArrayDestroy = 5, + HIP_API_ID_hipChooseDevice = 6, + HIP_API_ID_hipConfigureCall = 7, + HIP_API_ID_hipCtxCreate = 8, + HIP_API_ID_hipCtxDestroy = 9, + HIP_API_ID_hipCtxDisablePeerAccess = 10, + HIP_API_ID_hipCtxEnablePeerAccess = 11, + HIP_API_ID_hipCtxGetApiVersion = 12, + HIP_API_ID_hipCtxGetCacheConfig = 13, + HIP_API_ID_hipCtxGetCurrent = 14, + HIP_API_ID_hipCtxGetDevice = 15, + HIP_API_ID_hipCtxGetFlags = 16, + HIP_API_ID_hipCtxGetSharedMemConfig = 17, + HIP_API_ID_hipCtxPopCurrent = 18, + HIP_API_ID_hipCtxPushCurrent = 19, + HIP_API_ID_hipCtxSetCacheConfig = 20, + HIP_API_ID_hipCtxSetCurrent = 21, + HIP_API_ID_hipCtxSetSharedMemConfig = 22, + HIP_API_ID_hipCtxSynchronize = 23, + HIP_API_ID_hipDestroyExternalMemory = 24, + HIP_API_ID_hipDestroyExternalSemaphore = 25, + HIP_API_ID_hipDeviceCanAccessPeer = 26, + HIP_API_ID_hipDeviceComputeCapability = 27, + HIP_API_ID_hipDeviceDisablePeerAccess = 28, + HIP_API_ID_hipDeviceEnablePeerAccess = 29, + HIP_API_ID_hipDeviceGet = 30, + HIP_API_ID_hipDeviceGetAttribute = 31, + HIP_API_ID_hipDeviceGetByPCIBusId = 32, + HIP_API_ID_hipDeviceGetCacheConfig = 33, + HIP_API_ID_hipDeviceGetLimit = 34, + HIP_API_ID_hipDeviceGetName = 35, + HIP_API_ID_hipDeviceGetP2PAttribute = 36, + HIP_API_ID_hipDeviceGetPCIBusId = 37, + HIP_API_ID_hipDeviceGetSharedMemConfig = 38, + HIP_API_ID_hipDeviceGetStreamPriorityRange = 39, + HIP_API_ID_hipDevicePrimaryCtxGetState = 40, + HIP_API_ID_hipDevicePrimaryCtxRelease = 41, + HIP_API_ID_hipDevicePrimaryCtxReset = 42, + HIP_API_ID_hipDevicePrimaryCtxRetain = 43, + HIP_API_ID_hipDevicePrimaryCtxSetFlags = 44, + HIP_API_ID_hipDeviceReset = 45, + 
HIP_API_ID_hipDeviceSetCacheConfig = 46, + HIP_API_ID_hipDeviceSetSharedMemConfig = 47, + HIP_API_ID_hipDeviceSynchronize = 48, + HIP_API_ID_hipDeviceTotalMem = 49, + HIP_API_ID_RESERVED_50 = 50, + HIP_API_ID_hipDrvMemcpy2DUnaligned = 51, + HIP_API_ID_hipDrvMemcpy3D = 52, + HIP_API_ID_hipDrvMemcpy3DAsync = 53, + HIP_API_ID_hipEventCreate = 54, + HIP_API_ID_hipEventCreateWithFlags = 55, + HIP_API_ID_hipEventDestroy = 56, + HIP_API_ID_hipEventElapsedTime = 57, + HIP_API_ID_hipEventQuery = 58, + HIP_API_ID_hipEventRecord = 59, + HIP_API_ID_hipEventSynchronize = 60, + HIP_API_ID_hipExtGetLinkTypeAndHopCount = 61, + HIP_API_ID_hipExtLaunchKernel = 62, + HIP_API_ID_hipExtLaunchMultiKernelMultiDevice = 63, + HIP_API_ID_hipExtMallocWithFlags = 64, + HIP_API_ID_hipExtModuleLaunchKernel = 65, + HIP_API_ID_hipExtStreamCreateWithCUMask = 66, + HIP_API_ID_hipExtStreamGetCUMask = 67, + HIP_API_ID_hipExternalMemoryGetMappedBuffer = 68, + HIP_API_ID_hipFree = 69, + HIP_API_ID_hipFreeArray = 70, + HIP_API_ID_hipFreeHost = 71, + HIP_API_ID_hipFreeMipmappedArray = 72, + HIP_API_ID_hipFuncGetAttribute = 73, + HIP_API_ID_hipFuncGetAttributes = 74, + HIP_API_ID_hipFuncSetAttribute = 75, + HIP_API_ID_hipFuncSetCacheConfig = 76, + HIP_API_ID_hipFuncSetSharedMemConfig = 77, + HIP_API_ID_hipGetDevice = 78, + HIP_API_ID_hipGetDeviceCount = 79, + HIP_API_ID_hipGetDeviceFlags = 80, + HIP_API_ID_hipGetDeviceProperties = 81, + HIP_API_ID_RESERVED_82 = 82, + HIP_API_ID_hipGetErrorString = 83, + HIP_API_ID_hipGetLastError = 84, + HIP_API_ID_hipGetMipmappedArrayLevel = 85, + HIP_API_ID_hipGetSymbolAddress = 86, + HIP_API_ID_hipGetSymbolSize = 87, + HIP_API_ID_hipHccModuleLaunchKernel = 88, + HIP_API_ID_hipHostAlloc = 89, + HIP_API_ID_hipHostFree = 90, + HIP_API_ID_hipHostGetDevicePointer = 91, + HIP_API_ID_hipHostGetFlags = 92, + HIP_API_ID_hipHostMalloc = 93, + HIP_API_ID_hipHostRegister = 94, + HIP_API_ID_hipHostUnregister = 95, + HIP_API_ID_hipImportExternalMemory = 96, + 
HIP_API_ID_hipImportExternalSemaphore = 97, + HIP_API_ID_hipInit = 98, + HIP_API_ID_hipIpcCloseMemHandle = 99, + HIP_API_ID_hipIpcGetEventHandle = 100, + HIP_API_ID_hipIpcGetMemHandle = 101, + HIP_API_ID_hipIpcOpenEventHandle = 102, + HIP_API_ID_hipIpcOpenMemHandle = 103, + HIP_API_ID_hipLaunchByPtr = 104, + HIP_API_ID_hipLaunchCooperativeKernel = 105, + HIP_API_ID_hipLaunchCooperativeKernelMultiDevice = 106, + HIP_API_ID_hipLaunchKernel = 107, + HIP_API_ID_hipMalloc = 108, + HIP_API_ID_hipMalloc3D = 109, + HIP_API_ID_hipMalloc3DArray = 110, + HIP_API_ID_hipMallocArray = 111, + HIP_API_ID_hipMallocHost = 112, + HIP_API_ID_hipMallocManaged = 113, + HIP_API_ID_hipMallocMipmappedArray = 114, + HIP_API_ID_hipMallocPitch = 115, + HIP_API_ID_hipMemAdvise = 116, + HIP_API_ID_hipMemAllocHost = 117, + HIP_API_ID_hipMemAllocPitch = 118, + HIP_API_ID_hipMemGetAddressRange = 119, + HIP_API_ID_hipMemGetInfo = 120, + HIP_API_ID_hipMemPrefetchAsync = 121, + HIP_API_ID_hipMemPtrGetInfo = 122, + HIP_API_ID_hipMemRangeGetAttribute = 123, + HIP_API_ID_hipMemRangeGetAttributes = 124, + HIP_API_ID_hipMemcpy = 125, + HIP_API_ID_hipMemcpy2D = 126, + HIP_API_ID_hipMemcpy2DAsync = 127, + HIP_API_ID_hipMemcpy2DFromArray = 128, + HIP_API_ID_hipMemcpy2DFromArrayAsync = 129, + HIP_API_ID_hipMemcpy2DToArray = 130, + HIP_API_ID_hipMemcpy2DToArrayAsync = 131, + HIP_API_ID_hipMemcpy3D = 132, + HIP_API_ID_hipMemcpy3DAsync = 133, + HIP_API_ID_hipMemcpyAsync = 134, + HIP_API_ID_hipMemcpyAtoH = 135, + HIP_API_ID_hipMemcpyDtoD = 136, + HIP_API_ID_hipMemcpyDtoDAsync = 137, + HIP_API_ID_hipMemcpyDtoH = 138, + HIP_API_ID_hipMemcpyDtoHAsync = 139, + HIP_API_ID_hipMemcpyFromArray = 140, + HIP_API_ID_hipMemcpyFromSymbol = 141, + HIP_API_ID_hipMemcpyFromSymbolAsync = 142, + HIP_API_ID_hipMemcpyHtoA = 143, + HIP_API_ID_hipMemcpyHtoD = 144, + HIP_API_ID_hipMemcpyHtoDAsync = 145, + HIP_API_ID_hipMemcpyParam2D = 146, + HIP_API_ID_hipMemcpyParam2DAsync = 147, + HIP_API_ID_hipMemcpyPeer = 148, + 
HIP_API_ID_hipMemcpyPeerAsync = 149, + HIP_API_ID_hipMemcpyToArray = 150, + HIP_API_ID_hipMemcpyToSymbol = 151, + HIP_API_ID_hipMemcpyToSymbolAsync = 152, + HIP_API_ID_hipMemcpyWithStream = 153, + HIP_API_ID_hipMemset = 154, + HIP_API_ID_hipMemset2D = 155, + HIP_API_ID_hipMemset2DAsync = 156, + HIP_API_ID_hipMemset3D = 157, + HIP_API_ID_hipMemset3DAsync = 158, + HIP_API_ID_hipMemsetAsync = 159, + HIP_API_ID_hipMemsetD16 = 160, + HIP_API_ID_hipMemsetD16Async = 161, + HIP_API_ID_hipMemsetD32 = 162, + HIP_API_ID_hipMemsetD32Async = 163, + HIP_API_ID_hipMemsetD8 = 164, + HIP_API_ID_hipMemsetD8Async = 165, + HIP_API_ID_hipModuleGetFunction = 166, + HIP_API_ID_hipModuleGetGlobal = 167, + HIP_API_ID_hipModuleGetTexRef = 168, + HIP_API_ID_hipModuleLaunchKernel = 169, + HIP_API_ID_hipModuleLoad = 170, + HIP_API_ID_hipModuleLoadData = 171, + HIP_API_ID_hipModuleLoadDataEx = 172, + HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor = 173, + HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 174, + HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize = 175, + HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags = 176, + HIP_API_ID_hipModuleUnload = 177, + HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor = 178, + HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 179, + HIP_API_ID_hipOccupancyMaxPotentialBlockSize = 180, + HIP_API_ID_hipPeekAtLastError = 181, + HIP_API_ID_hipPointerGetAttributes = 182, + HIP_API_ID_hipProfilerStart = 183, + HIP_API_ID_hipProfilerStop = 184, + HIP_API_ID_RESERVED_185 = 185, + HIP_API_ID_hipSetDevice = 186, + HIP_API_ID_hipSetDeviceFlags = 187, + HIP_API_ID_hipSetupArgument = 188, + HIP_API_ID_hipSignalExternalSemaphoresAsync = 189, + HIP_API_ID_hipStreamAddCallback = 190, + HIP_API_ID_hipStreamAttachMemAsync = 191, + HIP_API_ID_hipStreamCreate = 192, + HIP_API_ID_hipStreamCreateWithFlags = 193, + HIP_API_ID_hipStreamCreateWithPriority = 194, + HIP_API_ID_hipStreamDestroy = 195, + 
HIP_API_ID_hipStreamGetFlags = 196, + HIP_API_ID_hipStreamGetPriority = 197, + HIP_API_ID_hipStreamQuery = 198, + HIP_API_ID_hipStreamSynchronize = 199, + HIP_API_ID_hipStreamWaitEvent = 200, + HIP_API_ID_hipStreamWaitValue32 = 201, + HIP_API_ID_hipStreamWaitValue64 = 202, + HIP_API_ID_hipStreamWriteValue32 = 203, + HIP_API_ID_hipStreamWriteValue64 = 204, + HIP_API_ID_hipWaitExternalSemaphoresAsync = 205, + HIP_API_ID_hipCreateSurfaceObject = 206, + HIP_API_ID_hipDestroySurfaceObject = 207, + HIP_API_ID_hipGraphAddKernelNode = 208, + HIP_API_ID_hipGraphAddMemcpyNode = 209, + HIP_API_ID_hipGraphAddMemsetNode = 210, + HIP_API_ID_hipGraphCreate = 211, + HIP_API_ID_hipGraphDestroy = 212, + HIP_API_ID_hipGraphExecDestroy = 213, + HIP_API_ID_hipGraphInstantiate = 214, + HIP_API_ID_hipGraphLaunch = 215, + HIP_API_ID_hipMipmappedArrayCreate = 216, + HIP_API_ID_hipMipmappedArrayDestroy = 217, + HIP_API_ID_hipMipmappedArrayGetLevel = 218, + HIP_API_ID_hipStreamBeginCapture = 219, + HIP_API_ID_hipStreamEndCapture = 220, + HIP_API_ID_hipTexRefGetAddress = 221, + HIP_API_ID_hipTexRefGetFlags = 222, + HIP_API_ID_hipTexRefGetFormat = 223, + HIP_API_ID_hipTexRefGetMaxAnisotropy = 224, + HIP_API_ID_hipTexRefGetMipMappedArray = 225, + HIP_API_ID_hipTexRefGetMipmapLevelBias = 226, + HIP_API_ID_hipTexRefGetMipmapLevelClamp = 227, + HIP_API_ID_hipTexRefSetAddress = 228, + HIP_API_ID_hipTexRefSetAddress2D = 229, + HIP_API_ID_hipTexRefSetBorderColor = 230, + HIP_API_ID_hipTexRefSetFormat = 231, + HIP_API_ID_hipTexRefSetMaxAnisotropy = 232, + HIP_API_ID_hipTexRefSetMipmapLevelClamp = 233, + HIP_API_ID_hipTexRefSetMipmappedArray = 234, + HIP_API_ID_hipGLGetDevices = 235, + HIP_API_ID_hipGraphAddDependencies = 236, + HIP_API_ID_hipGraphAddEmptyNode = 237, + HIP_API_ID_hipGraphExecKernelNodeSetParams = 238, + HIP_API_ID_hipGraphGetNodes = 239, + HIP_API_ID_hipGraphGetRootNodes = 240, + HIP_API_ID_hipGraphKernelNodeGetParams = 241, + HIP_API_ID_hipGraphKernelNodeSetParams = 242, + 
HIP_API_ID_hipGraphMemcpyNodeGetParams = 243, + HIP_API_ID_hipGraphMemcpyNodeSetParams = 244, + HIP_API_ID_hipGraphMemsetNodeGetParams = 245, + HIP_API_ID_hipGraphMemsetNodeSetParams = 246, + HIP_API_ID_hipGraphicsGLRegisterBuffer = 247, + HIP_API_ID_hipGraphicsMapResources = 248, + HIP_API_ID_hipGraphicsResourceGetMappedPointer = 249, + HIP_API_ID_hipGraphicsUnmapResources = 250, + HIP_API_ID_hipGraphicsUnregisterResource = 251, + HIP_API_ID_hipGraphAddChildGraphNode = 252, + HIP_API_ID_hipGraphAddEventRecordNode = 253, + HIP_API_ID_hipGraphAddEventWaitNode = 254, + HIP_API_ID_hipGraphAddHostNode = 255, + HIP_API_ID_hipGraphAddMemcpyNode1D = 256, + HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol = 257, + HIP_API_ID_hipGraphAddMemcpyNodeToSymbol = 258, + HIP_API_ID_hipGraphChildGraphNodeGetGraph = 259, + HIP_API_ID_hipGraphClone = 260, + HIP_API_ID_hipGraphDestroyNode = 261, + HIP_API_ID_hipGraphEventRecordNodeGetEvent = 262, + HIP_API_ID_hipGraphEventRecordNodeSetEvent = 263, + HIP_API_ID_hipGraphEventWaitNodeGetEvent = 264, + HIP_API_ID_hipGraphEventWaitNodeSetEvent = 265, + HIP_API_ID_hipGraphExecChildGraphNodeSetParams = 266, + HIP_API_ID_hipGraphExecEventRecordNodeSetEvent = 267, + HIP_API_ID_hipGraphExecEventWaitNodeSetEvent = 268, + HIP_API_ID_hipGraphExecHostNodeSetParams = 269, + HIP_API_ID_hipGraphExecMemcpyNodeSetParams = 270, + HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D = 271, + HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol = 272, + HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol = 273, + HIP_API_ID_hipGraphExecMemsetNodeSetParams = 274, + HIP_API_ID_hipGraphExecUpdate = 275, + HIP_API_ID_hipGraphGetEdges = 276, + HIP_API_ID_hipGraphHostNodeGetParams = 277, + HIP_API_ID_hipGraphHostNodeSetParams = 278, + HIP_API_ID_hipGraphInstantiateWithFlags = 279, + HIP_API_ID_hipGraphMemcpyNodeSetParams1D = 280, + HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol = 281, + HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol = 282, + 
HIP_API_ID_hipGraphNodeFindInClone = 283, + HIP_API_ID_hipGraphNodeGetDependencies = 284, + HIP_API_ID_hipGraphNodeGetDependentNodes = 285, + HIP_API_ID_hipGraphNodeGetType = 286, + HIP_API_ID_hipGraphRemoveDependencies = 287, + HIP_API_ID_hipStreamGetCaptureInfo = 288, + HIP_API_ID_hipStreamGetCaptureInfo_v2 = 289, + HIP_API_ID_hipStreamIsCapturing = 290, + HIP_API_ID_hipStreamUpdateCaptureDependencies = 291, + HIP_API_ID_hipDrvPointerGetAttributes = 292, + HIP_API_ID_hipGraphicsGLRegisterImage = 293, + HIP_API_ID_hipGraphicsSubResourceGetMappedArray = 294, + HIP_API_ID_hipPointerGetAttribute = 295, + HIP_API_ID_RESERVED_296 = 296, + HIP_API_ID_hipThreadExchangeStreamCaptureMode = 297, + HIP_API_ID_hipDeviceGetUuid = 298, + HIP_API_ID_hipGetChannelDesc = 299, + HIP_API_ID_hipGraphKernelNodeGetAttribute = 300, + HIP_API_ID_hipGraphKernelNodeSetAttribute = 301, + HIP_API_ID_hipLaunchHostFunc = 302, + HIP_API_ID_hipDeviceGetDefaultMemPool = 303, + HIP_API_ID_hipDeviceGetMemPool = 304, + HIP_API_ID_hipDeviceSetMemPool = 305, + HIP_API_ID_hipFreeAsync = 306, + HIP_API_ID_hipMallocAsync = 307, + HIP_API_ID_hipMallocFromPoolAsync = 308, + HIP_API_ID_hipMemPoolCreate = 309, + HIP_API_ID_hipMemPoolDestroy = 310, + HIP_API_ID_hipMemPoolExportPointer = 311, + HIP_API_ID_hipMemPoolExportToShareableHandle = 312, + HIP_API_ID_hipMemPoolGetAccess = 313, + HIP_API_ID_hipMemPoolGetAttribute = 314, + HIP_API_ID_hipMemPoolImportFromShareableHandle = 315, + HIP_API_ID_hipMemPoolImportPointer = 316, + HIP_API_ID_hipMemPoolSetAccess = 317, + HIP_API_ID_hipMemPoolSetAttribute = 318, + HIP_API_ID_hipMemPoolTrimTo = 319, + HIP_API_ID_hipMemAddressFree = 320, + HIP_API_ID_hipMemAddressReserve = 321, + HIP_API_ID_hipMemCreate = 322, + HIP_API_ID_hipMemExportToShareableHandle = 323, + HIP_API_ID_hipMemGetAccess = 324, + HIP_API_ID_hipMemGetAllocationGranularity = 325, + HIP_API_ID_hipMemGetAllocationPropertiesFromHandle = 326, + HIP_API_ID_hipMemImportFromShareableHandle = 327, + 
HIP_API_ID_hipMemMap = 328, + HIP_API_ID_hipMemMapArrayAsync = 329, + HIP_API_ID_hipMemRelease = 330, + HIP_API_ID_hipMemRetainAllocationHandle = 331, + HIP_API_ID_hipMemSetAccess = 332, + HIP_API_ID_hipMemUnmap = 333, + HIP_API_ID_hipDeviceSetGraphMemAttribute = 334, + HIP_API_ID_hipDeviceGetGraphMemAttribute = 335, + HIP_API_ID_hipDeviceGraphMemTrim = 336, + HIP_API_ID_hipDeviceSetLimit = 337, + HIP_API_ID_hipTexRefSetArray = 338, + HIP_API_ID_hipTexRefSetFlags = 339, + HIP_API_ID_hipTexRefSetMipmapLevelBias = 340, + HIP_API_ID_hipDriverGetVersion = 341, + HIP_API_ID_hipGraphUpload = 342, + HIP_API_ID_hipRuntimeGetVersion = 343, + HIP_API_ID_hipUserObjectCreate = 344, + HIP_API_ID_hipUserObjectRelease = 345, + HIP_API_ID_hipUserObjectRetain = 346, + HIP_API_ID_hipGraphRetainUserObject = 347, + HIP_API_ID_hipGraphReleaseUserObject = 348, + HIP_API_ID_hipGraphDebugDotPrint = 349, + HIP_API_ID_hipGraphKernelNodeCopyAttributes = 350, + HIP_API_ID_hipGraphNodeGetEnabled = 351, + HIP_API_ID_hipGraphNodeSetEnabled = 352, + HIP_API_ID_hipPointerSetAttribute = 353, + HIP_API_ID_hipGraphAddMemAllocNode = 354, + HIP_API_ID_hipGraphAddMemFreeNode = 355, + HIP_API_ID_hipGraphMemAllocNodeGetParams = 356, + HIP_API_ID_hipGraphMemFreeNodeGetParams = 357, + HIP_API_ID_hipModuleLaunchCooperativeKernel = 358, + HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice = 359, + HIP_API_ID_hipArray3DGetDescriptor = 360, + HIP_API_ID_hipArrayGetDescriptor = 361, + HIP_API_ID_hipArrayGetInfo = 362, + HIP_API_ID_hipStreamGetDevice = 363, + HIP_API_ID_LAST = 363, + + HIP_API_ID_hipBindTexture = HIP_API_ID_NONE, + HIP_API_ID_hipBindTexture2D = HIP_API_ID_NONE, + HIP_API_ID_hipBindTextureToArray = HIP_API_ID_NONE, + HIP_API_ID_hipBindTextureToMipmappedArray = HIP_API_ID_NONE, + HIP_API_ID_hipCreateTextureObject = HIP_API_ID_NONE, + HIP_API_ID_hipDestroyTextureObject = HIP_API_ID_NONE, + HIP_API_ID_hipDeviceGetCount = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureAlignmentOffset = 
HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureObjectResourceDesc = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureObjectResourceViewDesc = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NONE, + HIP_API_ID_hipGetTextureReference = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpy2DArrayToArray = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyArrayToArray = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyAtoA = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyAtoD = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyAtoHAsync = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyDtoA = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyFromArrayAsync = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyHtoAAsync = HIP_API_ID_NONE, + HIP_API_ID_hipMemcpyToArrayAsync = HIP_API_ID_NONE, + HIP_API_ID_hipModuleLaunchKernelExt = HIP_API_ID_NONE, + HIP_API_ID_hipSetValidDevices = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectCreate = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectDestroy = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectGetResourceDesc = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectGetResourceViewDesc = HIP_API_ID_NONE, + HIP_API_ID_hipTexObjectGetTextureDesc = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetAddressMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetArray = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetBorderColor = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetMipmapFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefGetMipmappedArray = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefSetAddressMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefSetFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipTexRefSetMipmapFilterMode = HIP_API_ID_NONE, + HIP_API_ID_hipUnbindTexture = HIP_API_ID_NONE, +}; + +// Return the HIP API string for a given callback ID +static inline const char* hip_api_name(const uint32_t id) { + switch(id) { + case HIP_API_ID___hipPopCallConfiguration: return "__hipPopCallConfiguration"; + case HIP_API_ID___hipPushCallConfiguration: return "__hipPushCallConfiguration"; + case 
HIP_API_ID_hipArray3DCreate: return "hipArray3DCreate"; + case HIP_API_ID_hipArray3DGetDescriptor: return "hipArray3DGetDescriptor"; + case HIP_API_ID_hipArrayCreate: return "hipArrayCreate"; + case HIP_API_ID_hipArrayDestroy: return "hipArrayDestroy"; + case HIP_API_ID_hipArrayGetDescriptor: return "hipArrayGetDescriptor"; + case HIP_API_ID_hipArrayGetInfo: return "hipArrayGetInfo"; + case HIP_API_ID_hipChooseDevice: return "hipChooseDevice"; + case HIP_API_ID_hipConfigureCall: return "hipConfigureCall"; + case HIP_API_ID_hipCreateSurfaceObject: return "hipCreateSurfaceObject"; + case HIP_API_ID_hipCtxCreate: return "hipCtxCreate"; + case HIP_API_ID_hipCtxDestroy: return "hipCtxDestroy"; + case HIP_API_ID_hipCtxDisablePeerAccess: return "hipCtxDisablePeerAccess"; + case HIP_API_ID_hipCtxEnablePeerAccess: return "hipCtxEnablePeerAccess"; + case HIP_API_ID_hipCtxGetApiVersion: return "hipCtxGetApiVersion"; + case HIP_API_ID_hipCtxGetCacheConfig: return "hipCtxGetCacheConfig"; + case HIP_API_ID_hipCtxGetCurrent: return "hipCtxGetCurrent"; + case HIP_API_ID_hipCtxGetDevice: return "hipCtxGetDevice"; + case HIP_API_ID_hipCtxGetFlags: return "hipCtxGetFlags"; + case HIP_API_ID_hipCtxGetSharedMemConfig: return "hipCtxGetSharedMemConfig"; + case HIP_API_ID_hipCtxPopCurrent: return "hipCtxPopCurrent"; + case HIP_API_ID_hipCtxPushCurrent: return "hipCtxPushCurrent"; + case HIP_API_ID_hipCtxSetCacheConfig: return "hipCtxSetCacheConfig"; + case HIP_API_ID_hipCtxSetCurrent: return "hipCtxSetCurrent"; + case HIP_API_ID_hipCtxSetSharedMemConfig: return "hipCtxSetSharedMemConfig"; + case HIP_API_ID_hipCtxSynchronize: return "hipCtxSynchronize"; + case HIP_API_ID_hipDestroyExternalMemory: return "hipDestroyExternalMemory"; + case HIP_API_ID_hipDestroyExternalSemaphore: return "hipDestroyExternalSemaphore"; + case HIP_API_ID_hipDestroySurfaceObject: return "hipDestroySurfaceObject"; + case HIP_API_ID_hipDeviceCanAccessPeer: return "hipDeviceCanAccessPeer"; + case 
HIP_API_ID_hipDeviceComputeCapability: return "hipDeviceComputeCapability"; + case HIP_API_ID_hipDeviceDisablePeerAccess: return "hipDeviceDisablePeerAccess"; + case HIP_API_ID_hipDeviceEnablePeerAccess: return "hipDeviceEnablePeerAccess"; + case HIP_API_ID_hipDeviceGet: return "hipDeviceGet"; + case HIP_API_ID_hipDeviceGetAttribute: return "hipDeviceGetAttribute"; + case HIP_API_ID_hipDeviceGetByPCIBusId: return "hipDeviceGetByPCIBusId"; + case HIP_API_ID_hipDeviceGetCacheConfig: return "hipDeviceGetCacheConfig"; + case HIP_API_ID_hipDeviceGetDefaultMemPool: return "hipDeviceGetDefaultMemPool"; + case HIP_API_ID_hipDeviceGetGraphMemAttribute: return "hipDeviceGetGraphMemAttribute"; + case HIP_API_ID_hipDeviceGetLimit: return "hipDeviceGetLimit"; + case HIP_API_ID_hipDeviceGetMemPool: return "hipDeviceGetMemPool"; + case HIP_API_ID_hipDeviceGetName: return "hipDeviceGetName"; + case HIP_API_ID_hipDeviceGetP2PAttribute: return "hipDeviceGetP2PAttribute"; + case HIP_API_ID_hipDeviceGetPCIBusId: return "hipDeviceGetPCIBusId"; + case HIP_API_ID_hipDeviceGetSharedMemConfig: return "hipDeviceGetSharedMemConfig"; + case HIP_API_ID_hipDeviceGetStreamPriorityRange: return "hipDeviceGetStreamPriorityRange"; + case HIP_API_ID_hipDeviceGetUuid: return "hipDeviceGetUuid"; + case HIP_API_ID_hipDeviceGraphMemTrim: return "hipDeviceGraphMemTrim"; + case HIP_API_ID_hipDevicePrimaryCtxGetState: return "hipDevicePrimaryCtxGetState"; + case HIP_API_ID_hipDevicePrimaryCtxRelease: return "hipDevicePrimaryCtxRelease"; + case HIP_API_ID_hipDevicePrimaryCtxReset: return "hipDevicePrimaryCtxReset"; + case HIP_API_ID_hipDevicePrimaryCtxRetain: return "hipDevicePrimaryCtxRetain"; + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: return "hipDevicePrimaryCtxSetFlags"; + case HIP_API_ID_hipDeviceReset: return "hipDeviceReset"; + case HIP_API_ID_hipDeviceSetCacheConfig: return "hipDeviceSetCacheConfig"; + case HIP_API_ID_hipDeviceSetGraphMemAttribute: return "hipDeviceSetGraphMemAttribute"; + case 
HIP_API_ID_hipDeviceSetLimit: return "hipDeviceSetLimit"; + case HIP_API_ID_hipDeviceSetMemPool: return "hipDeviceSetMemPool"; + case HIP_API_ID_hipDeviceSetSharedMemConfig: return "hipDeviceSetSharedMemConfig"; + case HIP_API_ID_hipDeviceSynchronize: return "hipDeviceSynchronize"; + case HIP_API_ID_hipDeviceTotalMem: return "hipDeviceTotalMem"; + case HIP_API_ID_hipDriverGetVersion: return "hipDriverGetVersion"; + case HIP_API_ID_hipDrvMemcpy2DUnaligned: return "hipDrvMemcpy2DUnaligned"; + case HIP_API_ID_hipDrvMemcpy3D: return "hipDrvMemcpy3D"; + case HIP_API_ID_hipDrvMemcpy3DAsync: return "hipDrvMemcpy3DAsync"; + case HIP_API_ID_hipDrvPointerGetAttributes: return "hipDrvPointerGetAttributes"; + case HIP_API_ID_hipEventCreate: return "hipEventCreate"; + case HIP_API_ID_hipEventCreateWithFlags: return "hipEventCreateWithFlags"; + case HIP_API_ID_hipEventDestroy: return "hipEventDestroy"; + case HIP_API_ID_hipEventElapsedTime: return "hipEventElapsedTime"; + case HIP_API_ID_hipEventQuery: return "hipEventQuery"; + case HIP_API_ID_hipEventRecord: return "hipEventRecord"; + case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize"; + case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount"; + case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel"; + case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice"; + case HIP_API_ID_hipExtMallocWithFlags: return "hipExtMallocWithFlags"; + case HIP_API_ID_hipExtModuleLaunchKernel: return "hipExtModuleLaunchKernel"; + case HIP_API_ID_hipExtStreamCreateWithCUMask: return "hipExtStreamCreateWithCUMask"; + case HIP_API_ID_hipExtStreamGetCUMask: return "hipExtStreamGetCUMask"; + case HIP_API_ID_hipExternalMemoryGetMappedBuffer: return "hipExternalMemoryGetMappedBuffer"; + case HIP_API_ID_hipFree: return "hipFree"; + case HIP_API_ID_hipFreeArray: return "hipFreeArray"; + case HIP_API_ID_hipFreeAsync: return "hipFreeAsync"; + case 
HIP_API_ID_hipFreeHost: return "hipFreeHost"; + case HIP_API_ID_hipFreeMipmappedArray: return "hipFreeMipmappedArray"; + case HIP_API_ID_hipFuncGetAttribute: return "hipFuncGetAttribute"; + case HIP_API_ID_hipFuncGetAttributes: return "hipFuncGetAttributes"; + case HIP_API_ID_hipFuncSetAttribute: return "hipFuncSetAttribute"; + case HIP_API_ID_hipFuncSetCacheConfig: return "hipFuncSetCacheConfig"; + case HIP_API_ID_hipFuncSetSharedMemConfig: return "hipFuncSetSharedMemConfig"; + case HIP_API_ID_hipGLGetDevices: return "hipGLGetDevices"; + case HIP_API_ID_hipGetChannelDesc: return "hipGetChannelDesc"; + case HIP_API_ID_hipGetDevice: return "hipGetDevice"; + case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount"; + case HIP_API_ID_hipGetDeviceFlags: return "hipGetDeviceFlags"; + case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties"; + case HIP_API_ID_hipGetErrorString: return "hipGetErrorString"; + case HIP_API_ID_hipGetLastError: return "hipGetLastError"; + case HIP_API_ID_hipGetMipmappedArrayLevel: return "hipGetMipmappedArrayLevel"; + case HIP_API_ID_hipGetSymbolAddress: return "hipGetSymbolAddress"; + case HIP_API_ID_hipGetSymbolSize: return "hipGetSymbolSize"; + case HIP_API_ID_hipGraphAddChildGraphNode: return "hipGraphAddChildGraphNode"; + case HIP_API_ID_hipGraphAddDependencies: return "hipGraphAddDependencies"; + case HIP_API_ID_hipGraphAddEmptyNode: return "hipGraphAddEmptyNode"; + case HIP_API_ID_hipGraphAddEventRecordNode: return "hipGraphAddEventRecordNode"; + case HIP_API_ID_hipGraphAddEventWaitNode: return "hipGraphAddEventWaitNode"; + case HIP_API_ID_hipGraphAddHostNode: return "hipGraphAddHostNode"; + case HIP_API_ID_hipGraphAddKernelNode: return "hipGraphAddKernelNode"; + case HIP_API_ID_hipGraphAddMemAllocNode: return "hipGraphAddMemAllocNode"; + case HIP_API_ID_hipGraphAddMemFreeNode: return "hipGraphAddMemFreeNode"; + case HIP_API_ID_hipGraphAddMemcpyNode: return "hipGraphAddMemcpyNode"; + case 
HIP_API_ID_hipGraphAddMemcpyNode1D: return "hipGraphAddMemcpyNode1D"; + case HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol: return "hipGraphAddMemcpyNodeFromSymbol"; + case HIP_API_ID_hipGraphAddMemcpyNodeToSymbol: return "hipGraphAddMemcpyNodeToSymbol"; + case HIP_API_ID_hipGraphAddMemsetNode: return "hipGraphAddMemsetNode"; + case HIP_API_ID_hipGraphChildGraphNodeGetGraph: return "hipGraphChildGraphNodeGetGraph"; + case HIP_API_ID_hipGraphClone: return "hipGraphClone"; + case HIP_API_ID_hipGraphCreate: return "hipGraphCreate"; + case HIP_API_ID_hipGraphDebugDotPrint: return "hipGraphDebugDotPrint"; + case HIP_API_ID_hipGraphDestroy: return "hipGraphDestroy"; + case HIP_API_ID_hipGraphDestroyNode: return "hipGraphDestroyNode"; + case HIP_API_ID_hipGraphEventRecordNodeGetEvent: return "hipGraphEventRecordNodeGetEvent"; + case HIP_API_ID_hipGraphEventRecordNodeSetEvent: return "hipGraphEventRecordNodeSetEvent"; + case HIP_API_ID_hipGraphEventWaitNodeGetEvent: return "hipGraphEventWaitNodeGetEvent"; + case HIP_API_ID_hipGraphEventWaitNodeSetEvent: return "hipGraphEventWaitNodeSetEvent"; + case HIP_API_ID_hipGraphExecChildGraphNodeSetParams: return "hipGraphExecChildGraphNodeSetParams"; + case HIP_API_ID_hipGraphExecDestroy: return "hipGraphExecDestroy"; + case HIP_API_ID_hipGraphExecEventRecordNodeSetEvent: return "hipGraphExecEventRecordNodeSetEvent"; + case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: return "hipGraphExecEventWaitNodeSetEvent"; + case HIP_API_ID_hipGraphExecHostNodeSetParams: return "hipGraphExecHostNodeSetParams"; + case HIP_API_ID_hipGraphExecKernelNodeSetParams: return "hipGraphExecKernelNodeSetParams"; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams: return "hipGraphExecMemcpyNodeSetParams"; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D: return "hipGraphExecMemcpyNodeSetParams1D"; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol: return "hipGraphExecMemcpyNodeSetParamsFromSymbol"; + case 
HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol: return "hipGraphExecMemcpyNodeSetParamsToSymbol"; + case HIP_API_ID_hipGraphExecMemsetNodeSetParams: return "hipGraphExecMemsetNodeSetParams"; + case HIP_API_ID_hipGraphExecUpdate: return "hipGraphExecUpdate"; + case HIP_API_ID_hipGraphGetEdges: return "hipGraphGetEdges"; + case HIP_API_ID_hipGraphGetNodes: return "hipGraphGetNodes"; + case HIP_API_ID_hipGraphGetRootNodes: return "hipGraphGetRootNodes"; + case HIP_API_ID_hipGraphHostNodeGetParams: return "hipGraphHostNodeGetParams"; + case HIP_API_ID_hipGraphHostNodeSetParams: return "hipGraphHostNodeSetParams"; + case HIP_API_ID_hipGraphInstantiate: return "hipGraphInstantiate"; + case HIP_API_ID_hipGraphInstantiateWithFlags: return "hipGraphInstantiateWithFlags"; + case HIP_API_ID_hipGraphKernelNodeCopyAttributes: return "hipGraphKernelNodeCopyAttributes"; + case HIP_API_ID_hipGraphKernelNodeGetAttribute: return "hipGraphKernelNodeGetAttribute"; + case HIP_API_ID_hipGraphKernelNodeGetParams: return "hipGraphKernelNodeGetParams"; + case HIP_API_ID_hipGraphKernelNodeSetAttribute: return "hipGraphKernelNodeSetAttribute"; + case HIP_API_ID_hipGraphKernelNodeSetParams: return "hipGraphKernelNodeSetParams"; + case HIP_API_ID_hipGraphLaunch: return "hipGraphLaunch"; + case HIP_API_ID_hipGraphMemAllocNodeGetParams: return "hipGraphMemAllocNodeGetParams"; + case HIP_API_ID_hipGraphMemFreeNodeGetParams: return "hipGraphMemFreeNodeGetParams"; + case HIP_API_ID_hipGraphMemcpyNodeGetParams: return "hipGraphMemcpyNodeGetParams"; + case HIP_API_ID_hipGraphMemcpyNodeSetParams: return "hipGraphMemcpyNodeSetParams"; + case HIP_API_ID_hipGraphMemcpyNodeSetParams1D: return "hipGraphMemcpyNodeSetParams1D"; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol: return "hipGraphMemcpyNodeSetParamsFromSymbol"; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol: return "hipGraphMemcpyNodeSetParamsToSymbol"; + case HIP_API_ID_hipGraphMemsetNodeGetParams: return 
"hipGraphMemsetNodeGetParams"; + case HIP_API_ID_hipGraphMemsetNodeSetParams: return "hipGraphMemsetNodeSetParams"; + case HIP_API_ID_hipGraphNodeFindInClone: return "hipGraphNodeFindInClone"; + case HIP_API_ID_hipGraphNodeGetDependencies: return "hipGraphNodeGetDependencies"; + case HIP_API_ID_hipGraphNodeGetDependentNodes: return "hipGraphNodeGetDependentNodes"; + case HIP_API_ID_hipGraphNodeGetEnabled: return "hipGraphNodeGetEnabled"; + case HIP_API_ID_hipGraphNodeGetType: return "hipGraphNodeGetType"; + case HIP_API_ID_hipGraphNodeSetEnabled: return "hipGraphNodeSetEnabled"; + case HIP_API_ID_hipGraphReleaseUserObject: return "hipGraphReleaseUserObject"; + case HIP_API_ID_hipGraphRemoveDependencies: return "hipGraphRemoveDependencies"; + case HIP_API_ID_hipGraphRetainUserObject: return "hipGraphRetainUserObject"; + case HIP_API_ID_hipGraphUpload: return "hipGraphUpload"; + case HIP_API_ID_hipGraphicsGLRegisterBuffer: return "hipGraphicsGLRegisterBuffer"; + case HIP_API_ID_hipGraphicsGLRegisterImage: return "hipGraphicsGLRegisterImage"; + case HIP_API_ID_hipGraphicsMapResources: return "hipGraphicsMapResources"; + case HIP_API_ID_hipGraphicsResourceGetMappedPointer: return "hipGraphicsResourceGetMappedPointer"; + case HIP_API_ID_hipGraphicsSubResourceGetMappedArray: return "hipGraphicsSubResourceGetMappedArray"; + case HIP_API_ID_hipGraphicsUnmapResources: return "hipGraphicsUnmapResources"; + case HIP_API_ID_hipGraphicsUnregisterResource: return "hipGraphicsUnregisterResource"; + case HIP_API_ID_hipHccModuleLaunchKernel: return "hipHccModuleLaunchKernel"; + case HIP_API_ID_hipHostAlloc: return "hipHostAlloc"; + case HIP_API_ID_hipHostFree: return "hipHostFree"; + case HIP_API_ID_hipHostGetDevicePointer: return "hipHostGetDevicePointer"; + case HIP_API_ID_hipHostGetFlags: return "hipHostGetFlags"; + case HIP_API_ID_hipHostMalloc: return "hipHostMalloc"; + case HIP_API_ID_hipHostRegister: return "hipHostRegister"; + case HIP_API_ID_hipHostUnregister: return 
"hipHostUnregister"; + case HIP_API_ID_hipImportExternalMemory: return "hipImportExternalMemory"; + case HIP_API_ID_hipImportExternalSemaphore: return "hipImportExternalSemaphore"; + case HIP_API_ID_hipInit: return "hipInit"; + case HIP_API_ID_hipIpcCloseMemHandle: return "hipIpcCloseMemHandle"; + case HIP_API_ID_hipIpcGetEventHandle: return "hipIpcGetEventHandle"; + case HIP_API_ID_hipIpcGetMemHandle: return "hipIpcGetMemHandle"; + case HIP_API_ID_hipIpcOpenEventHandle: return "hipIpcOpenEventHandle"; + case HIP_API_ID_hipIpcOpenMemHandle: return "hipIpcOpenMemHandle"; + case HIP_API_ID_hipLaunchByPtr: return "hipLaunchByPtr"; + case HIP_API_ID_hipLaunchCooperativeKernel: return "hipLaunchCooperativeKernel"; + case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: return "hipLaunchCooperativeKernelMultiDevice"; + case HIP_API_ID_hipLaunchHostFunc: return "hipLaunchHostFunc"; + case HIP_API_ID_hipLaunchKernel: return "hipLaunchKernel"; + case HIP_API_ID_hipMalloc: return "hipMalloc"; + case HIP_API_ID_hipMalloc3D: return "hipMalloc3D"; + case HIP_API_ID_hipMalloc3DArray: return "hipMalloc3DArray"; + case HIP_API_ID_hipMallocArray: return "hipMallocArray"; + case HIP_API_ID_hipMallocAsync: return "hipMallocAsync"; + case HIP_API_ID_hipMallocFromPoolAsync: return "hipMallocFromPoolAsync"; + case HIP_API_ID_hipMallocHost: return "hipMallocHost"; + case HIP_API_ID_hipMallocManaged: return "hipMallocManaged"; + case HIP_API_ID_hipMallocMipmappedArray: return "hipMallocMipmappedArray"; + case HIP_API_ID_hipMallocPitch: return "hipMallocPitch"; + case HIP_API_ID_hipMemAddressFree: return "hipMemAddressFree"; + case HIP_API_ID_hipMemAddressReserve: return "hipMemAddressReserve"; + case HIP_API_ID_hipMemAdvise: return "hipMemAdvise"; + case HIP_API_ID_hipMemAllocHost: return "hipMemAllocHost"; + case HIP_API_ID_hipMemAllocPitch: return "hipMemAllocPitch"; + case HIP_API_ID_hipMemCreate: return "hipMemCreate"; + case HIP_API_ID_hipMemExportToShareableHandle: return 
"hipMemExportToShareableHandle"; + case HIP_API_ID_hipMemGetAccess: return "hipMemGetAccess"; + case HIP_API_ID_hipMemGetAddressRange: return "hipMemGetAddressRange"; + case HIP_API_ID_hipMemGetAllocationGranularity: return "hipMemGetAllocationGranularity"; + case HIP_API_ID_hipMemGetAllocationPropertiesFromHandle: return "hipMemGetAllocationPropertiesFromHandle"; + case HIP_API_ID_hipMemGetInfo: return "hipMemGetInfo"; + case HIP_API_ID_hipMemImportFromShareableHandle: return "hipMemImportFromShareableHandle"; + case HIP_API_ID_hipMemMap: return "hipMemMap"; + case HIP_API_ID_hipMemMapArrayAsync: return "hipMemMapArrayAsync"; + case HIP_API_ID_hipMemPoolCreate: return "hipMemPoolCreate"; + case HIP_API_ID_hipMemPoolDestroy: return "hipMemPoolDestroy"; + case HIP_API_ID_hipMemPoolExportPointer: return "hipMemPoolExportPointer"; + case HIP_API_ID_hipMemPoolExportToShareableHandle: return "hipMemPoolExportToShareableHandle"; + case HIP_API_ID_hipMemPoolGetAccess: return "hipMemPoolGetAccess"; + case HIP_API_ID_hipMemPoolGetAttribute: return "hipMemPoolGetAttribute"; + case HIP_API_ID_hipMemPoolImportFromShareableHandle: return "hipMemPoolImportFromShareableHandle"; + case HIP_API_ID_hipMemPoolImportPointer: return "hipMemPoolImportPointer"; + case HIP_API_ID_hipMemPoolSetAccess: return "hipMemPoolSetAccess"; + case HIP_API_ID_hipMemPoolSetAttribute: return "hipMemPoolSetAttribute"; + case HIP_API_ID_hipMemPoolTrimTo: return "hipMemPoolTrimTo"; + case HIP_API_ID_hipMemPrefetchAsync: return "hipMemPrefetchAsync"; + case HIP_API_ID_hipMemPtrGetInfo: return "hipMemPtrGetInfo"; + case HIP_API_ID_hipMemRangeGetAttribute: return "hipMemRangeGetAttribute"; + case HIP_API_ID_hipMemRangeGetAttributes: return "hipMemRangeGetAttributes"; + case HIP_API_ID_hipMemRelease: return "hipMemRelease"; + case HIP_API_ID_hipMemRetainAllocationHandle: return "hipMemRetainAllocationHandle"; + case HIP_API_ID_hipMemSetAccess: return "hipMemSetAccess"; + case HIP_API_ID_hipMemUnmap: return 
"hipMemUnmap"; + case HIP_API_ID_hipMemcpy: return "hipMemcpy"; + case HIP_API_ID_hipMemcpy2D: return "hipMemcpy2D"; + case HIP_API_ID_hipMemcpy2DAsync: return "hipMemcpy2DAsync"; + case HIP_API_ID_hipMemcpy2DFromArray: return "hipMemcpy2DFromArray"; + case HIP_API_ID_hipMemcpy2DFromArrayAsync: return "hipMemcpy2DFromArrayAsync"; + case HIP_API_ID_hipMemcpy2DToArray: return "hipMemcpy2DToArray"; + case HIP_API_ID_hipMemcpy2DToArrayAsync: return "hipMemcpy2DToArrayAsync"; + case HIP_API_ID_hipMemcpy3D: return "hipMemcpy3D"; + case HIP_API_ID_hipMemcpy3DAsync: return "hipMemcpy3DAsync"; + case HIP_API_ID_hipMemcpyAsync: return "hipMemcpyAsync"; + case HIP_API_ID_hipMemcpyAtoH: return "hipMemcpyAtoH"; + case HIP_API_ID_hipMemcpyDtoD: return "hipMemcpyDtoD"; + case HIP_API_ID_hipMemcpyDtoDAsync: return "hipMemcpyDtoDAsync"; + case HIP_API_ID_hipMemcpyDtoH: return "hipMemcpyDtoH"; + case HIP_API_ID_hipMemcpyDtoHAsync: return "hipMemcpyDtoHAsync"; + case HIP_API_ID_hipMemcpyFromArray: return "hipMemcpyFromArray"; + case HIP_API_ID_hipMemcpyFromSymbol: return "hipMemcpyFromSymbol"; + case HIP_API_ID_hipMemcpyFromSymbolAsync: return "hipMemcpyFromSymbolAsync"; + case HIP_API_ID_hipMemcpyHtoA: return "hipMemcpyHtoA"; + case HIP_API_ID_hipMemcpyHtoD: return "hipMemcpyHtoD"; + case HIP_API_ID_hipMemcpyHtoDAsync: return "hipMemcpyHtoDAsync"; + case HIP_API_ID_hipMemcpyParam2D: return "hipMemcpyParam2D"; + case HIP_API_ID_hipMemcpyParam2DAsync: return "hipMemcpyParam2DAsync"; + case HIP_API_ID_hipMemcpyPeer: return "hipMemcpyPeer"; + case HIP_API_ID_hipMemcpyPeerAsync: return "hipMemcpyPeerAsync"; + case HIP_API_ID_hipMemcpyToArray: return "hipMemcpyToArray"; + case HIP_API_ID_hipMemcpyToSymbol: return "hipMemcpyToSymbol"; + case HIP_API_ID_hipMemcpyToSymbolAsync: return "hipMemcpyToSymbolAsync"; + case HIP_API_ID_hipMemcpyWithStream: return "hipMemcpyWithStream"; + case HIP_API_ID_hipMemset: return "hipMemset"; + case HIP_API_ID_hipMemset2D: return "hipMemset2D"; + case 
HIP_API_ID_hipMemset2DAsync: return "hipMemset2DAsync"; + case HIP_API_ID_hipMemset3D: return "hipMemset3D"; + case HIP_API_ID_hipMemset3DAsync: return "hipMemset3DAsync"; + case HIP_API_ID_hipMemsetAsync: return "hipMemsetAsync"; + case HIP_API_ID_hipMemsetD16: return "hipMemsetD16"; + case HIP_API_ID_hipMemsetD16Async: return "hipMemsetD16Async"; + case HIP_API_ID_hipMemsetD32: return "hipMemsetD32"; + case HIP_API_ID_hipMemsetD32Async: return "hipMemsetD32Async"; + case HIP_API_ID_hipMemsetD8: return "hipMemsetD8"; + case HIP_API_ID_hipMemsetD8Async: return "hipMemsetD8Async"; + case HIP_API_ID_hipMipmappedArrayCreate: return "hipMipmappedArrayCreate"; + case HIP_API_ID_hipMipmappedArrayDestroy: return "hipMipmappedArrayDestroy"; + case HIP_API_ID_hipMipmappedArrayGetLevel: return "hipMipmappedArrayGetLevel"; + case HIP_API_ID_hipModuleGetFunction: return "hipModuleGetFunction"; + case HIP_API_ID_hipModuleGetGlobal: return "hipModuleGetGlobal"; + case HIP_API_ID_hipModuleGetTexRef: return "hipModuleGetTexRef"; + case HIP_API_ID_hipModuleLaunchCooperativeKernel: return "hipModuleLaunchCooperativeKernel"; + case HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice: return "hipModuleLaunchCooperativeKernelMultiDevice"; + case HIP_API_ID_hipModuleLaunchKernel: return "hipModuleLaunchKernel"; + case HIP_API_ID_hipModuleLoad: return "hipModuleLoad"; + case HIP_API_ID_hipModuleLoadData: return "hipModuleLoadData"; + case HIP_API_ID_hipModuleLoadDataEx: return "hipModuleLoadDataEx"; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: return "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor"; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"; + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: return "hipModuleOccupancyMaxPotentialBlockSize"; + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: return 
"hipModuleOccupancyMaxPotentialBlockSizeWithFlags"; + case HIP_API_ID_hipModuleUnload: return "hipModuleUnload"; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: return "hipOccupancyMaxActiveBlocksPerMultiprocessor"; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: return "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"; + case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: return "hipOccupancyMaxPotentialBlockSize"; + case HIP_API_ID_hipPeekAtLastError: return "hipPeekAtLastError"; + case HIP_API_ID_hipPointerGetAttribute: return "hipPointerGetAttribute"; + case HIP_API_ID_hipPointerGetAttributes: return "hipPointerGetAttributes"; + case HIP_API_ID_hipPointerSetAttribute: return "hipPointerSetAttribute"; + case HIP_API_ID_hipProfilerStart: return "hipProfilerStart"; + case HIP_API_ID_hipProfilerStop: return "hipProfilerStop"; + case HIP_API_ID_hipRuntimeGetVersion: return "hipRuntimeGetVersion"; + case HIP_API_ID_hipSetDevice: return "hipSetDevice"; + case HIP_API_ID_hipSetDeviceFlags: return "hipSetDeviceFlags"; + case HIP_API_ID_hipSetupArgument: return "hipSetupArgument"; + case HIP_API_ID_hipSignalExternalSemaphoresAsync: return "hipSignalExternalSemaphoresAsync"; + case HIP_API_ID_hipStreamAddCallback: return "hipStreamAddCallback"; + case HIP_API_ID_hipStreamAttachMemAsync: return "hipStreamAttachMemAsync"; + case HIP_API_ID_hipStreamBeginCapture: return "hipStreamBeginCapture"; + case HIP_API_ID_hipStreamCreate: return "hipStreamCreate"; + case HIP_API_ID_hipStreamCreateWithFlags: return "hipStreamCreateWithFlags"; + case HIP_API_ID_hipStreamCreateWithPriority: return "hipStreamCreateWithPriority"; + case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy"; + case HIP_API_ID_hipStreamEndCapture: return "hipStreamEndCapture"; + case HIP_API_ID_hipStreamGetCaptureInfo: return "hipStreamGetCaptureInfo"; + case HIP_API_ID_hipStreamGetCaptureInfo_v2: return "hipStreamGetCaptureInfo_v2"; + case 
HIP_API_ID_hipStreamGetDevice: return "hipStreamGetDevice"; + case HIP_API_ID_hipStreamGetFlags: return "hipStreamGetFlags"; + case HIP_API_ID_hipStreamGetPriority: return "hipStreamGetPriority"; + case HIP_API_ID_hipStreamIsCapturing: return "hipStreamIsCapturing"; + case HIP_API_ID_hipStreamQuery: return "hipStreamQuery"; + case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize"; + case HIP_API_ID_hipStreamUpdateCaptureDependencies: return "hipStreamUpdateCaptureDependencies"; + case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent"; + case HIP_API_ID_hipStreamWaitValue32: return "hipStreamWaitValue32"; + case HIP_API_ID_hipStreamWaitValue64: return "hipStreamWaitValue64"; + case HIP_API_ID_hipStreamWriteValue32: return "hipStreamWriteValue32"; + case HIP_API_ID_hipStreamWriteValue64: return "hipStreamWriteValue64"; + case HIP_API_ID_hipTexRefGetAddress: return "hipTexRefGetAddress"; + case HIP_API_ID_hipTexRefGetFlags: return "hipTexRefGetFlags"; + case HIP_API_ID_hipTexRefGetFormat: return "hipTexRefGetFormat"; + case HIP_API_ID_hipTexRefGetMaxAnisotropy: return "hipTexRefGetMaxAnisotropy"; + case HIP_API_ID_hipTexRefGetMipMappedArray: return "hipTexRefGetMipMappedArray"; + case HIP_API_ID_hipTexRefGetMipmapLevelBias: return "hipTexRefGetMipmapLevelBias"; + case HIP_API_ID_hipTexRefGetMipmapLevelClamp: return "hipTexRefGetMipmapLevelClamp"; + case HIP_API_ID_hipTexRefSetAddress: return "hipTexRefSetAddress"; + case HIP_API_ID_hipTexRefSetAddress2D: return "hipTexRefSetAddress2D"; + case HIP_API_ID_hipTexRefSetArray: return "hipTexRefSetArray"; + case HIP_API_ID_hipTexRefSetBorderColor: return "hipTexRefSetBorderColor"; + case HIP_API_ID_hipTexRefSetFlags: return "hipTexRefSetFlags"; + case HIP_API_ID_hipTexRefSetFormat: return "hipTexRefSetFormat"; + case HIP_API_ID_hipTexRefSetMaxAnisotropy: return "hipTexRefSetMaxAnisotropy"; + case HIP_API_ID_hipTexRefSetMipmapLevelBias: return "hipTexRefSetMipmapLevelBias"; + case 
HIP_API_ID_hipTexRefSetMipmapLevelClamp: return "hipTexRefSetMipmapLevelClamp"; + case HIP_API_ID_hipTexRefSetMipmappedArray: return "hipTexRefSetMipmappedArray"; + case HIP_API_ID_hipThreadExchangeStreamCaptureMode: return "hipThreadExchangeStreamCaptureMode"; + case HIP_API_ID_hipUserObjectCreate: return "hipUserObjectCreate"; + case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease"; + case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain"; + case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync"; + }; + return "unknown"; +}; + +#include +// Return the HIP API callback ID for a given name +static inline uint32_t hipApiIdByName(const char* name) { + if (strcmp("__hipPopCallConfiguration", name) == 0) return HIP_API_ID___hipPopCallConfiguration; + if (strcmp("__hipPushCallConfiguration", name) == 0) return HIP_API_ID___hipPushCallConfiguration; + if (strcmp("hipArray3DCreate", name) == 0) return HIP_API_ID_hipArray3DCreate; + if (strcmp("hipArray3DGetDescriptor", name) == 0) return HIP_API_ID_hipArray3DGetDescriptor; + if (strcmp("hipArrayCreate", name) == 0) return HIP_API_ID_hipArrayCreate; + if (strcmp("hipArrayDestroy", name) == 0) return HIP_API_ID_hipArrayDestroy; + if (strcmp("hipArrayGetDescriptor", name) == 0) return HIP_API_ID_hipArrayGetDescriptor; + if (strcmp("hipArrayGetInfo", name) == 0) return HIP_API_ID_hipArrayGetInfo; + if (strcmp("hipChooseDevice", name) == 0) return HIP_API_ID_hipChooseDevice; + if (strcmp("hipConfigureCall", name) == 0) return HIP_API_ID_hipConfigureCall; + if (strcmp("hipCreateSurfaceObject", name) == 0) return HIP_API_ID_hipCreateSurfaceObject; + if (strcmp("hipCtxCreate", name) == 0) return HIP_API_ID_hipCtxCreate; + if (strcmp("hipCtxDestroy", name) == 0) return HIP_API_ID_hipCtxDestroy; + if (strcmp("hipCtxDisablePeerAccess", name) == 0) return HIP_API_ID_hipCtxDisablePeerAccess; + if (strcmp("hipCtxEnablePeerAccess", name) == 0) return 
HIP_API_ID_hipCtxEnablePeerAccess; + if (strcmp("hipCtxGetApiVersion", name) == 0) return HIP_API_ID_hipCtxGetApiVersion; + if (strcmp("hipCtxGetCacheConfig", name) == 0) return HIP_API_ID_hipCtxGetCacheConfig; + if (strcmp("hipCtxGetCurrent", name) == 0) return HIP_API_ID_hipCtxGetCurrent; + if (strcmp("hipCtxGetDevice", name) == 0) return HIP_API_ID_hipCtxGetDevice; + if (strcmp("hipCtxGetFlags", name) == 0) return HIP_API_ID_hipCtxGetFlags; + if (strcmp("hipCtxGetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxGetSharedMemConfig; + if (strcmp("hipCtxPopCurrent", name) == 0) return HIP_API_ID_hipCtxPopCurrent; + if (strcmp("hipCtxPushCurrent", name) == 0) return HIP_API_ID_hipCtxPushCurrent; + if (strcmp("hipCtxSetCacheConfig", name) == 0) return HIP_API_ID_hipCtxSetCacheConfig; + if (strcmp("hipCtxSetCurrent", name) == 0) return HIP_API_ID_hipCtxSetCurrent; + if (strcmp("hipCtxSetSharedMemConfig", name) == 0) return HIP_API_ID_hipCtxSetSharedMemConfig; + if (strcmp("hipCtxSynchronize", name) == 0) return HIP_API_ID_hipCtxSynchronize; + if (strcmp("hipDestroyExternalMemory", name) == 0) return HIP_API_ID_hipDestroyExternalMemory; + if (strcmp("hipDestroyExternalSemaphore", name) == 0) return HIP_API_ID_hipDestroyExternalSemaphore; + if (strcmp("hipDestroySurfaceObject", name) == 0) return HIP_API_ID_hipDestroySurfaceObject; + if (strcmp("hipDeviceCanAccessPeer", name) == 0) return HIP_API_ID_hipDeviceCanAccessPeer; + if (strcmp("hipDeviceComputeCapability", name) == 0) return HIP_API_ID_hipDeviceComputeCapability; + if (strcmp("hipDeviceDisablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceDisablePeerAccess; + if (strcmp("hipDeviceEnablePeerAccess", name) == 0) return HIP_API_ID_hipDeviceEnablePeerAccess; + if (strcmp("hipDeviceGet", name) == 0) return HIP_API_ID_hipDeviceGet; + if (strcmp("hipDeviceGetAttribute", name) == 0) return HIP_API_ID_hipDeviceGetAttribute; + if (strcmp("hipDeviceGetByPCIBusId", name) == 0) return 
HIP_API_ID_hipDeviceGetByPCIBusId; + if (strcmp("hipDeviceGetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceGetCacheConfig; + if (strcmp("hipDeviceGetDefaultMemPool", name) == 0) return HIP_API_ID_hipDeviceGetDefaultMemPool; + if (strcmp("hipDeviceGetGraphMemAttribute", name) == 0) return HIP_API_ID_hipDeviceGetGraphMemAttribute; + if (strcmp("hipDeviceGetLimit", name) == 0) return HIP_API_ID_hipDeviceGetLimit; + if (strcmp("hipDeviceGetMemPool", name) == 0) return HIP_API_ID_hipDeviceGetMemPool; + if (strcmp("hipDeviceGetName", name) == 0) return HIP_API_ID_hipDeviceGetName; + if (strcmp("hipDeviceGetP2PAttribute", name) == 0) return HIP_API_ID_hipDeviceGetP2PAttribute; + if (strcmp("hipDeviceGetPCIBusId", name) == 0) return HIP_API_ID_hipDeviceGetPCIBusId; + if (strcmp("hipDeviceGetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceGetSharedMemConfig; + if (strcmp("hipDeviceGetStreamPriorityRange", name) == 0) return HIP_API_ID_hipDeviceGetStreamPriorityRange; + if (strcmp("hipDeviceGetUuid", name) == 0) return HIP_API_ID_hipDeviceGetUuid; + if (strcmp("hipDeviceGraphMemTrim", name) == 0) return HIP_API_ID_hipDeviceGraphMemTrim; + if (strcmp("hipDevicePrimaryCtxGetState", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxGetState; + if (strcmp("hipDevicePrimaryCtxRelease", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRelease; + if (strcmp("hipDevicePrimaryCtxReset", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxReset; + if (strcmp("hipDevicePrimaryCtxRetain", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxRetain; + if (strcmp("hipDevicePrimaryCtxSetFlags", name) == 0) return HIP_API_ID_hipDevicePrimaryCtxSetFlags; + if (strcmp("hipDeviceReset", name) == 0) return HIP_API_ID_hipDeviceReset; + if (strcmp("hipDeviceSetCacheConfig", name) == 0) return HIP_API_ID_hipDeviceSetCacheConfig; + if (strcmp("hipDeviceSetGraphMemAttribute", name) == 0) return HIP_API_ID_hipDeviceSetGraphMemAttribute; + if (strcmp("hipDeviceSetLimit", name) == 0) return 
HIP_API_ID_hipDeviceSetLimit; + if (strcmp("hipDeviceSetMemPool", name) == 0) return HIP_API_ID_hipDeviceSetMemPool; + if (strcmp("hipDeviceSetSharedMemConfig", name) == 0) return HIP_API_ID_hipDeviceSetSharedMemConfig; + if (strcmp("hipDeviceSynchronize", name) == 0) return HIP_API_ID_hipDeviceSynchronize; + if (strcmp("hipDeviceTotalMem", name) == 0) return HIP_API_ID_hipDeviceTotalMem; + if (strcmp("hipDriverGetVersion", name) == 0) return HIP_API_ID_hipDriverGetVersion; + if (strcmp("hipDrvMemcpy2DUnaligned", name) == 0) return HIP_API_ID_hipDrvMemcpy2DUnaligned; + if (strcmp("hipDrvMemcpy3D", name) == 0) return HIP_API_ID_hipDrvMemcpy3D; + if (strcmp("hipDrvMemcpy3DAsync", name) == 0) return HIP_API_ID_hipDrvMemcpy3DAsync; + if (strcmp("hipDrvPointerGetAttributes", name) == 0) return HIP_API_ID_hipDrvPointerGetAttributes; + if (strcmp("hipEventCreate", name) == 0) return HIP_API_ID_hipEventCreate; + if (strcmp("hipEventCreateWithFlags", name) == 0) return HIP_API_ID_hipEventCreateWithFlags; + if (strcmp("hipEventDestroy", name) == 0) return HIP_API_ID_hipEventDestroy; + if (strcmp("hipEventElapsedTime", name) == 0) return HIP_API_ID_hipEventElapsedTime; + if (strcmp("hipEventQuery", name) == 0) return HIP_API_ID_hipEventQuery; + if (strcmp("hipEventRecord", name) == 0) return HIP_API_ID_hipEventRecord; + if (strcmp("hipEventSynchronize", name) == 0) return HIP_API_ID_hipEventSynchronize; + if (strcmp("hipExtGetLinkTypeAndHopCount", name) == 0) return HIP_API_ID_hipExtGetLinkTypeAndHopCount; + if (strcmp("hipExtLaunchKernel", name) == 0) return HIP_API_ID_hipExtLaunchKernel; + if (strcmp("hipExtLaunchMultiKernelMultiDevice", name) == 0) return HIP_API_ID_hipExtLaunchMultiKernelMultiDevice; + if (strcmp("hipExtMallocWithFlags", name) == 0) return HIP_API_ID_hipExtMallocWithFlags; + if (strcmp("hipExtModuleLaunchKernel", name) == 0) return HIP_API_ID_hipExtModuleLaunchKernel; + if (strcmp("hipExtStreamCreateWithCUMask", name) == 0) return 
HIP_API_ID_hipExtStreamCreateWithCUMask; + if (strcmp("hipExtStreamGetCUMask", name) == 0) return HIP_API_ID_hipExtStreamGetCUMask; + if (strcmp("hipExternalMemoryGetMappedBuffer", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedBuffer; + if (strcmp("hipFree", name) == 0) return HIP_API_ID_hipFree; + if (strcmp("hipFreeArray", name) == 0) return HIP_API_ID_hipFreeArray; + if (strcmp("hipFreeAsync", name) == 0) return HIP_API_ID_hipFreeAsync; + if (strcmp("hipFreeHost", name) == 0) return HIP_API_ID_hipFreeHost; + if (strcmp("hipFreeMipmappedArray", name) == 0) return HIP_API_ID_hipFreeMipmappedArray; + if (strcmp("hipFuncGetAttribute", name) == 0) return HIP_API_ID_hipFuncGetAttribute; + if (strcmp("hipFuncGetAttributes", name) == 0) return HIP_API_ID_hipFuncGetAttributes; + if (strcmp("hipFuncSetAttribute", name) == 0) return HIP_API_ID_hipFuncSetAttribute; + if (strcmp("hipFuncSetCacheConfig", name) == 0) return HIP_API_ID_hipFuncSetCacheConfig; + if (strcmp("hipFuncSetSharedMemConfig", name) == 0) return HIP_API_ID_hipFuncSetSharedMemConfig; + if (strcmp("hipGLGetDevices", name) == 0) return HIP_API_ID_hipGLGetDevices; + if (strcmp("hipGetChannelDesc", name) == 0) return HIP_API_ID_hipGetChannelDesc; + if (strcmp("hipGetDevice", name) == 0) return HIP_API_ID_hipGetDevice; + if (strcmp("hipGetDeviceCount", name) == 0) return HIP_API_ID_hipGetDeviceCount; + if (strcmp("hipGetDeviceFlags", name) == 0) return HIP_API_ID_hipGetDeviceFlags; + if (strcmp("hipGetDeviceProperties", name) == 0) return HIP_API_ID_hipGetDeviceProperties; + if (strcmp("hipGetErrorString", name) == 0) return HIP_API_ID_hipGetErrorString; + if (strcmp("hipGetLastError", name) == 0) return HIP_API_ID_hipGetLastError; + if (strcmp("hipGetMipmappedArrayLevel", name) == 0) return HIP_API_ID_hipGetMipmappedArrayLevel; + if (strcmp("hipGetSymbolAddress", name) == 0) return HIP_API_ID_hipGetSymbolAddress; + if (strcmp("hipGetSymbolSize", name) == 0) return HIP_API_ID_hipGetSymbolSize; + if 
(strcmp("hipGraphAddChildGraphNode", name) == 0) return HIP_API_ID_hipGraphAddChildGraphNode; + if (strcmp("hipGraphAddDependencies", name) == 0) return HIP_API_ID_hipGraphAddDependencies; + if (strcmp("hipGraphAddEmptyNode", name) == 0) return HIP_API_ID_hipGraphAddEmptyNode; + if (strcmp("hipGraphAddEventRecordNode", name) == 0) return HIP_API_ID_hipGraphAddEventRecordNode; + if (strcmp("hipGraphAddEventWaitNode", name) == 0) return HIP_API_ID_hipGraphAddEventWaitNode; + if (strcmp("hipGraphAddHostNode", name) == 0) return HIP_API_ID_hipGraphAddHostNode; + if (strcmp("hipGraphAddKernelNode", name) == 0) return HIP_API_ID_hipGraphAddKernelNode; + if (strcmp("hipGraphAddMemAllocNode", name) == 0) return HIP_API_ID_hipGraphAddMemAllocNode; + if (strcmp("hipGraphAddMemFreeNode", name) == 0) return HIP_API_ID_hipGraphAddMemFreeNode; + if (strcmp("hipGraphAddMemcpyNode", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNode; + if (strcmp("hipGraphAddMemcpyNode1D", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNode1D; + if (strcmp("hipGraphAddMemcpyNodeFromSymbol", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol; + if (strcmp("hipGraphAddMemcpyNodeToSymbol", name) == 0) return HIP_API_ID_hipGraphAddMemcpyNodeToSymbol; + if (strcmp("hipGraphAddMemsetNode", name) == 0) return HIP_API_ID_hipGraphAddMemsetNode; + if (strcmp("hipGraphChildGraphNodeGetGraph", name) == 0) return HIP_API_ID_hipGraphChildGraphNodeGetGraph; + if (strcmp("hipGraphClone", name) == 0) return HIP_API_ID_hipGraphClone; + if (strcmp("hipGraphCreate", name) == 0) return HIP_API_ID_hipGraphCreate; + if (strcmp("hipGraphDebugDotPrint", name) == 0) return HIP_API_ID_hipGraphDebugDotPrint; + if (strcmp("hipGraphDestroy", name) == 0) return HIP_API_ID_hipGraphDestroy; + if (strcmp("hipGraphDestroyNode", name) == 0) return HIP_API_ID_hipGraphDestroyNode; + if (strcmp("hipGraphEventRecordNodeGetEvent", name) == 0) return HIP_API_ID_hipGraphEventRecordNodeGetEvent; + if 
(strcmp("hipGraphEventRecordNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphEventRecordNodeSetEvent; + if (strcmp("hipGraphEventWaitNodeGetEvent", name) == 0) return HIP_API_ID_hipGraphEventWaitNodeGetEvent; + if (strcmp("hipGraphEventWaitNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphEventWaitNodeSetEvent; + if (strcmp("hipGraphExecChildGraphNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecChildGraphNodeSetParams; + if (strcmp("hipGraphExecDestroy", name) == 0) return HIP_API_ID_hipGraphExecDestroy; + if (strcmp("hipGraphExecEventRecordNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphExecEventRecordNodeSetEvent; + if (strcmp("hipGraphExecEventWaitNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphExecEventWaitNodeSetEvent; + if (strcmp("hipGraphExecHostNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecHostNodeSetParams; + if (strcmp("hipGraphExecKernelNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecKernelNodeSetParams; + if (strcmp("hipGraphExecMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParams; + if (strcmp("hipGraphExecMemcpyNodeSetParams1D", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D; + if (strcmp("hipGraphExecMemcpyNodeSetParamsFromSymbol", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol; + if (strcmp("hipGraphExecMemcpyNodeSetParamsToSymbol", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol; + if (strcmp("hipGraphExecMemsetNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecMemsetNodeSetParams; + if (strcmp("hipGraphExecUpdate", name) == 0) return HIP_API_ID_hipGraphExecUpdate; + if (strcmp("hipGraphGetEdges", name) == 0) return HIP_API_ID_hipGraphGetEdges; + if (strcmp("hipGraphGetNodes", name) == 0) return HIP_API_ID_hipGraphGetNodes; + if (strcmp("hipGraphGetRootNodes", name) == 0) return HIP_API_ID_hipGraphGetRootNodes; + if (strcmp("hipGraphHostNodeGetParams", name) == 0) return 
HIP_API_ID_hipGraphHostNodeGetParams; + if (strcmp("hipGraphHostNodeSetParams", name) == 0) return HIP_API_ID_hipGraphHostNodeSetParams; + if (strcmp("hipGraphInstantiate", name) == 0) return HIP_API_ID_hipGraphInstantiate; + if (strcmp("hipGraphInstantiateWithFlags", name) == 0) return HIP_API_ID_hipGraphInstantiateWithFlags; + if (strcmp("hipGraphKernelNodeCopyAttributes", name) == 0) return HIP_API_ID_hipGraphKernelNodeCopyAttributes; + if (strcmp("hipGraphKernelNodeGetAttribute", name) == 0) return HIP_API_ID_hipGraphKernelNodeGetAttribute; + if (strcmp("hipGraphKernelNodeGetParams", name) == 0) return HIP_API_ID_hipGraphKernelNodeGetParams; + if (strcmp("hipGraphKernelNodeSetAttribute", name) == 0) return HIP_API_ID_hipGraphKernelNodeSetAttribute; + if (strcmp("hipGraphKernelNodeSetParams", name) == 0) return HIP_API_ID_hipGraphKernelNodeSetParams; + if (strcmp("hipGraphLaunch", name) == 0) return HIP_API_ID_hipGraphLaunch; + if (strcmp("hipGraphMemAllocNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemAllocNodeGetParams; + if (strcmp("hipGraphMemFreeNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemFreeNodeGetParams; + if (strcmp("hipGraphMemcpyNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeGetParams; + if (strcmp("hipGraphMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParams; + if (strcmp("hipGraphMemcpyNodeSetParams1D", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParams1D; + if (strcmp("hipGraphMemcpyNodeSetParamsFromSymbol", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol; + if (strcmp("hipGraphMemcpyNodeSetParamsToSymbol", name) == 0) return HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol; + if (strcmp("hipGraphMemsetNodeGetParams", name) == 0) return HIP_API_ID_hipGraphMemsetNodeGetParams; + if (strcmp("hipGraphMemsetNodeSetParams", name) == 0) return HIP_API_ID_hipGraphMemsetNodeSetParams; + if (strcmp("hipGraphNodeFindInClone", name) == 0) return 
HIP_API_ID_hipGraphNodeFindInClone; + if (strcmp("hipGraphNodeGetDependencies", name) == 0) return HIP_API_ID_hipGraphNodeGetDependencies; + if (strcmp("hipGraphNodeGetDependentNodes", name) == 0) return HIP_API_ID_hipGraphNodeGetDependentNodes; + if (strcmp("hipGraphNodeGetEnabled", name) == 0) return HIP_API_ID_hipGraphNodeGetEnabled; + if (strcmp("hipGraphNodeGetType", name) == 0) return HIP_API_ID_hipGraphNodeGetType; + if (strcmp("hipGraphNodeSetEnabled", name) == 0) return HIP_API_ID_hipGraphNodeSetEnabled; + if (strcmp("hipGraphReleaseUserObject", name) == 0) return HIP_API_ID_hipGraphReleaseUserObject; + if (strcmp("hipGraphRemoveDependencies", name) == 0) return HIP_API_ID_hipGraphRemoveDependencies; + if (strcmp("hipGraphRetainUserObject", name) == 0) return HIP_API_ID_hipGraphRetainUserObject; + if (strcmp("hipGraphUpload", name) == 0) return HIP_API_ID_hipGraphUpload; + if (strcmp("hipGraphicsGLRegisterBuffer", name) == 0) return HIP_API_ID_hipGraphicsGLRegisterBuffer; + if (strcmp("hipGraphicsGLRegisterImage", name) == 0) return HIP_API_ID_hipGraphicsGLRegisterImage; + if (strcmp("hipGraphicsMapResources", name) == 0) return HIP_API_ID_hipGraphicsMapResources; + if (strcmp("hipGraphicsResourceGetMappedPointer", name) == 0) return HIP_API_ID_hipGraphicsResourceGetMappedPointer; + if (strcmp("hipGraphicsSubResourceGetMappedArray", name) == 0) return HIP_API_ID_hipGraphicsSubResourceGetMappedArray; + if (strcmp("hipGraphicsUnmapResources", name) == 0) return HIP_API_ID_hipGraphicsUnmapResources; + if (strcmp("hipGraphicsUnregisterResource", name) == 0) return HIP_API_ID_hipGraphicsUnregisterResource; + if (strcmp("hipHccModuleLaunchKernel", name) == 0) return HIP_API_ID_hipHccModuleLaunchKernel; + if (strcmp("hipHostAlloc", name) == 0) return HIP_API_ID_hipHostAlloc; + if (strcmp("hipHostFree", name) == 0) return HIP_API_ID_hipHostFree; + if (strcmp("hipHostGetDevicePointer", name) == 0) return HIP_API_ID_hipHostGetDevicePointer; + if 
(strcmp("hipHostGetFlags", name) == 0) return HIP_API_ID_hipHostGetFlags; + if (strcmp("hipHostMalloc", name) == 0) return HIP_API_ID_hipHostMalloc; + if (strcmp("hipHostRegister", name) == 0) return HIP_API_ID_hipHostRegister; + if (strcmp("hipHostUnregister", name) == 0) return HIP_API_ID_hipHostUnregister; + if (strcmp("hipImportExternalMemory", name) == 0) return HIP_API_ID_hipImportExternalMemory; + if (strcmp("hipImportExternalSemaphore", name) == 0) return HIP_API_ID_hipImportExternalSemaphore; + if (strcmp("hipInit", name) == 0) return HIP_API_ID_hipInit; + if (strcmp("hipIpcCloseMemHandle", name) == 0) return HIP_API_ID_hipIpcCloseMemHandle; + if (strcmp("hipIpcGetEventHandle", name) == 0) return HIP_API_ID_hipIpcGetEventHandle; + if (strcmp("hipIpcGetMemHandle", name) == 0) return HIP_API_ID_hipIpcGetMemHandle; + if (strcmp("hipIpcOpenEventHandle", name) == 0) return HIP_API_ID_hipIpcOpenEventHandle; + if (strcmp("hipIpcOpenMemHandle", name) == 0) return HIP_API_ID_hipIpcOpenMemHandle; + if (strcmp("hipLaunchByPtr", name) == 0) return HIP_API_ID_hipLaunchByPtr; + if (strcmp("hipLaunchCooperativeKernel", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernel; + if (strcmp("hipLaunchCooperativeKernelMultiDevice", name) == 0) return HIP_API_ID_hipLaunchCooperativeKernelMultiDevice; + if (strcmp("hipLaunchHostFunc", name) == 0) return HIP_API_ID_hipLaunchHostFunc; + if (strcmp("hipLaunchKernel", name) == 0) return HIP_API_ID_hipLaunchKernel; + if (strcmp("hipMalloc", name) == 0) return HIP_API_ID_hipMalloc; + if (strcmp("hipMalloc3D", name) == 0) return HIP_API_ID_hipMalloc3D; + if (strcmp("hipMalloc3DArray", name) == 0) return HIP_API_ID_hipMalloc3DArray; + if (strcmp("hipMallocArray", name) == 0) return HIP_API_ID_hipMallocArray; + if (strcmp("hipMallocAsync", name) == 0) return HIP_API_ID_hipMallocAsync; + if (strcmp("hipMallocFromPoolAsync", name) == 0) return HIP_API_ID_hipMallocFromPoolAsync; + if (strcmp("hipMallocHost", name) == 0) return 
HIP_API_ID_hipMallocHost; + if (strcmp("hipMallocManaged", name) == 0) return HIP_API_ID_hipMallocManaged; + if (strcmp("hipMallocMipmappedArray", name) == 0) return HIP_API_ID_hipMallocMipmappedArray; + if (strcmp("hipMallocPitch", name) == 0) return HIP_API_ID_hipMallocPitch; + if (strcmp("hipMemAddressFree", name) == 0) return HIP_API_ID_hipMemAddressFree; + if (strcmp("hipMemAddressReserve", name) == 0) return HIP_API_ID_hipMemAddressReserve; + if (strcmp("hipMemAdvise", name) == 0) return HIP_API_ID_hipMemAdvise; + if (strcmp("hipMemAllocHost", name) == 0) return HIP_API_ID_hipMemAllocHost; + if (strcmp("hipMemAllocPitch", name) == 0) return HIP_API_ID_hipMemAllocPitch; + if (strcmp("hipMemCreate", name) == 0) return HIP_API_ID_hipMemCreate; + if (strcmp("hipMemExportToShareableHandle", name) == 0) return HIP_API_ID_hipMemExportToShareableHandle; + if (strcmp("hipMemGetAccess", name) == 0) return HIP_API_ID_hipMemGetAccess; + if (strcmp("hipMemGetAddressRange", name) == 0) return HIP_API_ID_hipMemGetAddressRange; + if (strcmp("hipMemGetAllocationGranularity", name) == 0) return HIP_API_ID_hipMemGetAllocationGranularity; + if (strcmp("hipMemGetAllocationPropertiesFromHandle", name) == 0) return HIP_API_ID_hipMemGetAllocationPropertiesFromHandle; + if (strcmp("hipMemGetInfo", name) == 0) return HIP_API_ID_hipMemGetInfo; + if (strcmp("hipMemImportFromShareableHandle", name) == 0) return HIP_API_ID_hipMemImportFromShareableHandle; + if (strcmp("hipMemMap", name) == 0) return HIP_API_ID_hipMemMap; + if (strcmp("hipMemMapArrayAsync", name) == 0) return HIP_API_ID_hipMemMapArrayAsync; + if (strcmp("hipMemPoolCreate", name) == 0) return HIP_API_ID_hipMemPoolCreate; + if (strcmp("hipMemPoolDestroy", name) == 0) return HIP_API_ID_hipMemPoolDestroy; + if (strcmp("hipMemPoolExportPointer", name) == 0) return HIP_API_ID_hipMemPoolExportPointer; + if (strcmp("hipMemPoolExportToShareableHandle", name) == 0) return HIP_API_ID_hipMemPoolExportToShareableHandle; + if 
(strcmp("hipMemPoolGetAccess", name) == 0) return HIP_API_ID_hipMemPoolGetAccess; + if (strcmp("hipMemPoolGetAttribute", name) == 0) return HIP_API_ID_hipMemPoolGetAttribute; + if (strcmp("hipMemPoolImportFromShareableHandle", name) == 0) return HIP_API_ID_hipMemPoolImportFromShareableHandle; + if (strcmp("hipMemPoolImportPointer", name) == 0) return HIP_API_ID_hipMemPoolImportPointer; + if (strcmp("hipMemPoolSetAccess", name) == 0) return HIP_API_ID_hipMemPoolSetAccess; + if (strcmp("hipMemPoolSetAttribute", name) == 0) return HIP_API_ID_hipMemPoolSetAttribute; + if (strcmp("hipMemPoolTrimTo", name) == 0) return HIP_API_ID_hipMemPoolTrimTo; + if (strcmp("hipMemPrefetchAsync", name) == 0) return HIP_API_ID_hipMemPrefetchAsync; + if (strcmp("hipMemPtrGetInfo", name) == 0) return HIP_API_ID_hipMemPtrGetInfo; + if (strcmp("hipMemRangeGetAttribute", name) == 0) return HIP_API_ID_hipMemRangeGetAttribute; + if (strcmp("hipMemRangeGetAttributes", name) == 0) return HIP_API_ID_hipMemRangeGetAttributes; + if (strcmp("hipMemRelease", name) == 0) return HIP_API_ID_hipMemRelease; + if (strcmp("hipMemRetainAllocationHandle", name) == 0) return HIP_API_ID_hipMemRetainAllocationHandle; + if (strcmp("hipMemSetAccess", name) == 0) return HIP_API_ID_hipMemSetAccess; + if (strcmp("hipMemUnmap", name) == 0) return HIP_API_ID_hipMemUnmap; + if (strcmp("hipMemcpy", name) == 0) return HIP_API_ID_hipMemcpy; + if (strcmp("hipMemcpy2D", name) == 0) return HIP_API_ID_hipMemcpy2D; + if (strcmp("hipMemcpy2DAsync", name) == 0) return HIP_API_ID_hipMemcpy2DAsync; + if (strcmp("hipMemcpy2DFromArray", name) == 0) return HIP_API_ID_hipMemcpy2DFromArray; + if (strcmp("hipMemcpy2DFromArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DFromArrayAsync; + if (strcmp("hipMemcpy2DToArray", name) == 0) return HIP_API_ID_hipMemcpy2DToArray; + if (strcmp("hipMemcpy2DToArrayAsync", name) == 0) return HIP_API_ID_hipMemcpy2DToArrayAsync; + if (strcmp("hipMemcpy3D", name) == 0) return HIP_API_ID_hipMemcpy3D; + 
if (strcmp("hipMemcpy3DAsync", name) == 0) return HIP_API_ID_hipMemcpy3DAsync; + if (strcmp("hipMemcpyAsync", name) == 0) return HIP_API_ID_hipMemcpyAsync; + if (strcmp("hipMemcpyAtoH", name) == 0) return HIP_API_ID_hipMemcpyAtoH; + if (strcmp("hipMemcpyDtoD", name) == 0) return HIP_API_ID_hipMemcpyDtoD; + if (strcmp("hipMemcpyDtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoDAsync; + if (strcmp("hipMemcpyDtoH", name) == 0) return HIP_API_ID_hipMemcpyDtoH; + if (strcmp("hipMemcpyDtoHAsync", name) == 0) return HIP_API_ID_hipMemcpyDtoHAsync; + if (strcmp("hipMemcpyFromArray", name) == 0) return HIP_API_ID_hipMemcpyFromArray; + if (strcmp("hipMemcpyFromSymbol", name) == 0) return HIP_API_ID_hipMemcpyFromSymbol; + if (strcmp("hipMemcpyFromSymbolAsync", name) == 0) return HIP_API_ID_hipMemcpyFromSymbolAsync; + if (strcmp("hipMemcpyHtoA", name) == 0) return HIP_API_ID_hipMemcpyHtoA; + if (strcmp("hipMemcpyHtoD", name) == 0) return HIP_API_ID_hipMemcpyHtoD; + if (strcmp("hipMemcpyHtoDAsync", name) == 0) return HIP_API_ID_hipMemcpyHtoDAsync; + if (strcmp("hipMemcpyParam2D", name) == 0) return HIP_API_ID_hipMemcpyParam2D; + if (strcmp("hipMemcpyParam2DAsync", name) == 0) return HIP_API_ID_hipMemcpyParam2DAsync; + if (strcmp("hipMemcpyPeer", name) == 0) return HIP_API_ID_hipMemcpyPeer; + if (strcmp("hipMemcpyPeerAsync", name) == 0) return HIP_API_ID_hipMemcpyPeerAsync; + if (strcmp("hipMemcpyToArray", name) == 0) return HIP_API_ID_hipMemcpyToArray; + if (strcmp("hipMemcpyToSymbol", name) == 0) return HIP_API_ID_hipMemcpyToSymbol; + if (strcmp("hipMemcpyToSymbolAsync", name) == 0) return HIP_API_ID_hipMemcpyToSymbolAsync; + if (strcmp("hipMemcpyWithStream", name) == 0) return HIP_API_ID_hipMemcpyWithStream; + if (strcmp("hipMemset", name) == 0) return HIP_API_ID_hipMemset; + if (strcmp("hipMemset2D", name) == 0) return HIP_API_ID_hipMemset2D; + if (strcmp("hipMemset2DAsync", name) == 0) return HIP_API_ID_hipMemset2DAsync; + if (strcmp("hipMemset3D", name) == 0) return 
HIP_API_ID_hipMemset3D; + if (strcmp("hipMemset3DAsync", name) == 0) return HIP_API_ID_hipMemset3DAsync; + if (strcmp("hipMemsetAsync", name) == 0) return HIP_API_ID_hipMemsetAsync; + if (strcmp("hipMemsetD16", name) == 0) return HIP_API_ID_hipMemsetD16; + if (strcmp("hipMemsetD16Async", name) == 0) return HIP_API_ID_hipMemsetD16Async; + if (strcmp("hipMemsetD32", name) == 0) return HIP_API_ID_hipMemsetD32; + if (strcmp("hipMemsetD32Async", name) == 0) return HIP_API_ID_hipMemsetD32Async; + if (strcmp("hipMemsetD8", name) == 0) return HIP_API_ID_hipMemsetD8; + if (strcmp("hipMemsetD8Async", name) == 0) return HIP_API_ID_hipMemsetD8Async; + if (strcmp("hipMipmappedArrayCreate", name) == 0) return HIP_API_ID_hipMipmappedArrayCreate; + if (strcmp("hipMipmappedArrayDestroy", name) == 0) return HIP_API_ID_hipMipmappedArrayDestroy; + if (strcmp("hipMipmappedArrayGetLevel", name) == 0) return HIP_API_ID_hipMipmappedArrayGetLevel; + if (strcmp("hipModuleGetFunction", name) == 0) return HIP_API_ID_hipModuleGetFunction; + if (strcmp("hipModuleGetGlobal", name) == 0) return HIP_API_ID_hipModuleGetGlobal; + if (strcmp("hipModuleGetTexRef", name) == 0) return HIP_API_ID_hipModuleGetTexRef; + if (strcmp("hipModuleLaunchCooperativeKernel", name) == 0) return HIP_API_ID_hipModuleLaunchCooperativeKernel; + if (strcmp("hipModuleLaunchCooperativeKernelMultiDevice", name) == 0) return HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice; + if (strcmp("hipModuleLaunchKernel", name) == 0) return HIP_API_ID_hipModuleLaunchKernel; + if (strcmp("hipModuleLoad", name) == 0) return HIP_API_ID_hipModuleLoad; + if (strcmp("hipModuleLoadData", name) == 0) return HIP_API_ID_hipModuleLoadData; + if (strcmp("hipModuleLoadDataEx", name) == 0) return HIP_API_ID_hipModuleLoadDataEx; + if (strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor; + if 
(strcmp("hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + if (strcmp("hipModuleOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize; + if (strcmp("hipModuleOccupancyMaxPotentialBlockSizeWithFlags", name) == 0) return HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags; + if (strcmp("hipModuleUnload", name) == 0) return HIP_API_ID_hipModuleUnload; + if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessor", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor; + if (strcmp("hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", name) == 0) return HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + if (strcmp("hipOccupancyMaxPotentialBlockSize", name) == 0) return HIP_API_ID_hipOccupancyMaxPotentialBlockSize; + if (strcmp("hipPeekAtLastError", name) == 0) return HIP_API_ID_hipPeekAtLastError; + if (strcmp("hipPointerGetAttribute", name) == 0) return HIP_API_ID_hipPointerGetAttribute; + if (strcmp("hipPointerGetAttributes", name) == 0) return HIP_API_ID_hipPointerGetAttributes; + if (strcmp("hipPointerSetAttribute", name) == 0) return HIP_API_ID_hipPointerSetAttribute; + if (strcmp("hipProfilerStart", name) == 0) return HIP_API_ID_hipProfilerStart; + if (strcmp("hipProfilerStop", name) == 0) return HIP_API_ID_hipProfilerStop; + if (strcmp("hipRuntimeGetVersion", name) == 0) return HIP_API_ID_hipRuntimeGetVersion; + if (strcmp("hipSetDevice", name) == 0) return HIP_API_ID_hipSetDevice; + if (strcmp("hipSetDeviceFlags", name) == 0) return HIP_API_ID_hipSetDeviceFlags; + if (strcmp("hipSetupArgument", name) == 0) return HIP_API_ID_hipSetupArgument; + if (strcmp("hipSignalExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipSignalExternalSemaphoresAsync; + if (strcmp("hipStreamAddCallback", name) == 0) return HIP_API_ID_hipStreamAddCallback; + if 
(strcmp("hipStreamAttachMemAsync", name) == 0) return HIP_API_ID_hipStreamAttachMemAsync; + if (strcmp("hipStreamBeginCapture", name) == 0) return HIP_API_ID_hipStreamBeginCapture; + if (strcmp("hipStreamCreate", name) == 0) return HIP_API_ID_hipStreamCreate; + if (strcmp("hipStreamCreateWithFlags", name) == 0) return HIP_API_ID_hipStreamCreateWithFlags; + if (strcmp("hipStreamCreateWithPriority", name) == 0) return HIP_API_ID_hipStreamCreateWithPriority; + if (strcmp("hipStreamDestroy", name) == 0) return HIP_API_ID_hipStreamDestroy; + if (strcmp("hipStreamEndCapture", name) == 0) return HIP_API_ID_hipStreamEndCapture; + if (strcmp("hipStreamGetCaptureInfo", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo; + if (strcmp("hipStreamGetCaptureInfo_v2", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo_v2; + if (strcmp("hipStreamGetDevice", name) == 0) return HIP_API_ID_hipStreamGetDevice; + if (strcmp("hipStreamGetFlags", name) == 0) return HIP_API_ID_hipStreamGetFlags; + if (strcmp("hipStreamGetPriority", name) == 0) return HIP_API_ID_hipStreamGetPriority; + if (strcmp("hipStreamIsCapturing", name) == 0) return HIP_API_ID_hipStreamIsCapturing; + if (strcmp("hipStreamQuery", name) == 0) return HIP_API_ID_hipStreamQuery; + if (strcmp("hipStreamSynchronize", name) == 0) return HIP_API_ID_hipStreamSynchronize; + if (strcmp("hipStreamUpdateCaptureDependencies", name) == 0) return HIP_API_ID_hipStreamUpdateCaptureDependencies; + if (strcmp("hipStreamWaitEvent", name) == 0) return HIP_API_ID_hipStreamWaitEvent; + if (strcmp("hipStreamWaitValue32", name) == 0) return HIP_API_ID_hipStreamWaitValue32; + if (strcmp("hipStreamWaitValue64", name) == 0) return HIP_API_ID_hipStreamWaitValue64; + if (strcmp("hipStreamWriteValue32", name) == 0) return HIP_API_ID_hipStreamWriteValue32; + if (strcmp("hipStreamWriteValue64", name) == 0) return HIP_API_ID_hipStreamWriteValue64; + if (strcmp("hipTexRefGetAddress", name) == 0) return HIP_API_ID_hipTexRefGetAddress; + if 
(strcmp("hipTexRefGetFlags", name) == 0) return HIP_API_ID_hipTexRefGetFlags; + if (strcmp("hipTexRefGetFormat", name) == 0) return HIP_API_ID_hipTexRefGetFormat; + if (strcmp("hipTexRefGetMaxAnisotropy", name) == 0) return HIP_API_ID_hipTexRefGetMaxAnisotropy; + if (strcmp("hipTexRefGetMipMappedArray", name) == 0) return HIP_API_ID_hipTexRefGetMipMappedArray; + if (strcmp("hipTexRefGetMipmapLevelBias", name) == 0) return HIP_API_ID_hipTexRefGetMipmapLevelBias; + if (strcmp("hipTexRefGetMipmapLevelClamp", name) == 0) return HIP_API_ID_hipTexRefGetMipmapLevelClamp; + if (strcmp("hipTexRefSetAddress", name) == 0) return HIP_API_ID_hipTexRefSetAddress; + if (strcmp("hipTexRefSetAddress2D", name) == 0) return HIP_API_ID_hipTexRefSetAddress2D; + if (strcmp("hipTexRefSetArray", name) == 0) return HIP_API_ID_hipTexRefSetArray; + if (strcmp("hipTexRefSetBorderColor", name) == 0) return HIP_API_ID_hipTexRefSetBorderColor; + if (strcmp("hipTexRefSetFlags", name) == 0) return HIP_API_ID_hipTexRefSetFlags; + if (strcmp("hipTexRefSetFormat", name) == 0) return HIP_API_ID_hipTexRefSetFormat; + if (strcmp("hipTexRefSetMaxAnisotropy", name) == 0) return HIP_API_ID_hipTexRefSetMaxAnisotropy; + if (strcmp("hipTexRefSetMipmapLevelBias", name) == 0) return HIP_API_ID_hipTexRefSetMipmapLevelBias; + if (strcmp("hipTexRefSetMipmapLevelClamp", name) == 0) return HIP_API_ID_hipTexRefSetMipmapLevelClamp; + if (strcmp("hipTexRefSetMipmappedArray", name) == 0) return HIP_API_ID_hipTexRefSetMipmappedArray; + if (strcmp("hipThreadExchangeStreamCaptureMode", name) == 0) return HIP_API_ID_hipThreadExchangeStreamCaptureMode; + if (strcmp("hipUserObjectCreate", name) == 0) return HIP_API_ID_hipUserObjectCreate; + if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease; + if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain; + if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync; + 
return HIP_API_ID_NONE; +} + +// HIP API callbacks data structures +typedef struct hip_api_data_s { + uint64_t correlation_id; + uint32_t phase; + union { + struct { + dim3* gridDim; + dim3 gridDim__val; + dim3* blockDim; + dim3 blockDim__val; + size_t* sharedMem; + size_t sharedMem__val; + hipStream_t* stream; + hipStream_t stream__val; + } __hipPopCallConfiguration; + struct { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } __hipPushCallConfiguration; + struct { + hipArray** array; + hipArray* array__val; + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray; + HIP_ARRAY3D_DESCRIPTOR pAllocateArray__val; + } hipArray3DCreate; + struct { + HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor; + HIP_ARRAY3D_DESCRIPTOR pArrayDescriptor__val; + hipArray* array; + hipArray array__val; + } hipArray3DGetDescriptor; + struct { + hipArray** pHandle; + hipArray* pHandle__val; + const HIP_ARRAY_DESCRIPTOR* pAllocateArray; + HIP_ARRAY_DESCRIPTOR pAllocateArray__val; + } hipArrayCreate; + struct { + hipArray* array; + hipArray array__val; + } hipArrayDestroy; + struct { + HIP_ARRAY_DESCRIPTOR* pArrayDescriptor; + HIP_ARRAY_DESCRIPTOR pArrayDescriptor__val; + hipArray* array; + hipArray array__val; + } hipArrayGetDescriptor; + struct { + hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipExtent* extent; + hipExtent extent__val; + unsigned int* flags; + unsigned int flags__val; + hipArray* array; + hipArray array__val; + } hipArrayGetInfo; + struct { + int* device; + int device__val; + const hipDeviceProp_t* prop; + hipDeviceProp_t prop__val; + } hipChooseDevice; + struct { + dim3 gridDim; + dim3 blockDim; + size_t sharedMem; + hipStream_t stream; + } hipConfigureCall; + struct { + hipSurfaceObject_t* pSurfObject; + hipSurfaceObject_t pSurfObject__val; + const hipResourceDesc* pResDesc; + hipResourceDesc pResDesc__val; + } hipCreateSurfaceObject; + struct { + hipCtx_t* ctx; + hipCtx_t ctx__val; + unsigned int flags; + hipDevice_t device; + } 
hipCtxCreate; + struct { + hipCtx_t ctx; + } hipCtxDestroy; + struct { + hipCtx_t peerCtx; + } hipCtxDisablePeerAccess; + struct { + hipCtx_t peerCtx; + unsigned int flags; + } hipCtxEnablePeerAccess; + struct { + hipCtx_t ctx; + int* apiVersion; + int apiVersion__val; + } hipCtxGetApiVersion; + struct { + hipFuncCache_t* cacheConfig; + hipFuncCache_t cacheConfig__val; + } hipCtxGetCacheConfig; + struct { + hipCtx_t* ctx; + hipCtx_t ctx__val; + } hipCtxGetCurrent; + struct { + hipDevice_t* device; + hipDevice_t device__val; + } hipCtxGetDevice; + struct { + unsigned int* flags; + unsigned int flags__val; + } hipCtxGetFlags; + struct { + hipSharedMemConfig* pConfig; + hipSharedMemConfig pConfig__val; + } hipCtxGetSharedMemConfig; + struct { + hipCtx_t* ctx; + hipCtx_t ctx__val; + } hipCtxPopCurrent; + struct { + hipCtx_t ctx; + } hipCtxPushCurrent; + struct { + hipFuncCache_t cacheConfig; + } hipCtxSetCacheConfig; + struct { + hipCtx_t ctx; + } hipCtxSetCurrent; + struct { + hipSharedMemConfig config; + } hipCtxSetSharedMemConfig; + struct { + hipExternalMemory_t extMem; + } hipDestroyExternalMemory; + struct { + hipExternalSemaphore_t extSem; + } hipDestroyExternalSemaphore; + struct { + hipSurfaceObject_t surfaceObject; + } hipDestroySurfaceObject; + struct { + int* canAccessPeer; + int canAccessPeer__val; + int deviceId; + int peerDeviceId; + } hipDeviceCanAccessPeer; + struct { + int* major; + int major__val; + int* minor; + int minor__val; + hipDevice_t device; + } hipDeviceComputeCapability; + struct { + int peerDeviceId; + } hipDeviceDisablePeerAccess; + struct { + int peerDeviceId; + unsigned int flags; + } hipDeviceEnablePeerAccess; + struct { + hipDevice_t* device; + hipDevice_t device__val; + int ordinal; + } hipDeviceGet; + struct { + int* pi; + int pi__val; + hipDeviceAttribute_t attr; + int deviceId; + } hipDeviceGetAttribute; + struct { + int* device; + int device__val; + const char* pciBusId; + char pciBusId__val; + } hipDeviceGetByPCIBusId; + struct 
{ + hipFuncCache_t* cacheConfig; + hipFuncCache_t cacheConfig__val; + } hipDeviceGetCacheConfig; + struct { + hipMemPool_t* mem_pool; + hipMemPool_t mem_pool__val; + int device; + } hipDeviceGetDefaultMemPool; + struct { + int device; + hipGraphMemAttributeType attr; + void* value; + } hipDeviceGetGraphMemAttribute; + struct { + size_t* pValue; + size_t pValue__val; + enum hipLimit_t limit; + } hipDeviceGetLimit; + struct { + hipMemPool_t* mem_pool; + hipMemPool_t mem_pool__val; + int device; + } hipDeviceGetMemPool; + struct { + char* name; + char name__val; + int len; + hipDevice_t device; + } hipDeviceGetName; + struct { + int* value; + int value__val; + hipDeviceP2PAttr attr; + int srcDevice; + int dstDevice; + } hipDeviceGetP2PAttribute; + struct { + char* pciBusId; + char pciBusId__val; + int len; + int device; + } hipDeviceGetPCIBusId; + struct { + hipSharedMemConfig* pConfig; + hipSharedMemConfig pConfig__val; + } hipDeviceGetSharedMemConfig; + struct { + int* leastPriority; + int leastPriority__val; + int* greatestPriority; + int greatestPriority__val; + } hipDeviceGetStreamPriorityRange; + struct { + hipUUID* uuid; + hipUUID uuid__val; + hipDevice_t device; + } hipDeviceGetUuid; + struct { + int device; + } hipDeviceGraphMemTrim; + struct { + hipDevice_t dev; + unsigned int* flags; + unsigned int flags__val; + int* active; + int active__val; + } hipDevicePrimaryCtxGetState; + struct { + hipDevice_t dev; + } hipDevicePrimaryCtxRelease; + struct { + hipDevice_t dev; + } hipDevicePrimaryCtxReset; + struct { + hipCtx_t* pctx; + hipCtx_t pctx__val; + hipDevice_t dev; + } hipDevicePrimaryCtxRetain; + struct { + hipDevice_t dev; + unsigned int flags; + } hipDevicePrimaryCtxSetFlags; + struct { + hipFuncCache_t cacheConfig; + } hipDeviceSetCacheConfig; + struct { + int device; + hipGraphMemAttributeType attr; + void* value; + } hipDeviceSetGraphMemAttribute; + struct { + enum hipLimit_t limit; + size_t value; + } hipDeviceSetLimit; + struct { + int device; + 
hipMemPool_t mem_pool; + } hipDeviceSetMemPool; + struct { + hipSharedMemConfig config; + } hipDeviceSetSharedMemConfig; + struct { + size_t* bytes; + size_t bytes__val; + hipDevice_t device; + } hipDeviceTotalMem; + struct { + int* driverVersion; + int driverVersion__val; + } hipDriverGetVersion; + struct { + const hip_Memcpy2D* pCopy; + hip_Memcpy2D pCopy__val; + } hipDrvMemcpy2DUnaligned; + struct { + const HIP_MEMCPY3D* pCopy; + HIP_MEMCPY3D pCopy__val; + } hipDrvMemcpy3D; + struct { + const HIP_MEMCPY3D* pCopy; + HIP_MEMCPY3D pCopy__val; + hipStream_t stream; + } hipDrvMemcpy3DAsync; + struct { + unsigned int numAttributes; + hipPointer_attribute* attributes; + hipPointer_attribute attributes__val; + void** data; + void* data__val; + hipDeviceptr_t ptr; + } hipDrvPointerGetAttributes; + struct { + hipEvent_t* event; + hipEvent_t event__val; + } hipEventCreate; + struct { + hipEvent_t* event; + hipEvent_t event__val; + unsigned int flags; + } hipEventCreateWithFlags; + struct { + hipEvent_t event; + } hipEventDestroy; + struct { + float* ms; + float ms__val; + hipEvent_t start; + hipEvent_t stop; + } hipEventElapsedTime; + struct { + hipEvent_t event; + } hipEventQuery; + struct { + hipEvent_t event; + hipStream_t stream; + } hipEventRecord; + struct { + hipEvent_t event; + } hipEventSynchronize; + struct { + int device1; + int device2; + unsigned int* linktype; + unsigned int linktype__val; + unsigned int* hopcount; + unsigned int hopcount__val; + } hipExtGetLinkTypeAndHopCount; + struct { + const void* function_address; + dim3 numBlocks; + dim3 dimBlocks; + void** args; + void* args__val; + size_t sharedMemBytes; + hipStream_t stream; + hipEvent_t startEvent; + hipEvent_t stopEvent; + int flags; + } hipExtLaunchKernel; + struct { + hipLaunchParams* launchParamsList; + hipLaunchParams launchParamsList__val; + int numDevices; + unsigned int flags; + } hipExtLaunchMultiKernelMultiDevice; + struct { + void** ptr; + void* ptr__val; + size_t sizeBytes; + unsigned 
int flags; + } hipExtMallocWithFlags; + struct { + hipFunction_t f; + unsigned int globalWorkSizeX; + unsigned int globalWorkSizeY; + unsigned int globalWorkSizeZ; + unsigned int localWorkSizeX; + unsigned int localWorkSizeY; + unsigned int localWorkSizeZ; + size_t sharedMemBytes; + hipStream_t hStream; + void** kernelParams; + void* kernelParams__val; + void** extra; + void* extra__val; + hipEvent_t startEvent; + hipEvent_t stopEvent; + unsigned int flags; + } hipExtModuleLaunchKernel; + struct { + hipStream_t* stream; + hipStream_t stream__val; + unsigned int cuMaskSize; + const unsigned int* cuMask; + unsigned int cuMask__val; + } hipExtStreamCreateWithCUMask; + struct { + hipStream_t stream; + unsigned int cuMaskSize; + unsigned int* cuMask; + unsigned int cuMask__val; + } hipExtStreamGetCUMask; + struct { + void** devPtr; + void* devPtr__val; + hipExternalMemory_t extMem; + const hipExternalMemoryBufferDesc* bufferDesc; + hipExternalMemoryBufferDesc bufferDesc__val; + } hipExternalMemoryGetMappedBuffer; + struct { + void* ptr; + } hipFree; + struct { + hipArray* array; + hipArray array__val; + } hipFreeArray; + struct { + void* dev_ptr; + hipStream_t stream; + } hipFreeAsync; + struct { + void* ptr; + } hipFreeHost; + struct { + hipMipmappedArray_t mipmappedArray; + } hipFreeMipmappedArray; + struct { + int* value; + int value__val; + hipFunction_attribute attrib; + hipFunction_t hfunc; + } hipFuncGetAttribute; + struct { + hipFuncAttributes* attr; + hipFuncAttributes attr__val; + const void* func; + } hipFuncGetAttributes; + struct { + const void* func; + hipFuncAttribute attr; + int value; + } hipFuncSetAttribute; + struct { + const void* func; + hipFuncCache_t config; + } hipFuncSetCacheConfig; + struct { + const void* func; + hipSharedMemConfig config; + } hipFuncSetSharedMemConfig; + struct { + unsigned int* pHipDeviceCount; + unsigned int pHipDeviceCount__val; + int* pHipDevices; + int pHipDevices__val; + unsigned int hipDeviceCount; + hipGLDeviceList 
deviceList; + } hipGLGetDevices; + struct { + hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipArray_const_t array; + } hipGetChannelDesc; + struct { + int* deviceId; + int deviceId__val; + } hipGetDevice; + struct { + int* count; + int count__val; + } hipGetDeviceCount; + struct { + unsigned int* flags; + unsigned int flags__val; + } hipGetDeviceFlags; + struct { + hipDeviceProp_t* props; + hipDeviceProp_t props__val; + hipDevice_t device; + } hipGetDeviceProperties; + struct { + hipArray_t* levelArray; + hipArray_t levelArray__val; + hipMipmappedArray_const_t mipmappedArray; + unsigned int level; + } hipGetMipmappedArrayLevel; + struct { + void** devPtr; + void* devPtr__val; + const void* symbol; + } hipGetSymbolAddress; + struct { + size_t* size; + size_t size__val; + const void* symbol; + } hipGetSymbolSize; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipGraph_t childGraph; + } hipGraphAddChildGraphNode; + struct { + hipGraph_t graph; + const hipGraphNode_t* from; + hipGraphNode_t from__val; + const hipGraphNode_t* to; + hipGraphNode_t to__val; + size_t numDependencies; + } hipGraphAddDependencies; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + } hipGraphAddEmptyNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipEvent_t event; + } hipGraphAddEventRecordNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipEvent_t event; + } 
hipGraphAddEventWaitNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipHostNodeParams* pNodeParams; + hipHostNodeParams pNodeParams__val; + } hipGraphAddHostNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphAddKernelNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + hipMemAllocNodeParams* pNodeParams; + hipMemAllocNodeParams pNodeParams__val; + } hipGraphAddMemAllocNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + void* dev_ptr; + } hipGraphAddMemFreeNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipMemcpy3DParms* pCopyParams; + hipMemcpy3DParms pCopyParams__val; + } hipGraphAddMemcpyNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphAddMemcpyNode1D; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + void* dst; + const void* 
symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphAddMemcpyNodeFromSymbol; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphAddMemcpyNodeToSymbol; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipMemsetParams* pMemsetParams; + hipMemsetParams pMemsetParams__val; + } hipGraphAddMemsetNode; + struct { + hipGraphNode_t node; + hipGraph_t* pGraph; + hipGraph_t pGraph__val; + } hipGraphChildGraphNodeGetGraph; + struct { + hipGraph_t* pGraphClone; + hipGraph_t pGraphClone__val; + hipGraph_t originalGraph; + } hipGraphClone; + struct { + hipGraph_t* pGraph; + hipGraph_t pGraph__val; + unsigned int flags; + } hipGraphCreate; + struct { + hipGraph_t graph; + const char* path; + char path__val; + unsigned int flags; + } hipGraphDebugDotPrint; + struct { + hipGraph_t graph; + } hipGraphDestroy; + struct { + hipGraphNode_t node; + } hipGraphDestroyNode; + struct { + hipGraphNode_t node; + hipEvent_t* event_out; + hipEvent_t event_out__val; + } hipGraphEventRecordNodeGetEvent; + struct { + hipGraphNode_t node; + hipEvent_t event; + } hipGraphEventRecordNodeSetEvent; + struct { + hipGraphNode_t node; + hipEvent_t* event_out; + hipEvent_t event_out__val; + } hipGraphEventWaitNodeGetEvent; + struct { + hipGraphNode_t node; + hipEvent_t event; + } hipGraphEventWaitNodeSetEvent; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + hipGraph_t childGraph; + } hipGraphExecChildGraphNodeSetParams; + struct { + hipGraphExec_t graphExec; + } hipGraphExecDestroy; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + hipEvent_t event; 
+ } hipGraphExecEventRecordNodeSetEvent; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + hipEvent_t event; + } hipGraphExecEventWaitNodeSetEvent; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipHostNodeParams* pNodeParams; + hipHostNodeParams pNodeParams__val; + } hipGraphExecHostNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphExecKernelNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + hipMemcpy3DParms* pNodeParams; + hipMemcpy3DParms pNodeParams__val; + } hipGraphExecMemcpyNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParams1D; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParamsFromSymbol; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphExecMemcpyNodeSetParamsToSymbol; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t node; + const hipMemsetParams* pNodeParams; + hipMemsetParams pNodeParams__val; + } hipGraphExecMemsetNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraph_t hGraph; + hipGraphNode_t* hErrorNode_out; + hipGraphNode_t hErrorNode_out__val; + hipGraphExecUpdateResult* updateResult_out; + hipGraphExecUpdateResult updateResult_out__val; + } hipGraphExecUpdate; + struct { + hipGraph_t graph; + hipGraphNode_t* from; + hipGraphNode_t from__val; + hipGraphNode_t* to; + hipGraphNode_t to__val; + size_t* numEdges; + size_t numEdges__val; + } hipGraphGetEdges; + struct { + hipGraph_t graph; + hipGraphNode_t* nodes; + hipGraphNode_t nodes__val; + size_t* numNodes; + 
size_t numNodes__val; + } hipGraphGetNodes; + struct { + hipGraph_t graph; + hipGraphNode_t* pRootNodes; + hipGraphNode_t pRootNodes__val; + size_t* pNumRootNodes; + size_t pNumRootNodes__val; + } hipGraphGetRootNodes; + struct { + hipGraphNode_t node; + hipHostNodeParams* pNodeParams; + hipHostNodeParams pNodeParams__val; + } hipGraphHostNodeGetParams; + struct { + hipGraphNode_t node; + const hipHostNodeParams* pNodeParams; + hipHostNodeParams pNodeParams__val; + } hipGraphHostNodeSetParams; + struct { + hipGraphExec_t* pGraphExec; + hipGraphExec_t pGraphExec__val; + hipGraph_t graph; + hipGraphNode_t* pErrorNode; + hipGraphNode_t pErrorNode__val; + char* pLogBuffer; + char pLogBuffer__val; + size_t bufferSize; + } hipGraphInstantiate; + struct { + hipGraphExec_t* pGraphExec; + hipGraphExec_t pGraphExec__val; + hipGraph_t graph; + unsigned long long flags; + } hipGraphInstantiateWithFlags; + struct { + hipGraphNode_t hSrc; + hipGraphNode_t hDst; + } hipGraphKernelNodeCopyAttributes; + struct { + hipGraphNode_t hNode; + hipKernelNodeAttrID attr; + hipKernelNodeAttrValue* value; + hipKernelNodeAttrValue value__val; + } hipGraphKernelNodeGetAttribute; + struct { + hipGraphNode_t node; + hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphKernelNodeGetParams; + struct { + hipGraphNode_t hNode; + hipKernelNodeAttrID attr; + const hipKernelNodeAttrValue* value; + hipKernelNodeAttrValue value__val; + } hipGraphKernelNodeSetAttribute; + struct { + hipGraphNode_t node; + const hipKernelNodeParams* pNodeParams; + hipKernelNodeParams pNodeParams__val; + } hipGraphKernelNodeSetParams; + struct { + hipGraphExec_t graphExec; + hipStream_t stream; + } hipGraphLaunch; + struct { + hipGraphNode_t node; + hipMemAllocNodeParams* pNodeParams; + hipMemAllocNodeParams pNodeParams__val; + } hipGraphMemAllocNodeGetParams; + struct { + hipGraphNode_t node; + void* dev_ptr; + } hipGraphMemFreeNodeGetParams; + struct { + hipGraphNode_t node; + 
hipMemcpy3DParms* pNodeParams; + hipMemcpy3DParms pNodeParams__val; + } hipGraphMemcpyNodeGetParams; + struct { + hipGraphNode_t node; + const hipMemcpy3DParms* pNodeParams; + hipMemcpy3DParms pNodeParams__val; + } hipGraphMemcpyNodeSetParams; + struct { + hipGraphNode_t node; + void* dst; + const void* src; + size_t count; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParams1D; + struct { + hipGraphNode_t node; + void* dst; + const void* symbol; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParamsFromSymbol; + struct { + hipGraphNode_t node; + const void* symbol; + const void* src; + size_t count; + size_t offset; + hipMemcpyKind kind; + } hipGraphMemcpyNodeSetParamsToSymbol; + struct { + hipGraphNode_t node; + hipMemsetParams* pNodeParams; + hipMemsetParams pNodeParams__val; + } hipGraphMemsetNodeGetParams; + struct { + hipGraphNode_t node; + const hipMemsetParams* pNodeParams; + hipMemsetParams pNodeParams__val; + } hipGraphMemsetNodeSetParams; + struct { + hipGraphNode_t* pNode; + hipGraphNode_t pNode__val; + hipGraphNode_t originalNode; + hipGraph_t clonedGraph; + } hipGraphNodeFindInClone; + struct { + hipGraphNode_t node; + hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t* pNumDependencies; + size_t pNumDependencies__val; + } hipGraphNodeGetDependencies; + struct { + hipGraphNode_t node; + hipGraphNode_t* pDependentNodes; + hipGraphNode_t pDependentNodes__val; + size_t* pNumDependentNodes; + size_t pNumDependentNodes__val; + } hipGraphNodeGetDependentNodes; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + unsigned int* isEnabled; + unsigned int isEnabled__val; + } hipGraphNodeGetEnabled; + struct { + hipGraphNode_t node; + hipGraphNodeType* pType; + hipGraphNodeType pType__val; + } hipGraphNodeGetType; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + unsigned int isEnabled; + } hipGraphNodeSetEnabled; + struct { + hipGraph_t graph; + hipUserObject_t object; + 
unsigned int count; + } hipGraphReleaseUserObject; + struct { + hipGraph_t graph; + const hipGraphNode_t* from; + hipGraphNode_t from__val; + const hipGraphNode_t* to; + hipGraphNode_t to__val; + size_t numDependencies; + } hipGraphRemoveDependencies; + struct { + hipGraph_t graph; + hipUserObject_t object; + unsigned int count; + unsigned int flags; + } hipGraphRetainUserObject; + struct { + hipGraphExec_t graphExec; + hipStream_t stream; + } hipGraphUpload; + struct { + hipGraphicsResource** resource; + hipGraphicsResource* resource__val; + GLuint buffer; + unsigned int flags; + } hipGraphicsGLRegisterBuffer; + struct { + hipGraphicsResource** resource; + hipGraphicsResource* resource__val; + GLuint image; + GLenum target; + unsigned int flags; + } hipGraphicsGLRegisterImage; + struct { + int count; + hipGraphicsResource_t* resources; + hipGraphicsResource_t resources__val; + hipStream_t stream; + } hipGraphicsMapResources; + struct { + void** devPtr; + void* devPtr__val; + size_t* size; + size_t size__val; + hipGraphicsResource_t resource; + } hipGraphicsResourceGetMappedPointer; + struct { + hipArray_t* array; + hipArray_t array__val; + hipGraphicsResource_t resource; + unsigned int arrayIndex; + unsigned int mipLevel; + } hipGraphicsSubResourceGetMappedArray; + struct { + int count; + hipGraphicsResource_t* resources; + hipGraphicsResource_t resources__val; + hipStream_t stream; + } hipGraphicsUnmapResources; + struct { + hipGraphicsResource_t resource; + } hipGraphicsUnregisterResource; + struct { + hipFunction_t f; + unsigned int globalWorkSizeX; + unsigned int globalWorkSizeY; + unsigned int globalWorkSizeZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + size_t sharedMemBytes; + hipStream_t hStream; + void** kernelParams; + void* kernelParams__val; + void** extra; + void* extra__val; + hipEvent_t startEvent; + hipEvent_t stopEvent; + } hipHccModuleLaunchKernel; + struct { + void** ptr; + void* ptr__val; + size_t size; + 
unsigned int flags; + } hipHostAlloc; + struct { + void* ptr; + } hipHostFree; + struct { + void** devPtr; + void* devPtr__val; + void* hstPtr; + unsigned int flags; + } hipHostGetDevicePointer; + struct { + unsigned int* flagsPtr; + unsigned int flagsPtr__val; + void* hostPtr; + } hipHostGetFlags; + struct { + void** ptr; + void* ptr__val; + size_t size; + unsigned int flags; + } hipHostMalloc; + struct { + void* hostPtr; + size_t sizeBytes; + unsigned int flags; + } hipHostRegister; + struct { + void* hostPtr; + } hipHostUnregister; + struct { + hipExternalMemory_t* extMem_out; + hipExternalMemory_t extMem_out__val; + const hipExternalMemoryHandleDesc* memHandleDesc; + hipExternalMemoryHandleDesc memHandleDesc__val; + } hipImportExternalMemory; + struct { + hipExternalSemaphore_t* extSem_out; + hipExternalSemaphore_t extSem_out__val; + const hipExternalSemaphoreHandleDesc* semHandleDesc; + hipExternalSemaphoreHandleDesc semHandleDesc__val; + } hipImportExternalSemaphore; + struct { + unsigned int flags; + } hipInit; + struct { + void* devPtr; + } hipIpcCloseMemHandle; + struct { + hipIpcEventHandle_t* handle; + hipIpcEventHandle_t handle__val; + hipEvent_t event; + } hipIpcGetEventHandle; + struct { + hipIpcMemHandle_t* handle; + hipIpcMemHandle_t handle__val; + void* devPtr; + } hipIpcGetMemHandle; + struct { + hipEvent_t* event; + hipEvent_t event__val; + hipIpcEventHandle_t handle; + } hipIpcOpenEventHandle; + struct { + void** devPtr; + void* devPtr__val; + hipIpcMemHandle_t handle; + unsigned int flags; + } hipIpcOpenMemHandle; + struct { + const void* hostFunction; + } hipLaunchByPtr; + struct { + const void* f; + dim3 gridDim; + dim3 blockDimX; + void** kernelParams; + void* kernelParams__val; + unsigned int sharedMemBytes; + hipStream_t stream; + } hipLaunchCooperativeKernel; + struct { + hipLaunchParams* launchParamsList; + hipLaunchParams launchParamsList__val; + int numDevices; + unsigned int flags; + } hipLaunchCooperativeKernelMultiDevice; + struct { 
+ hipStream_t stream; + hipHostFn_t fn; + void* userData; + } hipLaunchHostFunc; + struct { + const void* function_address; + dim3 numBlocks; + dim3 dimBlocks; + void** args; + void* args__val; + size_t sharedMemBytes; + hipStream_t stream; + } hipLaunchKernel; + struct { + void** ptr; + void* ptr__val; + size_t size; + } hipMalloc; + struct { + hipPitchedPtr* pitchedDevPtr; + hipPitchedPtr pitchedDevPtr__val; + hipExtent extent; + } hipMalloc3D; + struct { + hipArray_t* array; + hipArray_t array__val; + const hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipExtent extent; + unsigned int flags; + } hipMalloc3DArray; + struct { + hipArray** array; + hipArray* array__val; + const hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + size_t width; + size_t height; + unsigned int flags; + } hipMallocArray; + struct { + void** dev_ptr; + void* dev_ptr__val; + size_t size; + hipStream_t stream; + } hipMallocAsync; + struct { + void** dev_ptr; + void* dev_ptr__val; + size_t size; + hipMemPool_t mem_pool; + hipStream_t stream; + } hipMallocFromPoolAsync; + struct { + void** ptr; + void* ptr__val; + size_t size; + } hipMallocHost; + struct { + void** dev_ptr; + void* dev_ptr__val; + size_t size; + unsigned int flags; + } hipMallocManaged; + struct { + hipMipmappedArray_t* mipmappedArray; + hipMipmappedArray_t mipmappedArray__val; + const hipChannelFormatDesc* desc; + hipChannelFormatDesc desc__val; + hipExtent extent; + unsigned int numLevels; + unsigned int flags; + } hipMallocMipmappedArray; + struct { + void** ptr; + void* ptr__val; + size_t* pitch; + size_t pitch__val; + size_t width; + size_t height; + } hipMallocPitch; + struct { + void* devPtr; + size_t size; + } hipMemAddressFree; + struct { + void** ptr; + void* ptr__val; + size_t size; + size_t alignment; + void* addr; + unsigned long long flags; + } hipMemAddressReserve; + struct { + const void* dev_ptr; + size_t count; + hipMemoryAdvise advice; + int device; + } hipMemAdvise; + struct 
{ + void** ptr; + void* ptr__val; + size_t size; + } hipMemAllocHost; + struct { + hipDeviceptr_t* dptr; + hipDeviceptr_t dptr__val; + size_t* pitch; + size_t pitch__val; + size_t widthInBytes; + size_t height; + unsigned int elementSizeBytes; + } hipMemAllocPitch; + struct { + hipMemGenericAllocationHandle_t* handle; + hipMemGenericAllocationHandle_t handle__val; + size_t size; + const hipMemAllocationProp* prop; + hipMemAllocationProp prop__val; + unsigned long long flags; + } hipMemCreate; + struct { + void* shareableHandle; + hipMemGenericAllocationHandle_t handle; + hipMemAllocationHandleType handleType; + unsigned long long flags; + } hipMemExportToShareableHandle; + struct { + unsigned long long* flags; + unsigned long long flags__val; + const hipMemLocation* location; + hipMemLocation location__val; + void* ptr; + } hipMemGetAccess; + struct { + hipDeviceptr_t* pbase; + hipDeviceptr_t pbase__val; + size_t* psize; + size_t psize__val; + hipDeviceptr_t dptr; + } hipMemGetAddressRange; + struct { + size_t* granularity; + size_t granularity__val; + const hipMemAllocationProp* prop; + hipMemAllocationProp prop__val; + hipMemAllocationGranularity_flags option; + } hipMemGetAllocationGranularity; + struct { + hipMemAllocationProp* prop; + hipMemAllocationProp prop__val; + hipMemGenericAllocationHandle_t handle; + } hipMemGetAllocationPropertiesFromHandle; + struct { + size_t* free; + size_t free__val; + size_t* total; + size_t total__val; + } hipMemGetInfo; + struct { + hipMemGenericAllocationHandle_t* handle; + hipMemGenericAllocationHandle_t handle__val; + void* osHandle; + hipMemAllocationHandleType shHandleType; + } hipMemImportFromShareableHandle; + struct { + void* ptr; + size_t size; + size_t offset; + hipMemGenericAllocationHandle_t handle; + unsigned long long flags; + } hipMemMap; + struct { + hipArrayMapInfo* mapInfoList; + hipArrayMapInfo mapInfoList__val; + unsigned int count; + hipStream_t stream; + } hipMemMapArrayAsync; + struct { + hipMemPool_t* 
mem_pool; + hipMemPool_t mem_pool__val; + const hipMemPoolProps* pool_props; + hipMemPoolProps pool_props__val; + } hipMemPoolCreate; + struct { + hipMemPool_t mem_pool; + } hipMemPoolDestroy; + struct { + hipMemPoolPtrExportData* export_data; + hipMemPoolPtrExportData export_data__val; + void* dev_ptr; + } hipMemPoolExportPointer; + struct { + void* shared_handle; + hipMemPool_t mem_pool; + hipMemAllocationHandleType handle_type; + unsigned int flags; + } hipMemPoolExportToShareableHandle; + struct { + hipMemAccessFlags* flags; + hipMemAccessFlags flags__val; + hipMemPool_t mem_pool; + hipMemLocation* location; + hipMemLocation location__val; + } hipMemPoolGetAccess; + struct { + hipMemPool_t mem_pool; + hipMemPoolAttr attr; + void* value; + } hipMemPoolGetAttribute; + struct { + hipMemPool_t* mem_pool; + hipMemPool_t mem_pool__val; + void* shared_handle; + hipMemAllocationHandleType handle_type; + unsigned int flags; + } hipMemPoolImportFromShareableHandle; + struct { + void** dev_ptr; + void* dev_ptr__val; + hipMemPool_t mem_pool; + hipMemPoolPtrExportData* export_data; + hipMemPoolPtrExportData export_data__val; + } hipMemPoolImportPointer; + struct { + hipMemPool_t mem_pool; + const hipMemAccessDesc* desc_list; + hipMemAccessDesc desc_list__val; + size_t count; + } hipMemPoolSetAccess; + struct { + hipMemPool_t mem_pool; + hipMemPoolAttr attr; + void* value; + } hipMemPoolSetAttribute; + struct { + hipMemPool_t mem_pool; + size_t min_bytes_to_hold; + } hipMemPoolTrimTo; + struct { + const void* dev_ptr; + size_t count; + int device; + hipStream_t stream; + } hipMemPrefetchAsync; + struct { + void* ptr; + size_t* size; + size_t size__val; + } hipMemPtrGetInfo; + struct { + void* data; + size_t data_size; + hipMemRangeAttribute attribute; + const void* dev_ptr; + size_t count; + } hipMemRangeGetAttribute; + struct { + void** data; + void* data__val; + size_t* data_sizes; + size_t data_sizes__val; + hipMemRangeAttribute* attributes; + hipMemRangeAttribute 
attributes__val; + size_t num_attributes; + const void* dev_ptr; + size_t count; + } hipMemRangeGetAttributes; + struct { + hipMemGenericAllocationHandle_t handle; + } hipMemRelease; + struct { + hipMemGenericAllocationHandle_t* handle; + hipMemGenericAllocationHandle_t handle__val; + void* addr; + } hipMemRetainAllocationHandle; + struct { + void* ptr; + size_t size; + const hipMemAccessDesc* desc; + hipMemAccessDesc desc__val; + size_t count; + } hipMemSetAccess; + struct { + void* ptr; + size_t size; + } hipMemUnmap; + struct { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + } hipMemcpy; + struct { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2D; + struct { + void* dst; + size_t dpitch; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DAsync; + struct { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffset; + size_t hOffset; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DFromArray; + struct { + void* dst; + size_t dpitch; + hipArray_const_t src; + size_t wOffset; + size_t hOffset; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DFromArrayAsync; + struct { + hipArray* dst; + hipArray dst__val; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + } hipMemcpy2DToArray; + struct { + hipArray* dst; + hipArray dst__val; + size_t wOffset; + size_t hOffset; + const void* src; + size_t spitch; + size_t width; + size_t height; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpy2DToArrayAsync; + struct { + const hipMemcpy3DParms* p; + hipMemcpy3DParms p__val; + } hipMemcpy3D; + struct { + const hipMemcpy3DParms* p; + hipMemcpy3DParms p__val; + hipStream_t stream; + } hipMemcpy3DAsync; + struct { + void* dst; + const 
void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyAsync; + struct { + void* dst; + hipArray* srcArray; + hipArray srcArray__val; + size_t srcOffset; + size_t count; + } hipMemcpyAtoH; + struct { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoD; + struct { + hipDeviceptr_t dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoDAsync; + struct { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + } hipMemcpyDtoH; + struct { + void* dst; + hipDeviceptr_t src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyDtoHAsync; + struct { + void* dst; + hipArray_const_t srcArray; + size_t wOffset; + size_t hOffset; + size_t count; + hipMemcpyKind kind; + } hipMemcpyFromArray; + struct { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyFromSymbol; + struct { + void* dst; + const void* symbol; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyFromSymbolAsync; + struct { + hipArray* dstArray; + hipArray dstArray__val; + size_t dstOffset; + const void* srcHost; + size_t count; + } hipMemcpyHtoA; + struct { + hipDeviceptr_t dst; + void* src; + size_t sizeBytes; + } hipMemcpyHtoD; + struct { + hipDeviceptr_t dst; + void* src; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyHtoDAsync; + struct { + const hip_Memcpy2D* pCopy; + hip_Memcpy2D pCopy__val; + } hipMemcpyParam2D; + struct { + const hip_Memcpy2D* pCopy; + hip_Memcpy2D pCopy__val; + hipStream_t stream; + } hipMemcpyParam2DAsync; + struct { + void* dst; + int dstDeviceId; + const void* src; + int srcDeviceId; + size_t sizeBytes; + } hipMemcpyPeer; + struct { + void* dst; + int dstDeviceId; + const void* src; + int srcDevice; + size_t sizeBytes; + hipStream_t stream; + } hipMemcpyPeerAsync; + struct { + hipArray* dst; + hipArray dst__val; + size_t wOffset; + size_t hOffset; + const void* src; + size_t count; + 
hipMemcpyKind kind; + } hipMemcpyToArray; + struct { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + } hipMemcpyToSymbol; + struct { + const void* symbol; + const void* src; + size_t sizeBytes; + size_t offset; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyToSymbolAsync; + struct { + void* dst; + const void* src; + size_t sizeBytes; + hipMemcpyKind kind; + hipStream_t stream; + } hipMemcpyWithStream; + struct { + void* dst; + int value; + size_t sizeBytes; + } hipMemset; + struct { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + } hipMemset2D; + struct { + void* dst; + size_t pitch; + int value; + size_t width; + size_t height; + hipStream_t stream; + } hipMemset2DAsync; + struct { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + } hipMemset3D; + struct { + hipPitchedPtr pitchedDevPtr; + int value; + hipExtent extent; + hipStream_t stream; + } hipMemset3DAsync; + struct { + void* dst; + int value; + size_t sizeBytes; + hipStream_t stream; + } hipMemsetAsync; + struct { + hipDeviceptr_t dest; + unsigned short value; + size_t count; + } hipMemsetD16; + struct { + hipDeviceptr_t dest; + unsigned short value; + size_t count; + hipStream_t stream; + } hipMemsetD16Async; + struct { + hipDeviceptr_t dest; + int value; + size_t count; + } hipMemsetD32; + struct { + hipDeviceptr_t dst; + int value; + size_t count; + hipStream_t stream; + } hipMemsetD32Async; + struct { + hipDeviceptr_t dest; + unsigned char value; + size_t count; + } hipMemsetD8; + struct { + hipDeviceptr_t dest; + unsigned char value; + size_t count; + hipStream_t stream; + } hipMemsetD8Async; + struct { + hipMipmappedArray_t* pHandle; + hipMipmappedArray_t pHandle__val; + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc; + HIP_ARRAY3D_DESCRIPTOR pMipmappedArrayDesc__val; + unsigned int numMipmapLevels; + } hipMipmappedArrayCreate; + struct { + hipMipmappedArray_t hMipmappedArray; + } 
hipMipmappedArrayDestroy; + struct { + hipArray_t* pLevelArray; + hipArray_t pLevelArray__val; + hipMipmappedArray_t hMipMappedArray; + unsigned int level; + } hipMipmappedArrayGetLevel; + struct { + hipFunction_t* function; + hipFunction_t function__val; + hipModule_t module; + const char* kname; + char kname__val; + } hipModuleGetFunction; + struct { + hipDeviceptr_t* dptr; + hipDeviceptr_t dptr__val; + size_t* bytes; + size_t bytes__val; + hipModule_t hmod; + const char* name; + char name__val; + } hipModuleGetGlobal; + struct { + textureReference** texRef; + textureReference* texRef__val; + hipModule_t hmod; + const char* name; + char name__val; + } hipModuleGetTexRef; + struct { + hipFunction_t f; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + hipStream_t stream; + void** kernelParams; + void* kernelParams__val; + } hipModuleLaunchCooperativeKernel; + struct { + hipFunctionLaunchParams* launchParamsList; + hipFunctionLaunchParams launchParamsList__val; + unsigned int numDevices; + unsigned int flags; + } hipModuleLaunchCooperativeKernelMultiDevice; + struct { + hipFunction_t f; + unsigned int gridDimX; + unsigned int gridDimY; + unsigned int gridDimZ; + unsigned int blockDimX; + unsigned int blockDimY; + unsigned int blockDimZ; + unsigned int sharedMemBytes; + hipStream_t stream; + void** kernelParams; + void* kernelParams__val; + void** extra; + void* extra__val; + } hipModuleLaunchKernel; + struct { + hipModule_t* module; + hipModule_t module__val; + const char* fname; + char fname__val; + } hipModuleLoad; + struct { + hipModule_t* module; + hipModule_t module__val; + const void* image; + } hipModuleLoadData; + struct { + hipModule_t* module; + hipModule_t module__val; + const void* image; + unsigned int numOptions; + hipJitOption* options; + hipJitOption options__val; + void** optionsValues; + void* optionsValues__val; + 
} hipModuleLoadDataEx; + struct { + int* numBlocks; + int numBlocks__val; + hipFunction_t f; + int blockSize; + size_t dynSharedMemPerBlk; + } hipModuleOccupancyMaxActiveBlocksPerMultiprocessor; + struct { + int* numBlocks; + int numBlocks__val; + hipFunction_t f; + int blockSize; + size_t dynSharedMemPerBlk; + unsigned int flags; + } hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + struct { + int* gridSize; + int gridSize__val; + int* blockSize; + int blockSize__val; + hipFunction_t f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + } hipModuleOccupancyMaxPotentialBlockSize; + struct { + int* gridSize; + int gridSize__val; + int* blockSize; + int blockSize__val; + hipFunction_t f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + unsigned int flags; + } hipModuleOccupancyMaxPotentialBlockSizeWithFlags; + struct { + hipModule_t module; + } hipModuleUnload; + struct { + int* numBlocks; + int numBlocks__val; + const void* f; + int blockSize; + size_t dynamicSMemSize; + } hipOccupancyMaxActiveBlocksPerMultiprocessor; + struct { + int* numBlocks; + int numBlocks__val; + const void* f; + int blockSize; + size_t dynamicSMemSize; + unsigned int flags; + } hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + struct { + int* gridSize; + int gridSize__val; + int* blockSize; + int blockSize__val; + const void* f; + size_t dynSharedMemPerBlk; + int blockSizeLimit; + } hipOccupancyMaxPotentialBlockSize; + struct { + void* data; + hipPointer_attribute attribute; + hipDeviceptr_t ptr; + } hipPointerGetAttribute; + struct { + hipPointerAttribute_t* attributes; + hipPointerAttribute_t attributes__val; + const void* ptr; + } hipPointerGetAttributes; + struct { + const void* value; + hipPointer_attribute attribute; + hipDeviceptr_t ptr; + } hipPointerSetAttribute; + struct { + int* runtimeVersion; + int runtimeVersion__val; + } hipRuntimeGetVersion; + struct { + int deviceId; + } hipSetDevice; + struct { + unsigned int flags; + } hipSetDeviceFlags; + struct 
{ + const void* arg; + size_t size; + size_t offset; + } hipSetupArgument; + struct { + const hipExternalSemaphore_t* extSemArray; + hipExternalSemaphore_t extSemArray__val; + const hipExternalSemaphoreSignalParams* paramsArray; + hipExternalSemaphoreSignalParams paramsArray__val; + unsigned int numExtSems; + hipStream_t stream; + } hipSignalExternalSemaphoresAsync; + struct { + hipStream_t stream; + hipStreamCallback_t callback; + void* userData; + unsigned int flags; + } hipStreamAddCallback; + struct { + hipStream_t stream; + void* dev_ptr; + size_t length; + unsigned int flags; + } hipStreamAttachMemAsync; + struct { + hipStream_t stream; + hipStreamCaptureMode mode; + } hipStreamBeginCapture; + struct { + hipStream_t* stream; + hipStream_t stream__val; + } hipStreamCreate; + struct { + hipStream_t* stream; + hipStream_t stream__val; + unsigned int flags; + } hipStreamCreateWithFlags; + struct { + hipStream_t* stream; + hipStream_t stream__val; + unsigned int flags; + int priority; + } hipStreamCreateWithPriority; + struct { + hipStream_t stream; + } hipStreamDestroy; + struct { + hipStream_t stream; + hipGraph_t* pGraph; + hipGraph_t pGraph__val; + } hipStreamEndCapture; + struct { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + hipStreamCaptureStatus pCaptureStatus__val; + unsigned long long* pId; + unsigned long long pId__val; + } hipStreamGetCaptureInfo; + struct { + hipStream_t stream; + hipStreamCaptureStatus* captureStatus_out; + hipStreamCaptureStatus captureStatus_out__val; + unsigned long long* id_out; + unsigned long long id_out__val; + hipGraph_t* graph_out; + hipGraph_t graph_out__val; + const hipGraphNode_t** dependencies_out; + const hipGraphNode_t* dependencies_out__val; + size_t* numDependencies_out; + size_t numDependencies_out__val; + } hipStreamGetCaptureInfo_v2; + struct { + hipStream_t stream; + hipDevice_t* device; + hipDevice_t device__val; + } hipStreamGetDevice; + struct { + hipStream_t stream; + unsigned int* flags; 
+ unsigned int flags__val; + } hipStreamGetFlags; + struct { + hipStream_t stream; + int* priority; + int priority__val; + } hipStreamGetPriority; + struct { + hipStream_t stream; + hipStreamCaptureStatus* pCaptureStatus; + hipStreamCaptureStatus pCaptureStatus__val; + } hipStreamIsCapturing; + struct { + hipStream_t stream; + } hipStreamQuery; + struct { + hipStream_t stream; + } hipStreamSynchronize; + struct { + hipStream_t stream; + hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + size_t numDependencies; + unsigned int flags; + } hipStreamUpdateCaptureDependencies; + struct { + hipStream_t stream; + hipEvent_t event; + unsigned int flags; + } hipStreamWaitEvent; + struct { + hipStream_t stream; + void* ptr; + unsigned int value; + unsigned int flags; + unsigned int mask; + } hipStreamWaitValue32; + struct { + hipStream_t stream; + void* ptr; + uint64_t value; + unsigned int flags; + uint64_t mask; + } hipStreamWaitValue64; + struct { + hipStream_t stream; + void* ptr; + unsigned int value; + unsigned int flags; + } hipStreamWriteValue32; + struct { + hipStream_t stream; + void* ptr; + uint64_t value; + unsigned int flags; + } hipStreamWriteValue64; + struct { + hipDeviceptr_t* dev_ptr; + hipDeviceptr_t dev_ptr__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetAddress; + struct { + unsigned int* pFlags; + unsigned int pFlags__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetFlags; + struct { + hipArray_Format* pFormat; + hipArray_Format pFormat__val; + int* pNumChannels; + int pNumChannels__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetFormat; + struct { + int* pmaxAnsio; + int pmaxAnsio__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetMaxAnisotropy; + struct { + hipMipmappedArray_t* pArray; + hipMipmappedArray_t pArray__val; + const textureReference* texRef; + textureReference texRef__val; 
+ } hipTexRefGetMipMappedArray; + struct { + float* pbias; + float pbias__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetMipmapLevelBias; + struct { + float* pminMipmapLevelClamp; + float pminMipmapLevelClamp__val; + float* pmaxMipmapLevelClamp; + float pmaxMipmapLevelClamp__val; + const textureReference* texRef; + textureReference texRef__val; + } hipTexRefGetMipmapLevelClamp; + struct { + size_t* ByteOffset; + size_t ByteOffset__val; + textureReference* texRef; + textureReference texRef__val; + hipDeviceptr_t dptr; + size_t bytes; + } hipTexRefSetAddress; + struct { + textureReference* texRef; + textureReference texRef__val; + const HIP_ARRAY_DESCRIPTOR* desc; + HIP_ARRAY_DESCRIPTOR desc__val; + hipDeviceptr_t dptr; + size_t Pitch; + } hipTexRefSetAddress2D; + struct { + textureReference* tex; + textureReference tex__val; + hipArray_const_t array; + unsigned int flags; + } hipTexRefSetArray; + struct { + textureReference* texRef; + textureReference texRef__val; + float* pBorderColor; + float pBorderColor__val; + } hipTexRefSetBorderColor; + struct { + textureReference* texRef; + textureReference texRef__val; + unsigned int Flags; + } hipTexRefSetFlags; + struct { + textureReference* texRef; + textureReference texRef__val; + hipArray_Format fmt; + int NumPackedComponents; + } hipTexRefSetFormat; + struct { + textureReference* texRef; + textureReference texRef__val; + unsigned int maxAniso; + } hipTexRefSetMaxAnisotropy; + struct { + textureReference* texRef; + textureReference texRef__val; + float bias; + } hipTexRefSetMipmapLevelBias; + struct { + textureReference* texRef; + textureReference texRef__val; + float minMipMapLevelClamp; + float maxMipMapLevelClamp; + } hipTexRefSetMipmapLevelClamp; + struct { + textureReference* texRef; + textureReference texRef__val; + hipMipmappedArray* mipmappedArray; + hipMipmappedArray mipmappedArray__val; + unsigned int Flags; + } hipTexRefSetMipmappedArray; + struct { + 
hipStreamCaptureMode* mode; + hipStreamCaptureMode mode__val; + } hipThreadExchangeStreamCaptureMode; + struct { + hipUserObject_t* object_out; + hipUserObject_t object_out__val; + void* ptr; + hipHostFn_t destroy; + unsigned int initialRefcount; + unsigned int flags; + } hipUserObjectCreate; + struct { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRelease; + struct { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRetain; + struct { + const hipExternalSemaphore_t* extSemArray; + hipExternalSemaphore_t extSemArray__val; + const hipExternalSemaphoreWaitParams* paramsArray; + hipExternalSemaphoreWaitParams paramsArray__val; + unsigned int numExtSems; + hipStream_t stream; + } hipWaitExternalSemaphoresAsync; + } args; + uint64_t *phase_data; +} hip_api_data_t; + +// HIP API callbacks args data filling macros +// __hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')] +#define INIT___hipPopCallConfiguration_CB_ARGS_DATA(cb_data) { \ + cb_data.args.__hipPopCallConfiguration.gridDim = (dim3*)gridDim; \ + cb_data.args.__hipPopCallConfiguration.blockDim = (dim3*)blockDim; \ + cb_data.args.__hipPopCallConfiguration.sharedMem = (size_t*)sharedMem; \ + cb_data.args.__hipPopCallConfiguration.stream = (hipStream_t*)stream; \ +}; +// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] +#define INIT___hipPushCallConfiguration_CB_ARGS_DATA(cb_data) { \ + cb_data.args.__hipPushCallConfiguration.gridDim = (dim3)gridDim; \ + cb_data.args.__hipPushCallConfiguration.blockDim = (dim3)blockDim; \ + cb_data.args.__hipPushCallConfiguration.sharedMem = (size_t)sharedMem; \ + cb_data.args.__hipPushCallConfiguration.stream = (hipStream_t)stream; \ +}; +// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')] +#define INIT_hipArray3DCreate_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipArray3DCreate.array = (hipArray**)array; \ + cb_data.args.hipArray3DCreate.pAllocateArray = (const HIP_ARRAY3D_DESCRIPTOR*)pAllocateArray; \ +}; +// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray*', 'array')] +#define INIT_hipArray3DGetDescriptor_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipArray3DGetDescriptor.pArrayDescriptor = (HIP_ARRAY3D_DESCRIPTOR*)pArrayDescriptor; \ + cb_data.args.hipArray3DGetDescriptor.array = (hipArray*)array; \ +}; +// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')] +#define INIT_hipArrayCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipArrayCreate.pHandle = (hipArray**)array; \ + cb_data.args.hipArrayCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \ +}; +// hipArrayDestroy[('hipArray*', 'array')] +#define INIT_hipArrayDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipArrayDestroy.array = (hipArray*)array; \ +}; +// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray*', 'array')] +#define INIT_hipArrayGetDescriptor_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipArrayGetDescriptor.pArrayDescriptor = (HIP_ARRAY_DESCRIPTOR*)pArrayDescriptor; \ + cb_data.args.hipArrayGetDescriptor.array = (hipArray*)array; \ +}; +// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray*', 'array')] +#define INIT_hipArrayGetInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipArrayGetInfo.desc = (hipChannelFormatDesc*)desc; \ + cb_data.args.hipArrayGetInfo.extent = (hipExtent*)extent; \ + cb_data.args.hipArrayGetInfo.flags = (unsigned int*)flags; \ + cb_data.args.hipArrayGetInfo.array = (hipArray*)array; \ +}; +// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')] +#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipChooseDevice.device = (int*)device; \ + cb_data.args.hipChooseDevice.prop = (const 
hipDeviceProp_t*)properties; \ +}; +// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] +#define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipConfigureCall.gridDim = (dim3)gridDim; \ + cb_data.args.hipConfigureCall.blockDim = (dim3)blockDim; \ + cb_data.args.hipConfigureCall.sharedMem = (size_t)sharedMem; \ + cb_data.args.hipConfigureCall.stream = (hipStream_t)stream; \ +}; +// hipCreateSurfaceObject[('hipSurfaceObject_t*', 'pSurfObject'), ('const hipResourceDesc*', 'pResDesc')] +#define INIT_hipCreateSurfaceObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCreateSurfaceObject.pSurfObject = (hipSurfaceObject_t*)pSurfObject; \ + cb_data.args.hipCreateSurfaceObject.pResDesc = (const hipResourceDesc*)pResDesc; \ +}; +// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')] +#define INIT_hipCtxCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxCreate.ctx = (hipCtx_t*)ctx; \ + cb_data.args.hipCtxCreate.flags = (unsigned int)flags; \ + cb_data.args.hipCtxCreate.device = (hipDevice_t)device; \ +}; +// hipCtxDestroy[('hipCtx_t', 'ctx')] +#define INIT_hipCtxDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxDestroy.ctx = (hipCtx_t)ctx; \ +}; +// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')] +#define INIT_hipCtxDisablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxDisablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \ +}; +// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')] +#define INIT_hipCtxEnablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxEnablePeerAccess.peerCtx = (hipCtx_t)peerCtx; \ + cb_data.args.hipCtxEnablePeerAccess.flags = (unsigned int)flags; \ +}; +// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('int*', 'apiVersion')] +#define INIT_hipCtxGetApiVersion_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetApiVersion.ctx = (hipCtx_t)ctx; \ + cb_data.args.hipCtxGetApiVersion.apiVersion = 
(int*)apiVersion; \ +}; +// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')] +#define INIT_hipCtxGetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \ +}; +// hipCtxGetCurrent[('hipCtx_t*', 'ctx')] +#define INIT_hipCtxGetCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetCurrent.ctx = (hipCtx_t*)ctx; \ +}; +// hipCtxGetDevice[('hipDevice_t*', 'device')] +#define INIT_hipCtxGetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetDevice.device = (hipDevice_t*)device; \ +}; +// hipCtxGetFlags[('unsigned int*', 'flags')] +#define INIT_hipCtxGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetFlags.flags = (unsigned int*)flags; \ +}; +// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')] +#define INIT_hipCtxGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \ +}; +// hipCtxPopCurrent[('hipCtx_t*', 'ctx')] +#define INIT_hipCtxPopCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxPopCurrent.ctx = (hipCtx_t*)ctx; \ +}; +// hipCtxPushCurrent[('hipCtx_t', 'ctx')] +#define INIT_hipCtxPushCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxPushCurrent.ctx = (hipCtx_t)ctx; \ +}; +// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')] +#define INIT_hipCtxSetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \ +}; +// hipCtxSetCurrent[('hipCtx_t', 'ctx')] +#define INIT_hipCtxSetCurrent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxSetCurrent.ctx = (hipCtx_t)ctx; \ +}; +// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')] +#define INIT_hipCtxSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipCtxSetSharedMemConfig.config = (hipSharedMemConfig)config; \ +}; +// hipCtxSynchronize[] +#define INIT_hipCtxSynchronize_CB_ARGS_DATA(cb_data) { \ +}; +// hipDestroyExternalMemory[('hipExternalMemory_t', 'extMem')] 
+#define INIT_hipDestroyExternalMemory_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDestroyExternalMemory.extMem = (hipExternalMemory_t)extMem; \ +}; +// hipDestroyExternalSemaphore[('hipExternalSemaphore_t', 'extSem')] +#define INIT_hipDestroyExternalSemaphore_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDestroyExternalSemaphore.extSem = (hipExternalSemaphore_t)extSem; \ +}; +// hipDestroySurfaceObject[('hipSurfaceObject_t', 'surfaceObject')] +#define INIT_hipDestroySurfaceObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDestroySurfaceObject.surfaceObject = (hipSurfaceObject_t)surfaceObject; \ +}; +// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')] +#define INIT_hipDeviceCanAccessPeer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceCanAccessPeer.canAccessPeer = (int*)canAccess; \ + cb_data.args.hipDeviceCanAccessPeer.deviceId = (int)deviceId; \ + cb_data.args.hipDeviceCanAccessPeer.peerDeviceId = (int)peerDeviceId; \ +}; +// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')] +#define INIT_hipDeviceComputeCapability_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceComputeCapability.major = (int*)major; \ + cb_data.args.hipDeviceComputeCapability.minor = (int*)minor; \ + cb_data.args.hipDeviceComputeCapability.device = (hipDevice_t)device; \ +}; +// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')] +#define INIT_hipDeviceDisablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceDisablePeerAccess.peerDeviceId = (int)peerDeviceId; \ +}; +// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')] +#define INIT_hipDeviceEnablePeerAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceEnablePeerAccess.peerDeviceId = (int)peerDeviceId; \ + cb_data.args.hipDeviceEnablePeerAccess.flags = (unsigned int)flags; \ +}; +// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')] +#define INIT_hipDeviceGet_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipDeviceGet.device = (hipDevice_t*)device; \ + cb_data.args.hipDeviceGet.ordinal = (int)deviceId; \ +}; +// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')] +#define INIT_hipDeviceGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetAttribute.pi = (int*)pi; \ + cb_data.args.hipDeviceGetAttribute.attr = (hipDeviceAttribute_t)attr; \ + cb_data.args.hipDeviceGetAttribute.deviceId = (int)device; \ +}; +// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')] +#define INIT_hipDeviceGetByPCIBusId_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetByPCIBusId.device = (int*)device; \ + cb_data.args.hipDeviceGetByPCIBusId.pciBusId = (pciBusIdstr) ? strdup(pciBusIdstr) : NULL; \ +}; +// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')] +#define INIT_hipDeviceGetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetCacheConfig.cacheConfig = (hipFuncCache_t*)cacheConfig; \ +}; +// hipDeviceGetDefaultMemPool[('hipMemPool_t*', 'mem_pool'), ('int', 'device')] +#define INIT_hipDeviceGetDefaultMemPool_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetDefaultMemPool.mem_pool = (hipMemPool_t*)mem_pool; \ + cb_data.args.hipDeviceGetDefaultMemPool.device = (int)device; \ +}; +// hipDeviceGetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')] +#define INIT_hipDeviceGetGraphMemAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetGraphMemAttribute.device = (int)device; \ + cb_data.args.hipDeviceGetGraphMemAttribute.attr = (hipGraphMemAttributeType)attr; \ + cb_data.args.hipDeviceGetGraphMemAttribute.value = (void*)value; \ +}; +// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')] +#define INIT_hipDeviceGetLimit_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetLimit.pValue = (size_t*)pValue; \ + cb_data.args.hipDeviceGetLimit.limit = (hipLimit_t)limit; \ +}; +// hipDeviceGetMemPool[('hipMemPool_t*', 
'mem_pool'), ('int', 'device')] +#define INIT_hipDeviceGetMemPool_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetMemPool.mem_pool = (hipMemPool_t*)mem_pool; \ + cb_data.args.hipDeviceGetMemPool.device = (int)device; \ +}; +// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')] +#define INIT_hipDeviceGetName_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetName.name = (char*)name; \ + cb_data.args.hipDeviceGetName.len = (int)len; \ + cb_data.args.hipDeviceGetName.device = (hipDevice_t)device; \ +}; +// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')] +#define INIT_hipDeviceGetP2PAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetP2PAttribute.value = (int*)value; \ + cb_data.args.hipDeviceGetP2PAttribute.attr = (hipDeviceP2PAttr)attr; \ + cb_data.args.hipDeviceGetP2PAttribute.srcDevice = (int)srcDevice; \ + cb_data.args.hipDeviceGetP2PAttribute.dstDevice = (int)dstDevice; \ +}; +// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')] +#define INIT_hipDeviceGetPCIBusId_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetPCIBusId.pciBusId = (char*)pciBusId; \ + cb_data.args.hipDeviceGetPCIBusId.len = (int)len; \ + cb_data.args.hipDeviceGetPCIBusId.device = (int)device; \ +}; +// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')] +#define INIT_hipDeviceGetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetSharedMemConfig.pConfig = (hipSharedMemConfig*)pConfig; \ +}; +// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')] +#define INIT_hipDeviceGetStreamPriorityRange_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetStreamPriorityRange.leastPriority = (int*)leastPriority; \ + cb_data.args.hipDeviceGetStreamPriorityRange.greatestPriority = (int*)greatestPriority; \ +}; +// hipDeviceGetUuid[('hipUUID*', 'uuid'), ('hipDevice_t', 'device')] +#define 
INIT_hipDeviceGetUuid_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGetUuid.uuid = (hipUUID*)uuid; \ + cb_data.args.hipDeviceGetUuid.device = (hipDevice_t)device; \ +}; +// hipDeviceGraphMemTrim[('int', 'device')] +#define INIT_hipDeviceGraphMemTrim_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceGraphMemTrim.device = (int)device; \ +}; +// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')] +#define INIT_hipDevicePrimaryCtxGetState_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxGetState.dev = (hipDevice_t)dev; \ + cb_data.args.hipDevicePrimaryCtxGetState.flags = (unsigned int*)flags; \ + cb_data.args.hipDevicePrimaryCtxGetState.active = (int*)active; \ +}; +// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')] +#define INIT_hipDevicePrimaryCtxRelease_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxRelease.dev = (hipDevice_t)dev; \ +}; +// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')] +#define INIT_hipDevicePrimaryCtxReset_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxReset.dev = (hipDevice_t)dev; \ +}; +// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')] +#define INIT_hipDevicePrimaryCtxRetain_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxRetain.pctx = (hipCtx_t*)pctx; \ + cb_data.args.hipDevicePrimaryCtxRetain.dev = (hipDevice_t)dev; \ +}; +// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')] +#define INIT_hipDevicePrimaryCtxSetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDevicePrimaryCtxSetFlags.dev = (hipDevice_t)dev; \ + cb_data.args.hipDevicePrimaryCtxSetFlags.flags = (unsigned int)flags; \ +}; +// hipDeviceReset[] +#define INIT_hipDeviceReset_CB_ARGS_DATA(cb_data) { \ +}; +// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')] +#define INIT_hipDeviceSetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceSetCacheConfig.cacheConfig = (hipFuncCache_t)cacheConfig; \ +}; +// 
hipDeviceSetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')] +#define INIT_hipDeviceSetGraphMemAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceSetGraphMemAttribute.device = (int)device; \ + cb_data.args.hipDeviceSetGraphMemAttribute.attr = (hipGraphMemAttributeType)attr; \ + cb_data.args.hipDeviceSetGraphMemAttribute.value = (void*)value; \ +}; +// hipDeviceSetLimit[('hipLimit_t', 'limit'), ('size_t', 'value')] +#define INIT_hipDeviceSetLimit_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceSetLimit.limit = (hipLimit_t)limit; \ + cb_data.args.hipDeviceSetLimit.value = (size_t)value; \ +}; +// hipDeviceSetMemPool[('int', 'device'), ('hipMemPool_t', 'mem_pool')] +#define INIT_hipDeviceSetMemPool_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceSetMemPool.device = (int)device; \ + cb_data.args.hipDeviceSetMemPool.mem_pool = (hipMemPool_t)mem_pool; \ +}; +// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')] +#define INIT_hipDeviceSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceSetSharedMemConfig.config = (hipSharedMemConfig)config; \ +}; +// hipDeviceSynchronize[] +#define INIT_hipDeviceSynchronize_CB_ARGS_DATA(cb_data) { \ +}; +// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')] +#define INIT_hipDeviceTotalMem_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDeviceTotalMem.bytes = (size_t*)bytes; \ + cb_data.args.hipDeviceTotalMem.device = (hipDevice_t)device; \ +}; +// hipDriverGetVersion[('int*', 'driverVersion')] +#define INIT_hipDriverGetVersion_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDriverGetVersion.driverVersion = (int*)driverVersion; \ +}; +// hipDrvMemcpy2DUnaligned[('const hip_Memcpy2D*', 'pCopy')] +#define INIT_hipDrvMemcpy2DUnaligned_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDrvMemcpy2DUnaligned.pCopy = (const hip_Memcpy2D*)pCopy; \ +}; +// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')] +#define INIT_hipDrvMemcpy3D_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipDrvMemcpy3D.pCopy = (const HIP_MEMCPY3D*)pCopy; \ +}; +// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')] +#define INIT_hipDrvMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDrvMemcpy3DAsync.pCopy = (const HIP_MEMCPY3D*)pCopy; \ + cb_data.args.hipDrvMemcpy3DAsync.stream = (hipStream_t)stream; \ +}; +// hipDrvPointerGetAttributes[('unsigned int', 'numAttributes'), ('hipPointer_attribute*', 'attributes'), ('void**', 'data'), ('hipDeviceptr_t', 'ptr')] +#define INIT_hipDrvPointerGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipDrvPointerGetAttributes.numAttributes = (unsigned int)numAttributes; \ + cb_data.args.hipDrvPointerGetAttributes.attributes = (hipPointer_attribute*)attributes; \ + cb_data.args.hipDrvPointerGetAttributes.data = (void**)data; \ + cb_data.args.hipDrvPointerGetAttributes.ptr = (hipDeviceptr_t)ptr; \ +}; +// hipEventCreate[('hipEvent_t*', 'event')] +#define INIT_hipEventCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventCreate.event = (hipEvent_t*)event; \ +}; +// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')] +#define INIT_hipEventCreateWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventCreateWithFlags.event = (hipEvent_t*)event; \ + cb_data.args.hipEventCreateWithFlags.flags = (unsigned int)flags; \ +}; +// hipEventDestroy[('hipEvent_t', 'event')] +#define INIT_hipEventDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventDestroy.event = (hipEvent_t)event; \ +}; +// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')] +#define INIT_hipEventElapsedTime_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventElapsedTime.ms = (float*)ms; \ + cb_data.args.hipEventElapsedTime.start = (hipEvent_t)start; \ + cb_data.args.hipEventElapsedTime.stop = (hipEvent_t)stop; \ +}; +// hipEventQuery[('hipEvent_t', 'event')] +#define INIT_hipEventQuery_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventQuery.event = 
(hipEvent_t)event; \ +}; +// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')] +#define INIT_hipEventRecord_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventRecord.event = (hipEvent_t)event; \ + cb_data.args.hipEventRecord.stream = (hipStream_t)stream; \ +}; +// hipEventSynchronize[('hipEvent_t', 'event')] +#define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipEventSynchronize.event = (hipEvent_t)event; \ +}; +// hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')] +#define INIT_hipExtGetLinkTypeAndHopCount_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtGetLinkTypeAndHopCount.device1 = (int)device1; \ + cb_data.args.hipExtGetLinkTypeAndHopCount.device2 = (int)device2; \ + cb_data.args.hipExtGetLinkTypeAndHopCount.linktype = (unsigned int*)linktype; \ + cb_data.args.hipExtGetLinkTypeAndHopCount.hopcount = (unsigned int*)hopcount; \ +}; +// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')] +#define INIT_hipExtLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtLaunchKernel.function_address = (const void*)hostFunction; \ + cb_data.args.hipExtLaunchKernel.numBlocks = (dim3)gridDim; \ + cb_data.args.hipExtLaunchKernel.dimBlocks = (dim3)blockDim; \ + cb_data.args.hipExtLaunchKernel.args = (void**)args; \ + cb_data.args.hipExtLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \ + cb_data.args.hipExtLaunchKernel.stream = (hipStream_t)stream; \ + cb_data.args.hipExtLaunchKernel.startEvent = (hipEvent_t)startEvent; \ + cb_data.args.hipExtLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \ + cb_data.args.hipExtLaunchKernel.flags = (int)flags; \ +}; +// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 
'numDevices'), ('unsigned int', 'flags')] +#define INIT_hipExtLaunchMultiKernelMultiDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtLaunchMultiKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \ + cb_data.args.hipExtLaunchMultiKernelMultiDevice.numDevices = (int)numDevices; \ + cb_data.args.hipExtLaunchMultiKernelMultiDevice.flags = (unsigned int)flags; \ +}; +// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] +#define INIT_hipExtMallocWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtMallocWithFlags.ptr = (void**)ptr; \ + cb_data.args.hipExtMallocWithFlags.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipExtMallocWithFlags.flags = (unsigned int)flags; \ +}; +// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')] +#define INIT_hipExtModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtModuleLaunchKernel.f = (hipFunction_t)f; \ + cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \ + cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \ + cb_data.args.hipExtModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \ + cb_data.args.hipExtModuleLaunchKernel.localWorkSizeX = (unsigned int)localWorkSizeX; \ + cb_data.args.hipExtModuleLaunchKernel.localWorkSizeY = (unsigned int)localWorkSizeY; \ + cb_data.args.hipExtModuleLaunchKernel.localWorkSizeZ = (unsigned int)localWorkSizeZ; \ + cb_data.args.hipExtModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \ + 
cb_data.args.hipExtModuleLaunchKernel.hStream = (hipStream_t)hStream; \ + cb_data.args.hipExtModuleLaunchKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipExtModuleLaunchKernel.extra = (void**)extra; \ + cb_data.args.hipExtModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \ + cb_data.args.hipExtModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \ + cb_data.args.hipExtModuleLaunchKernel.flags = (unsigned int)flags; \ +}; +// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')] +#define INIT_hipExtStreamCreateWithCUMask_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtStreamCreateWithCUMask.stream = (hipStream_t*)stream; \ + cb_data.args.hipExtStreamCreateWithCUMask.cuMaskSize = (unsigned int)cuMaskSize; \ + cb_data.args.hipExtStreamCreateWithCUMask.cuMask = (const unsigned int*)cuMask; \ +}; +// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')] +#define INIT_hipExtStreamGetCUMask_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExtStreamGetCUMask.stream = (hipStream_t)stream; \ + cb_data.args.hipExtStreamGetCUMask.cuMaskSize = (unsigned int)cuMaskSize; \ + cb_data.args.hipExtStreamGetCUMask.cuMask = (unsigned int*)cuMask; \ +}; +// hipExternalMemoryGetMappedBuffer[('void**', 'devPtr'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryBufferDesc*', 'bufferDesc')] +#define INIT_hipExternalMemoryGetMappedBuffer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipExternalMemoryGetMappedBuffer.devPtr = (void**)devPtr; \ + cb_data.args.hipExternalMemoryGetMappedBuffer.extMem = (hipExternalMemory_t)extMem; \ + cb_data.args.hipExternalMemoryGetMappedBuffer.bufferDesc = (const hipExternalMemoryBufferDesc*)bufferDesc; \ +}; +// hipFree[('void*', 'ptr')] +#define INIT_hipFree_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFree.ptr = (void*)ptr; \ +}; +// hipFreeArray[('hipArray*', 'array')] +#define 
INIT_hipFreeArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFreeArray.array = (hipArray*)array; \ +}; +// hipFreeAsync[('void*', 'dev_ptr'), ('hipStream_t', 'stream')] +#define INIT_hipFreeAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFreeAsync.dev_ptr = (void*)dev_ptr; \ + cb_data.args.hipFreeAsync.stream = (hipStream_t)stream; \ +}; +// hipFreeHost[('void*', 'ptr')] +#define INIT_hipFreeHost_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFreeHost.ptr = (void*)ptr; \ +}; +// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')] +#define INIT_hipFreeMipmappedArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFreeMipmappedArray.mipmappedArray = (hipMipmappedArray_t)mipmappedArray; \ +}; +// hipFuncGetAttribute[('int*', 'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')] +#define INIT_hipFuncGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncGetAttribute.value = (int*)value; \ + cb_data.args.hipFuncGetAttribute.attrib = (hipFunction_attribute)attrib; \ + cb_data.args.hipFuncGetAttribute.hfunc = (hipFunction_t)hfunc; \ +}; +// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')] +#define INIT_hipFuncGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncGetAttributes.attr = (hipFuncAttributes*)attr; \ + cb_data.args.hipFuncGetAttributes.func = (const void*)func; \ +}; +// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')] +#define INIT_hipFuncSetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncSetAttribute.func = (const void*)func; \ + cb_data.args.hipFuncSetAttribute.attr = (hipFuncAttribute)attr; \ + cb_data.args.hipFuncSetAttribute.value = (int)value; \ +}; +// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')] +#define INIT_hipFuncSetCacheConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncSetCacheConfig.func = (const void*)func; \ + cb_data.args.hipFuncSetCacheConfig.config = (hipFuncCache_t)cacheConfig; \ 
+}; +// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')] +#define INIT_hipFuncSetSharedMemConfig_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipFuncSetSharedMemConfig.func = (const void*)func; \ + cb_data.args.hipFuncSetSharedMemConfig.config = (hipSharedMemConfig)config; \ +}; +// hipGLGetDevices[('unsigned int*', 'pHipDeviceCount'), ('int*', 'pHipDevices'), ('unsigned int', 'hipDeviceCount'), ('hipGLDeviceList', 'deviceList')] +#define INIT_hipGLGetDevices_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGLGetDevices.pHipDeviceCount = (unsigned int*)pHipDeviceCount; \ + cb_data.args.hipGLGetDevices.pHipDevices = (int*)pHipDevices; \ + cb_data.args.hipGLGetDevices.hipDeviceCount = (unsigned int)hipDeviceCount; \ + cb_data.args.hipGLGetDevices.deviceList = (hipGLDeviceList)deviceList; \ +}; +// hipGetChannelDesc[('hipChannelFormatDesc*', 'desc'), ('hipArray_const_t', 'array')] +#define INIT_hipGetChannelDesc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetChannelDesc.desc = (hipChannelFormatDesc*)desc; \ + cb_data.args.hipGetChannelDesc.array = (hipArray_const_t)array; \ +}; +// hipGetDevice[('int*', 'deviceId')] +#define INIT_hipGetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDevice.deviceId = (int*)deviceId; \ +}; +// hipGetDeviceCount[('int*', 'count')] +#define INIT_hipGetDeviceCount_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDeviceCount.count = (int*)count; \ +}; +// hipGetDeviceFlags[('unsigned int*', 'flags')] +#define INIT_hipGetDeviceFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDeviceFlags.flags = (unsigned int*)flags; \ +}; +// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')] +#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDeviceProperties.props = (hipDeviceProp_t*)props; \ + cb_data.args.hipGetDeviceProperties.device = (hipDevice_t)device; \ +}; +// hipGetErrorString[] +#define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) { \ +}; +// 
hipGetLastError[] +#define INIT_hipGetLastError_CB_ARGS_DATA(cb_data) { \ +}; +// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')] +#define INIT_hipGetMipmappedArrayLevel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetMipmappedArrayLevel.levelArray = (hipArray_t*)levelArray; \ + cb_data.args.hipGetMipmappedArrayLevel.mipmappedArray = (hipMipmappedArray_const_t)mipmappedArray; \ + cb_data.args.hipGetMipmappedArrayLevel.level = (unsigned int)level; \ +}; +// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')] +#define INIT_hipGetSymbolAddress_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetSymbolAddress.devPtr = (void**)devPtr; \ + cb_data.args.hipGetSymbolAddress.symbol = (const void*)symbol; \ +}; +// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')] +#define INIT_hipGetSymbolSize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetSymbolSize.size = (size_t*)sizePtr; \ + cb_data.args.hipGetSymbolSize.symbol = (const void*)symbol; \ +}; +// hipGraphAddChildGraphNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipGraph_t', 'childGraph')] +#define INIT_hipGraphAddChildGraphNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddChildGraphNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddChildGraphNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddChildGraphNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddChildGraphNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddChildGraphNode.childGraph = (hipGraph_t)childGraph; \ +}; +// hipGraphAddDependencies[('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] +#define INIT_hipGraphAddDependencies_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipGraphAddDependencies.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddDependencies.from = (const hipGraphNode_t*)from; \ + cb_data.args.hipGraphAddDependencies.to = (const hipGraphNode_t*)to; \ + cb_data.args.hipGraphAddDependencies.numDependencies = (size_t)numDependencies; \ +}; +// hipGraphAddEmptyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies')] +#define INIT_hipGraphAddEmptyNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddEmptyNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddEmptyNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddEmptyNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddEmptyNode.numDependencies = (size_t)numDependencies; \ +}; +// hipGraphAddEventRecordNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] +#define INIT_hipGraphAddEventRecordNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddEventRecordNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddEventRecordNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddEventRecordNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddEventRecordNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddEventRecordNode.event = (hipEvent_t)event; \ +}; +// hipGraphAddEventWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] +#define INIT_hipGraphAddEventWaitNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddEventWaitNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddEventWaitNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddEventWaitNode.pDependencies = (const 
hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddEventWaitNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddEventWaitNode.event = (hipEvent_t)event; \ +}; +// hipGraphAddHostNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphAddHostNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddHostNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddHostNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddHostNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddHostNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddHostNode.pNodeParams = (const hipHostNodeParams*)pNodeParams; \ +}; +// hipGraphAddKernelNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphAddKernelNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddKernelNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddKernelNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddKernelNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddKernelNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddKernelNode.pNodeParams = (const hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphAddMemAllocNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipMemAllocNodeParams*', 'pNodeParams')] +#define INIT_hipGraphAddMemAllocNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemAllocNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemAllocNode.graph = (hipGraph_t)graph; \ + 
cb_data.args.hipGraphAddMemAllocNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemAllocNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemAllocNode.pNodeParams = (hipMemAllocNodeParams*)pNodeParams; \ +}; +// hipGraphAddMemFreeNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dev_ptr')] +#define INIT_hipGraphAddMemFreeNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemFreeNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemFreeNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemFreeNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemFreeNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemFreeNode.dev_ptr = (void*)dev_ptr; \ +}; +// hipGraphAddMemcpyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemcpy3DParms*', 'pCopyParams')] +#define INIT_hipGraphAddMemcpyNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemcpyNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemcpyNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNode.pCopyParams = (const hipMemcpy3DParms*)pCopyParams; \ +}; +// hipGraphAddMemcpyNode1D[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphAddMemcpyNode1D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemcpyNode1D.pGraphNode = (hipGraphNode_t*)pGraphNode; \ 
+ cb_data.args.hipGraphAddMemcpyNode1D.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNode1D.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNode1D.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNode1D.dst = (void*)dst; \ + cb_data.args.hipGraphAddMemcpyNode1D.src = (const void*)src; \ + cb_data.args.hipGraphAddMemcpyNode1D.count = (size_t)count; \ + cb_data.args.hipGraphAddMemcpyNode1D.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphAddMemcpyNodeFromSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphAddMemcpyNodeFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.dst = (void*)dst; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.count = (size_t)count; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphAddMemcpyNodeFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphAddMemcpyNodeToSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphAddMemcpyNodeToSymbol_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipGraphAddMemcpyNodeToSymbol.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.src = (const void*)src; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.count = (size_t)count; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphAddMemcpyNodeToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphAddMemsetNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemsetParams*', 'pMemsetParams')] +#define INIT_hipGraphAddMemsetNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphAddMemsetNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ + cb_data.args.hipGraphAddMemsetNode.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphAddMemsetNode.pDependencies = (const hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphAddMemsetNode.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipGraphAddMemsetNode.pMemsetParams = (const hipMemsetParams*)pMemsetParams; \ +}; +// hipGraphChildGraphNodeGetGraph[('hipGraphNode_t', 'node'), ('hipGraph_t*', 'pGraph')] +#define INIT_hipGraphChildGraphNodeGetGraph_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphChildGraphNodeGetGraph.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphChildGraphNodeGetGraph.pGraph = (hipGraph_t*)pGraph; \ +}; +// hipGraphClone[('hipGraph_t*', 'pGraphClone'), ('hipGraph_t', 'originalGraph')] +#define INIT_hipGraphClone_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphClone.pGraphClone = (hipGraph_t*)pGraphClone; \ + cb_data.args.hipGraphClone.originalGraph = 
(hipGraph_t)originalGraph; \ +}; +// hipGraphCreate[('hipGraph_t*', 'pGraph'), ('unsigned int', 'flags')] +#define INIT_hipGraphCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphCreate.pGraph = (hipGraph_t*)pGraph; \ + cb_data.args.hipGraphCreate.flags = (unsigned int)flags; \ +}; +// hipGraphDebugDotPrint[('hipGraph_t', 'graph'), ('const char*', 'path'), ('unsigned int', 'flags')] +#define INIT_hipGraphDebugDotPrint_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphDebugDotPrint.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphDebugDotPrint.path = (path) ? strdup(path) : NULL; \ + cb_data.args.hipGraphDebugDotPrint.flags = (unsigned int)flags; \ +}; +// hipGraphDestroy[('hipGraph_t', 'graph')] +#define INIT_hipGraphDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphDestroy.graph = (hipGraph_t)graph; \ +}; +// hipGraphDestroyNode[('hipGraphNode_t', 'node')] +#define INIT_hipGraphDestroyNode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphDestroyNode.node = (hipGraphNode_t)node; \ +}; +// hipGraphEventRecordNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] +#define INIT_hipGraphEventRecordNodeGetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventRecordNodeGetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventRecordNodeGetEvent.event_out = (hipEvent_t*)event_out; \ +}; +// hipGraphEventRecordNodeSetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t', 'event')] +#define INIT_hipGraphEventRecordNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventRecordNodeSetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventRecordNodeSetEvent.event = (hipEvent_t)event; \ +}; +// hipGraphEventWaitNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] +#define INIT_hipGraphEventWaitNodeGetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventWaitNodeGetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventWaitNodeGetEvent.event_out = (hipEvent_t*)event_out; \ +}; +// 
hipGraphEventWaitNodeSetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t', 'event')] +#define INIT_hipGraphEventWaitNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphEventWaitNodeSetEvent.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphEventWaitNodeSetEvent.event = (hipEvent_t)event; \ +}; +// hipGraphExecChildGraphNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipGraph_t', 'childGraph')] +#define INIT_hipGraphExecChildGraphNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecChildGraphNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecChildGraphNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecChildGraphNodeSetParams.childGraph = (hipGraph_t)childGraph; \ +}; +// hipGraphExecDestroy[('hipGraphExec_t', 'graphExec')] +#define INIT_hipGraphExecDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecDestroy.graphExec = (hipGraphExec_t)pGraphExec; \ +}; +// hipGraphExecEventRecordNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] +#define INIT_hipGraphExecEventRecordNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecEventRecordNodeSetEvent.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecEventRecordNodeSetEvent.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExecEventRecordNodeSetEvent.event = (hipEvent_t)event; \ +}; +// hipGraphExecEventWaitNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] +#define INIT_hipGraphExecEventWaitNodeSetEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecEventWaitNodeSetEvent.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecEventWaitNodeSetEvent.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphExecEventWaitNodeSetEvent.event = (hipEvent_t)event; \ +}; +// hipGraphExecHostNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 
'node'), ('const hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphExecHostNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecHostNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecHostNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecHostNodeSetParams.pNodeParams = (const hipHostNodeParams*)pNodeParams; \ +}; +// hipGraphExecKernelNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphExecKernelNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecKernelNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecKernelNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecKernelNodeSetParams.pNodeParams = (const hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphExecMemcpyNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] +#define INIT_hipGraphExecMemcpyNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams.pNodeParams = (hipMemcpy3DParms*)pNodeParams; \ +}; +// hipGraphExecMemcpyNodeSetParams1D[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphExecMemcpyNodeSetParams1D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.dst = (void*)dst; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.src = (const void*)src; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.count = 
(size_t)count; \ + cb_data.args.hipGraphExecMemcpyNodeSetParams1D.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphExecMemcpyNodeSetParamsFromSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphExecMemcpyNodeSetParamsFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.dst = (void*)dst; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.count = (size_t)count; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphExecMemcpyNodeSetParamsToSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphExecMemcpyNodeSetParamsToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.src = (const void*)src; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.count = (size_t)count; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphExecMemcpyNodeSetParamsToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphExecMemsetNodeSetParams[('hipGraphExec_t', 'hGraphExec'), 
('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 'pNodeParams')] +#define INIT_hipGraphExecMemsetNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecMemsetNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecMemsetNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphExecMemsetNodeSetParams.pNodeParams = (const hipMemsetParams*)pNodeParams; \ +}; +// hipGraphExecUpdate[('hipGraphExec_t', 'hGraphExec'), ('hipGraph_t', 'hGraph'), ('hipGraphNode_t*', 'hErrorNode_out'), ('hipGraphExecUpdateResult*', 'updateResult_out')] +#define INIT_hipGraphExecUpdate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphExecUpdate.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphExecUpdate.hGraph = (hipGraph_t)hGraph; \ + cb_data.args.hipGraphExecUpdate.hErrorNode_out = (hipGraphNode_t*)hErrorNode_out; \ + cb_data.args.hipGraphExecUpdate.updateResult_out = (hipGraphExecUpdateResult*)updateResult_out; \ +}; +// hipGraphGetEdges[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'from'), ('hipGraphNode_t*', 'to'), ('size_t*', 'numEdges')] +#define INIT_hipGraphGetEdges_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphGetEdges.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphGetEdges.from = (hipGraphNode_t*)from; \ + cb_data.args.hipGraphGetEdges.to = (hipGraphNode_t*)to; \ + cb_data.args.hipGraphGetEdges.numEdges = (size_t*)numEdges; \ +}; +// hipGraphGetNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'nodes'), ('size_t*', 'numNodes')] +#define INIT_hipGraphGetNodes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphGetNodes.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphGetNodes.nodes = (hipGraphNode_t*)nodes; \ + cb_data.args.hipGraphGetNodes.numNodes = (size_t*)numNodes; \ +}; +// hipGraphGetRootNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pRootNodes'), ('size_t*', 'pNumRootNodes')] +#define INIT_hipGraphGetRootNodes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphGetRootNodes.graph = 
(hipGraph_t)graph; \ + cb_data.args.hipGraphGetRootNodes.pRootNodes = (hipGraphNode_t*)pRootNodes; \ + cb_data.args.hipGraphGetRootNodes.pNumRootNodes = (size_t*)pNumRootNodes; \ +}; +// hipGraphHostNodeGetParams[('hipGraphNode_t', 'node'), ('hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphHostNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphHostNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphHostNodeGetParams.pNodeParams = (hipHostNodeParams*)pNodeParams; \ +}; +// hipGraphHostNodeSetParams[('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] +#define INIT_hipGraphHostNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphHostNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphHostNodeSetParams.pNodeParams = (const hipHostNodeParams*)pNodeParams; \ +}; +// hipGraphInstantiate[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pErrorNode'), ('char*', 'pLogBuffer'), ('size_t', 'bufferSize')] +#define INIT_hipGraphInstantiate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphInstantiate.pGraphExec = (hipGraphExec_t*)pGraphExec; \ + cb_data.args.hipGraphInstantiate.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphInstantiate.pErrorNode = (hipGraphNode_t*)pErrorNode; \ + cb_data.args.hipGraphInstantiate.pLogBuffer = (char*)pLogBuffer; \ + cb_data.args.hipGraphInstantiate.bufferSize = (size_t)bufferSize; \ +}; +// hipGraphInstantiateWithFlags[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('unsigned long long', 'flags')] +#define INIT_hipGraphInstantiateWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphInstantiateWithFlags.pGraphExec = (hipGraphExec_t*)pGraphExec; \ + cb_data.args.hipGraphInstantiateWithFlags.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphInstantiateWithFlags.flags = (unsigned long long)flags; \ +}; +// hipGraphKernelNodeCopyAttributes[('hipGraphNode_t', 'hSrc'), ('hipGraphNode_t', 'hDst')] +#define 
INIT_hipGraphKernelNodeCopyAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeCopyAttributes.hSrc = (hipGraphNode_t)hSrc; \ + cb_data.args.hipGraphKernelNodeCopyAttributes.hDst = (hipGraphNode_t)hDst; \ +}; +// hipGraphKernelNodeGetAttribute[('hipGraphNode_t', 'hNode'), ('hipKernelNodeAttrID', 'attr'), ('hipKernelNodeAttrValue*', 'value')] +#define INIT_hipGraphKernelNodeGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeGetAttribute.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphKernelNodeGetAttribute.attr = (hipKernelNodeAttrID)attr; \ + cb_data.args.hipGraphKernelNodeGetAttribute.value = (hipKernelNodeAttrValue*)value; \ +}; +// hipGraphKernelNodeGetParams[('hipGraphNode_t', 'node'), ('hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphKernelNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphKernelNodeGetParams.pNodeParams = (hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphKernelNodeSetAttribute[('hipGraphNode_t', 'hNode'), ('hipKernelNodeAttrID', 'attr'), ('const hipKernelNodeAttrValue*', 'value')] +#define INIT_hipGraphKernelNodeSetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeSetAttribute.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphKernelNodeSetAttribute.attr = (hipKernelNodeAttrID)attr; \ + cb_data.args.hipGraphKernelNodeSetAttribute.value = (const hipKernelNodeAttrValue*)value; \ +}; +// hipGraphKernelNodeSetParams[('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] +#define INIT_hipGraphKernelNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphKernelNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphKernelNodeSetParams.pNodeParams = (const hipKernelNodeParams*)pNodeParams; \ +}; +// hipGraphLaunch[('hipGraphExec_t', 'graphExec'), ('hipStream_t', 'stream')] +#define INIT_hipGraphLaunch_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipGraphLaunch.graphExec = (hipGraphExec_t)graphExec; \ + cb_data.args.hipGraphLaunch.stream = (hipStream_t)stream; \ +}; +// hipGraphMemAllocNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemAllocNodeParams*', 'pNodeParams')] +#define INIT_hipGraphMemAllocNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemAllocNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemAllocNodeGetParams.pNodeParams = (hipMemAllocNodeParams*)pNodeParams; \ +}; +// hipGraphMemFreeNodeGetParams[('hipGraphNode_t', 'node'), ('void*', 'dev_ptr')] +#define INIT_hipGraphMemFreeNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemFreeNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemFreeNodeGetParams.dev_ptr = (void*)dev_ptr; \ +}; +// hipGraphMemcpyNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] +#define INIT_hipGraphMemcpyNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeGetParams.pNodeParams = (hipMemcpy3DParms*)pNodeParams; \ +}; +// hipGraphMemcpyNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemcpy3DParms*', 'pNodeParams')] +#define INIT_hipGraphMemcpyNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParams.pNodeParams = (const hipMemcpy3DParms*)pNodeParams; \ +}; +// hipGraphMemcpyNodeSetParams1D[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphMemcpyNodeSetParams1D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.dst = (void*)dst; \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.src = (const void*)src; \ + cb_data.args.hipGraphMemcpyNodeSetParams1D.count = (size_t)count; \ + 
cb_data.args.hipGraphMemcpyNodeSetParams1D.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphMemcpyNodeSetParamsFromSymbol[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphMemcpyNodeSetParamsFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.dst = (void*)dst; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.count = (size_t)count; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphMemcpyNodeSetParamsFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphMemcpyNodeSetParamsToSymbol[('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipGraphMemcpyNodeSetParamsToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.src = (const void*)src; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.count = (size_t)count; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.offset = (size_t)offset; \ + cb_data.args.hipGraphMemcpyNodeSetParamsToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipGraphMemsetNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemsetParams*', 'pNodeParams')] +#define INIT_hipGraphMemsetNodeGetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemsetNodeGetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemsetNodeGetParams.pNodeParams = (hipMemsetParams*)pNodeParams; \ +}; +// hipGraphMemsetNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 
'pNodeParams')] +#define INIT_hipGraphMemsetNodeSetParams_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphMemsetNodeSetParams.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphMemsetNodeSetParams.pNodeParams = (const hipMemsetParams*)pNodeParams; \ +}; +// hipGraphNodeFindInClone[('hipGraphNode_t*', 'pNode'), ('hipGraphNode_t', 'originalNode'), ('hipGraph_t', 'clonedGraph')] +#define INIT_hipGraphNodeFindInClone_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeFindInClone.pNode = (hipGraphNode_t*)pNode; \ + cb_data.args.hipGraphNodeFindInClone.originalNode = (hipGraphNode_t)originalNode; \ + cb_data.args.hipGraphNodeFindInClone.clonedGraph = (hipGraph_t)clonedGraph; \ +}; +// hipGraphNodeGetDependencies[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependencies'), ('size_t*', 'pNumDependencies')] +#define INIT_hipGraphNodeGetDependencies_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetDependencies.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphNodeGetDependencies.pDependencies = (hipGraphNode_t*)pDependencies; \ + cb_data.args.hipGraphNodeGetDependencies.pNumDependencies = (size_t*)pNumDependencies; \ +}; +// hipGraphNodeGetDependentNodes[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependentNodes'), ('size_t*', 'pNumDependentNodes')] +#define INIT_hipGraphNodeGetDependentNodes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetDependentNodes.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphNodeGetDependentNodes.pDependentNodes = (hipGraphNode_t*)pDependentNodes; \ + cb_data.args.hipGraphNodeGetDependentNodes.pNumDependentNodes = (size_t*)pNumDependentNodes; \ +}; +// hipGraphNodeGetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int*', 'isEnabled')] +#define INIT_hipGraphNodeGetEnabled_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetEnabled.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphNodeGetEnabled.hNode = (hipGraphNode_t)hNode; \ + 
cb_data.args.hipGraphNodeGetEnabled.isEnabled = (unsigned int*)isEnabled; \ +}; +// hipGraphNodeGetType[('hipGraphNode_t', 'node'), ('hipGraphNodeType*', 'pType')] +#define INIT_hipGraphNodeGetType_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeGetType.node = (hipGraphNode_t)node; \ + cb_data.args.hipGraphNodeGetType.pType = (hipGraphNodeType*)pType; \ +}; +// hipGraphNodeSetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int', 'isEnabled')] +#define INIT_hipGraphNodeSetEnabled_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphNodeSetEnabled.hGraphExec = (hipGraphExec_t)hGraphExec; \ + cb_data.args.hipGraphNodeSetEnabled.hNode = (hipGraphNode_t)hNode; \ + cb_data.args.hipGraphNodeSetEnabled.isEnabled = (unsigned int)isEnabled; \ +}; +// hipGraphReleaseUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count')] +#define INIT_hipGraphReleaseUserObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphReleaseUserObject.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphReleaseUserObject.object = (hipUserObject_t)object; \ + cb_data.args.hipGraphReleaseUserObject.count = (unsigned int)count; \ +}; +// hipGraphRemoveDependencies[('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] +#define INIT_hipGraphRemoveDependencies_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphRemoveDependencies.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphRemoveDependencies.from = (const hipGraphNode_t*)from; \ + cb_data.args.hipGraphRemoveDependencies.to = (const hipGraphNode_t*)to; \ + cb_data.args.hipGraphRemoveDependencies.numDependencies = (size_t)numDependencies; \ +}; +// hipGraphRetainUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count'), ('unsigned int', 'flags')] +#define INIT_hipGraphRetainUserObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphRetainUserObject.graph = (hipGraph_t)graph; \ + 
cb_data.args.hipGraphRetainUserObject.object = (hipUserObject_t)object; \ + cb_data.args.hipGraphRetainUserObject.count = (unsigned int)count; \ + cb_data.args.hipGraphRetainUserObject.flags = (unsigned int)flags; \ +}; +// hipGraphUpload[('hipGraphExec_t', 'graphExec'), ('hipStream_t', 'stream')] +#define INIT_hipGraphUpload_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphUpload.graphExec = (hipGraphExec_t)graphExec; \ + cb_data.args.hipGraphUpload.stream = (hipStream_t)stream; \ +}; +// hipGraphicsGLRegisterBuffer[('hipGraphicsResource**', 'resource'), ('GLuint', 'buffer'), ('unsigned int', 'flags')] +#define INIT_hipGraphicsGLRegisterBuffer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsGLRegisterBuffer.resource = (hipGraphicsResource**)resource; \ + cb_data.args.hipGraphicsGLRegisterBuffer.buffer = (GLuint)buffer; \ + cb_data.args.hipGraphicsGLRegisterBuffer.flags = (unsigned int)flags; \ +}; +// hipGraphicsGLRegisterImage[('hipGraphicsResource**', 'resource'), ('GLuint', 'image'), ('GLenum', 'target'), ('unsigned int', 'flags')] +#define INIT_hipGraphicsGLRegisterImage_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsGLRegisterImage.resource = (hipGraphicsResource**)resource; \ + cb_data.args.hipGraphicsGLRegisterImage.image = (GLuint)image; \ + cb_data.args.hipGraphicsGLRegisterImage.target = (GLenum)target; \ + cb_data.args.hipGraphicsGLRegisterImage.flags = (unsigned int)flags; \ +}; +// hipGraphicsMapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] +#define INIT_hipGraphicsMapResources_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsMapResources.count = (int)count; \ + cb_data.args.hipGraphicsMapResources.resources = (hipGraphicsResource_t*)resources; \ + cb_data.args.hipGraphicsMapResources.stream = (hipStream_t)stream; \ +}; +// hipGraphicsResourceGetMappedPointer[('void**', 'devPtr'), ('size_t*', 'size'), ('hipGraphicsResource_t', 'resource')] +#define 
INIT_hipGraphicsResourceGetMappedPointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsResourceGetMappedPointer.devPtr = (void**)devPtr; \ + cb_data.args.hipGraphicsResourceGetMappedPointer.size = (size_t*)size; \ + cb_data.args.hipGraphicsResourceGetMappedPointer.resource = (hipGraphicsResource_t)resource; \ +}; +// hipGraphicsSubResourceGetMappedArray[('hipArray_t*', 'array'), ('hipGraphicsResource_t', 'resource'), ('unsigned int', 'arrayIndex'), ('unsigned int', 'mipLevel')] +#define INIT_hipGraphicsSubResourceGetMappedArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.array = (hipArray_t*)array; \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.resource = (hipGraphicsResource_t)resource; \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.arrayIndex = (unsigned int)arrayIndex; \ + cb_data.args.hipGraphicsSubResourceGetMappedArray.mipLevel = (unsigned int)mipLevel; \ +}; +// hipGraphicsUnmapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] +#define INIT_hipGraphicsUnmapResources_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsUnmapResources.count = (int)count; \ + cb_data.args.hipGraphicsUnmapResources.resources = (hipGraphicsResource_t*)resources; \ + cb_data.args.hipGraphicsUnmapResources.stream = (hipStream_t)stream; \ +}; +// hipGraphicsUnregisterResource[('hipGraphicsResource_t', 'resource')] +#define INIT_hipGraphicsUnregisterResource_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphicsUnregisterResource.resource = (hipGraphicsResource_t)resource; \ +}; +// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 
'stopEvent')] +#define INIT_hipHccModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHccModuleLaunchKernel.f = (hipFunction_t)f; \ + cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeX = (unsigned int)globalWorkSizeX; \ + cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeY = (unsigned int)globalWorkSizeY; \ + cb_data.args.hipHccModuleLaunchKernel.globalWorkSizeZ = (unsigned int)globalWorkSizeZ; \ + cb_data.args.hipHccModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \ + cb_data.args.hipHccModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \ + cb_data.args.hipHccModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \ + cb_data.args.hipHccModuleLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \ + cb_data.args.hipHccModuleLaunchKernel.hStream = (hipStream_t)hStream; \ + cb_data.args.hipHccModuleLaunchKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipHccModuleLaunchKernel.extra = (void**)extra; \ + cb_data.args.hipHccModuleLaunchKernel.startEvent = (hipEvent_t)startEvent; \ + cb_data.args.hipHccModuleLaunchKernel.stopEvent = (hipEvent_t)stopEvent; \ +}; +// hipHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] +#define INIT_hipHostAlloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostAlloc.ptr = (void**)ptr; \ + cb_data.args.hipHostAlloc.size = (size_t)sizeBytes; \ + cb_data.args.hipHostAlloc.flags = (unsigned int)flags; \ +}; +// hipHostFree[('void*', 'ptr')] +#define INIT_hipHostFree_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostFree.ptr = (void*)ptr; \ +}; +// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')] +#define INIT_hipHostGetDevicePointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostGetDevicePointer.devPtr = (void**)devicePointer; \ + cb_data.args.hipHostGetDevicePointer.hstPtr = (void*)hostPointer; \ + cb_data.args.hipHostGetDevicePointer.flags = (unsigned int)flags; \ +}; +// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 
'hostPtr')] +#define INIT_hipHostGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostGetFlags.flagsPtr = (unsigned int*)flagsPtr; \ + cb_data.args.hipHostGetFlags.hostPtr = (void*)hostPtr; \ +}; +// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] +#define INIT_hipHostMalloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostMalloc.ptr = (void**)ptr; \ + cb_data.args.hipHostMalloc.size = (size_t)sizeBytes; \ + cb_data.args.hipHostMalloc.flags = (unsigned int)flags; \ +}; +// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] +#define INIT_hipHostRegister_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostRegister.hostPtr = (void*)hostPtr; \ + cb_data.args.hipHostRegister.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipHostRegister.flags = (unsigned int)flags; \ +}; +// hipHostUnregister[('void*', 'hostPtr')] +#define INIT_hipHostUnregister_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipHostUnregister.hostPtr = (void*)hostPtr; \ +}; +// hipImportExternalMemory[('hipExternalMemory_t*', 'extMem_out'), ('const hipExternalMemoryHandleDesc*', 'memHandleDesc')] +#define INIT_hipImportExternalMemory_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipImportExternalMemory.extMem_out = (hipExternalMemory_t*)extMem_out; \ + cb_data.args.hipImportExternalMemory.memHandleDesc = (const hipExternalMemoryHandleDesc*)memHandleDesc; \ +}; +// hipImportExternalSemaphore[('hipExternalSemaphore_t*', 'extSem_out'), ('const hipExternalSemaphoreHandleDesc*', 'semHandleDesc')] +#define INIT_hipImportExternalSemaphore_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipImportExternalSemaphore.extSem_out = (hipExternalSemaphore_t*)extSem_out; \ + cb_data.args.hipImportExternalSemaphore.semHandleDesc = (const hipExternalSemaphoreHandleDesc*)semHandleDesc; \ +}; +// hipInit[('unsigned int', 'flags')] +#define INIT_hipInit_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipInit.flags = (unsigned int)flags; \ +}; +// hipIpcCloseMemHandle[('void*', 
'devPtr')] +#define INIT_hipIpcCloseMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcCloseMemHandle.devPtr = (void*)dev_ptr; \ +}; +// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')] +#define INIT_hipIpcGetEventHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcGetEventHandle.handle = (hipIpcEventHandle_t*)handle; \ + cb_data.args.hipIpcGetEventHandle.event = (hipEvent_t)event; \ +}; +// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')] +#define INIT_hipIpcGetMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcGetMemHandle.handle = (hipIpcMemHandle_t*)handle; \ + cb_data.args.hipIpcGetMemHandle.devPtr = (void*)dev_ptr; \ +}; +// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')] +#define INIT_hipIpcOpenEventHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcOpenEventHandle.event = (hipEvent_t*)event; \ + cb_data.args.hipIpcOpenEventHandle.handle = (hipIpcEventHandle_t)handle; \ +}; +// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')] +#define INIT_hipIpcOpenMemHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipIpcOpenMemHandle.devPtr = (void**)dev_ptr; \ + cb_data.args.hipIpcOpenMemHandle.handle = (hipIpcMemHandle_t)handle; \ + cb_data.args.hipIpcOpenMemHandle.flags = (unsigned int)flags; \ +}; +// hipLaunchByPtr[('const void*', 'hostFunction')] +#define INIT_hipLaunchByPtr_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchByPtr.hostFunction = (const void*)hostFunction; \ +}; +// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')] +#define INIT_hipLaunchCooperativeKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchCooperativeKernel.f = (const void*)f; \ + cb_data.args.hipLaunchCooperativeKernel.gridDim = (dim3)gridDim; \ + 
cb_data.args.hipLaunchCooperativeKernel.blockDimX = (dim3)blockDim; \ + cb_data.args.hipLaunchCooperativeKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipLaunchCooperativeKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ + cb_data.args.hipLaunchCooperativeKernel.stream = (hipStream_t)hStream; \ +}; +// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')] +#define INIT_hipLaunchCooperativeKernelMultiDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchCooperativeKernelMultiDevice.launchParamsList = (hipLaunchParams*)launchParamsList; \ + cb_data.args.hipLaunchCooperativeKernelMultiDevice.numDevices = (int)numDevices; \ + cb_data.args.hipLaunchCooperativeKernelMultiDevice.flags = (unsigned int)flags; \ +}; +// hipLaunchHostFunc[('hipStream_t', 'stream'), ('hipHostFn_t', 'fn'), ('void*', 'userData')] +#define INIT_hipLaunchHostFunc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchHostFunc.stream = (hipStream_t)stream; \ + cb_data.args.hipLaunchHostFunc.fn = (hipHostFn_t)fn; \ + cb_data.args.hipLaunchHostFunc.userData = (void*)userData; \ +}; +// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')] +#define INIT_hipLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipLaunchKernel.function_address = (const void*)hostFunction; \ + cb_data.args.hipLaunchKernel.numBlocks = (dim3)gridDim; \ + cb_data.args.hipLaunchKernel.dimBlocks = (dim3)blockDim; \ + cb_data.args.hipLaunchKernel.args = (void**)args; \ + cb_data.args.hipLaunchKernel.sharedMemBytes = (size_t)sharedMemBytes; \ + cb_data.args.hipLaunchKernel.stream = (hipStream_t)stream; \ +}; +// hipMalloc[('void**', 'ptr'), ('size_t', 'size')] +#define INIT_hipMalloc_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc.ptr = (void**)ptr; \ + cb_data.args.hipMalloc.size = (size_t)sizeBytes; \ +}; +// 
hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')] +#define INIT_hipMalloc3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc3D.pitchedDevPtr = (hipPitchedPtr*)pitchedDevPtr; \ + cb_data.args.hipMalloc3D.extent = (hipExtent)extent; \ +}; +// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')] +#define INIT_hipMalloc3DArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMalloc3DArray.array = (hipArray_t*)array; \ + cb_data.args.hipMalloc3DArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMalloc3DArray.extent = (hipExtent)extent; \ + cb_data.args.hipMalloc3DArray.flags = (unsigned int)flags; \ +}; +// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] +#define INIT_hipMallocArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocArray.array = (hipArray**)array; \ + cb_data.args.hipMallocArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMallocArray.width = (size_t)width; \ + cb_data.args.hipMallocArray.height = (size_t)height; \ + cb_data.args.hipMallocArray.flags = (unsigned int)flags; \ +}; +// hipMallocAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipStream_t', 'stream')] +#define INIT_hipMallocAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocAsync.dev_ptr = (void**)dev_ptr; \ + cb_data.args.hipMallocAsync.size = (size_t)size; \ + cb_data.args.hipMallocAsync.stream = (hipStream_t)stream; \ +}; +// hipMallocFromPoolAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipMemPool_t', 'mem_pool'), ('hipStream_t', 'stream')] +#define INIT_hipMallocFromPoolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocFromPoolAsync.dev_ptr = (void**)dev_ptr; \ + cb_data.args.hipMallocFromPoolAsync.size = (size_t)size; \ + cb_data.args.hipMallocFromPoolAsync.mem_pool = (hipMemPool_t)mem_pool; \ + 
cb_data.args.hipMallocFromPoolAsync.stream = (hipStream_t)stream; \ +}; +// hipMallocHost[('void**', 'ptr'), ('size_t', 'size')] +#define INIT_hipMallocHost_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocHost.ptr = (void**)ptr; \ + cb_data.args.hipMallocHost.size = (size_t)size; \ +}; +// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] +#define INIT_hipMallocManaged_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocManaged.dev_ptr = (void**)dev_ptr; \ + cb_data.args.hipMallocManaged.size = (size_t)size; \ + cb_data.args.hipMallocManaged.flags = (unsigned int)flags; \ +}; +// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')] +#define INIT_hipMallocMipmappedArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocMipmappedArray.mipmappedArray = (hipMipmappedArray_t*)mipmappedArray; \ + cb_data.args.hipMallocMipmappedArray.desc = (const hipChannelFormatDesc*)desc; \ + cb_data.args.hipMallocMipmappedArray.extent = (hipExtent)extent; \ + cb_data.args.hipMallocMipmappedArray.numLevels = (unsigned int)numLevels; \ + cb_data.args.hipMallocMipmappedArray.flags = (unsigned int)flags; \ +}; +// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')] +#define INIT_hipMallocPitch_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMallocPitch.ptr = (void**)ptr; \ + cb_data.args.hipMallocPitch.pitch = (size_t*)pitch; \ + cb_data.args.hipMallocPitch.width = (size_t)width; \ + cb_data.args.hipMallocPitch.height = (size_t)height; \ +}; +// hipMemAddressFree[('void*', 'devPtr'), ('size_t', 'size')] +#define INIT_hipMemAddressFree_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAddressFree.devPtr = (void*)devPtr; \ + cb_data.args.hipMemAddressFree.size = (size_t)size; \ +}; +// hipMemAddressReserve[('void**', 'ptr'), ('size_t', 'size'), ('size_t', 'alignment'), ('void*', 
'addr'), ('unsigned long long', 'flags')] +#define INIT_hipMemAddressReserve_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAddressReserve.ptr = (void**)ptr; \ + cb_data.args.hipMemAddressReserve.size = (size_t)size; \ + cb_data.args.hipMemAddressReserve.alignment = (size_t)alignment; \ + cb_data.args.hipMemAddressReserve.addr = (void*)addr; \ + cb_data.args.hipMemAddressReserve.flags = (unsigned long long)flags; \ +}; +// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')] +#define INIT_hipMemAdvise_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAdvise.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemAdvise.count = (size_t)count; \ + cb_data.args.hipMemAdvise.advice = (hipMemoryAdvise)advice; \ + cb_data.args.hipMemAdvise.device = (int)device; \ +}; +// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')] +#define INIT_hipMemAllocHost_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAllocHost.ptr = (void**)ptr; \ + cb_data.args.hipMemAllocHost.size = (size_t)size; \ +}; +// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')] +#define INIT_hipMemAllocPitch_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemAllocPitch.dptr = (hipDeviceptr_t*)dptr; \ + cb_data.args.hipMemAllocPitch.pitch = (size_t*)pitch; \ + cb_data.args.hipMemAllocPitch.widthInBytes = (size_t)widthInBytes; \ + cb_data.args.hipMemAllocPitch.height = (size_t)height; \ + cb_data.args.hipMemAllocPitch.elementSizeBytes = (unsigned int)elementSizeBytes; \ +}; +// hipMemCreate[('hipMemGenericAllocationHandle_t*', 'handle'), ('size_t', 'size'), ('const hipMemAllocationProp*', 'prop'), ('unsigned long long', 'flags')] +#define INIT_hipMemCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemCreate.handle = (hipMemGenericAllocationHandle_t*)handle; \ + cb_data.args.hipMemCreate.size = (size_t)size; \ + cb_data.args.hipMemCreate.prop = (const 
hipMemAllocationProp*)prop; \ + cb_data.args.hipMemCreate.flags = (unsigned long long)flags; \ +}; +// hipMemExportToShareableHandle[('void*', 'shareableHandle'), ('hipMemGenericAllocationHandle_t', 'handle'), ('hipMemAllocationHandleType', 'handleType'), ('unsigned long long', 'flags')] +#define INIT_hipMemExportToShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemExportToShareableHandle.shareableHandle = (void*)shareableHandle; \ + cb_data.args.hipMemExportToShareableHandle.handle = (hipMemGenericAllocationHandle_t)handle; \ + cb_data.args.hipMemExportToShareableHandle.handleType = (hipMemAllocationHandleType)handleType; \ + cb_data.args.hipMemExportToShareableHandle.flags = (unsigned long long)flags; \ +}; +// hipMemGetAccess[('unsigned long long*', 'flags'), ('const hipMemLocation*', 'location'), ('void*', 'ptr')] +#define INIT_hipMemGetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAccess.flags = (unsigned long long*)flags; \ + cb_data.args.hipMemGetAccess.location = (const hipMemLocation*)location; \ + cb_data.args.hipMemGetAccess.ptr = (void*)ptr; \ +}; +// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')] +#define INIT_hipMemGetAddressRange_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAddressRange.pbase = (hipDeviceptr_t*)pbase; \ + cb_data.args.hipMemGetAddressRange.psize = (size_t*)psize; \ + cb_data.args.hipMemGetAddressRange.dptr = (hipDeviceptr_t)dptr; \ +}; +// hipMemGetAllocationGranularity[('size_t*', 'granularity'), ('const hipMemAllocationProp*', 'prop'), ('hipMemAllocationGranularity_flags', 'option')] +#define INIT_hipMemGetAllocationGranularity_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAllocationGranularity.granularity = (size_t*)granularity; \ + cb_data.args.hipMemGetAllocationGranularity.prop = (const hipMemAllocationProp*)prop; \ + cb_data.args.hipMemGetAllocationGranularity.option = (hipMemAllocationGranularity_flags)option; \ +}; +// 
hipMemGetAllocationPropertiesFromHandle[('hipMemAllocationProp*', 'prop'), ('hipMemGenericAllocationHandle_t', 'handle')] +#define INIT_hipMemGetAllocationPropertiesFromHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetAllocationPropertiesFromHandle.prop = (hipMemAllocationProp*)prop; \ + cb_data.args.hipMemGetAllocationPropertiesFromHandle.handle = (hipMemGenericAllocationHandle_t)handle; \ +}; +// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')] +#define INIT_hipMemGetInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemGetInfo.free = (size_t*)free; \ + cb_data.args.hipMemGetInfo.total = (size_t*)total; \ +}; +// hipMemImportFromShareableHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'osHandle'), ('hipMemAllocationHandleType', 'shHandleType')] +#define INIT_hipMemImportFromShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemImportFromShareableHandle.handle = (hipMemGenericAllocationHandle_t*)handle; \ + cb_data.args.hipMemImportFromShareableHandle.osHandle = (void*)osHandle; \ + cb_data.args.hipMemImportFromShareableHandle.shHandleType = (hipMemAllocationHandleType)shHandleType; \ +}; +// hipMemMap[('void*', 'ptr'), ('size_t', 'size'), ('size_t', 'offset'), ('hipMemGenericAllocationHandle_t', 'handle'), ('unsigned long long', 'flags')] +#define INIT_hipMemMap_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemMap.ptr = (void*)ptr; \ + cb_data.args.hipMemMap.size = (size_t)size; \ + cb_data.args.hipMemMap.offset = (size_t)offset; \ + cb_data.args.hipMemMap.handle = (hipMemGenericAllocationHandle_t)handle; \ + cb_data.args.hipMemMap.flags = (unsigned long long)flags; \ +}; +// hipMemMapArrayAsync[('hipArrayMapInfo*', 'mapInfoList'), ('unsigned int', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemMapArrayAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemMapArrayAsync.mapInfoList = (hipArrayMapInfo*)mapInfoList; \ + cb_data.args.hipMemMapArrayAsync.count = (unsigned int)count; \ + 
cb_data.args.hipMemMapArrayAsync.stream = (hipStream_t)stream; \ +}; +// hipMemPoolCreate[('hipMemPool_t*', 'mem_pool'), ('const hipMemPoolProps*', 'pool_props')] +#define INIT_hipMemPoolCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolCreate.mem_pool = (hipMemPool_t*)mem_pool; \ + cb_data.args.hipMemPoolCreate.pool_props = (const hipMemPoolProps*)pool_props; \ +}; +// hipMemPoolDestroy[('hipMemPool_t', 'mem_pool')] +#define INIT_hipMemPoolDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolDestroy.mem_pool = (hipMemPool_t)mem_pool; \ +}; +// hipMemPoolExportPointer[('hipMemPoolPtrExportData*', 'export_data'), ('void*', 'dev_ptr')] +#define INIT_hipMemPoolExportPointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolExportPointer.export_data = (hipMemPoolPtrExportData*)export_data; \ + cb_data.args.hipMemPoolExportPointer.dev_ptr = (void*)ptr; \ +}; +// hipMemPoolExportToShareableHandle[('void*', 'shared_handle'), ('hipMemPool_t', 'mem_pool'), ('hipMemAllocationHandleType', 'handle_type'), ('unsigned int', 'flags')] +#define INIT_hipMemPoolExportToShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolExportToShareableHandle.shared_handle = (void*)shared_handle; \ + cb_data.args.hipMemPoolExportToShareableHandle.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolExportToShareableHandle.handle_type = (hipMemAllocationHandleType)handle_type; \ + cb_data.args.hipMemPoolExportToShareableHandle.flags = (unsigned int)flags; \ +}; +// hipMemPoolGetAccess[('hipMemAccessFlags*', 'flags'), ('hipMemPool_t', 'mem_pool'), ('hipMemLocation*', 'location')] +#define INIT_hipMemPoolGetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolGetAccess.flags = (hipMemAccessFlags*)flags; \ + cb_data.args.hipMemPoolGetAccess.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolGetAccess.location = (hipMemLocation*)location; \ +}; +// hipMemPoolGetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] 
+#define INIT_hipMemPoolGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolGetAttribute.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolGetAttribute.attr = (hipMemPoolAttr)attr; \ + cb_data.args.hipMemPoolGetAttribute.value = (void*)value; \ +}; +// hipMemPoolImportFromShareableHandle[('hipMemPool_t*', 'mem_pool'), ('void*', 'shared_handle'), ('hipMemAllocationHandleType', 'handle_type'), ('unsigned int', 'flags')] +#define INIT_hipMemPoolImportFromShareableHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolImportFromShareableHandle.mem_pool = (hipMemPool_t*)mem_pool; \ + cb_data.args.hipMemPoolImportFromShareableHandle.shared_handle = (void*)shared_handle; \ + cb_data.args.hipMemPoolImportFromShareableHandle.handle_type = (hipMemAllocationHandleType)handle_type; \ + cb_data.args.hipMemPoolImportFromShareableHandle.flags = (unsigned int)flags; \ +}; +// hipMemPoolImportPointer[('void**', 'dev_ptr'), ('hipMemPool_t', 'mem_pool'), ('hipMemPoolPtrExportData*', 'export_data')] +#define INIT_hipMemPoolImportPointer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolImportPointer.dev_ptr = (void**)ptr; \ + cb_data.args.hipMemPoolImportPointer.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolImportPointer.export_data = (hipMemPoolPtrExportData*)export_data; \ +}; +// hipMemPoolSetAccess[('hipMemPool_t', 'mem_pool'), ('const hipMemAccessDesc*', 'desc_list'), ('size_t', 'count')] +#define INIT_hipMemPoolSetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolSetAccess.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolSetAccess.desc_list = (const hipMemAccessDesc*)desc_list; \ + cb_data.args.hipMemPoolSetAccess.count = (size_t)count; \ +}; +// hipMemPoolSetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] +#define INIT_hipMemPoolSetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolSetAttribute.mem_pool = (hipMemPool_t)mem_pool; \ + 
cb_data.args.hipMemPoolSetAttribute.attr = (hipMemPoolAttr)attr; \ + cb_data.args.hipMemPoolSetAttribute.value = (void*)value; \ +}; +// hipMemPoolTrimTo[('hipMemPool_t', 'mem_pool'), ('size_t', 'min_bytes_to_hold')] +#define INIT_hipMemPoolTrimTo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPoolTrimTo.mem_pool = (hipMemPool_t)mem_pool; \ + cb_data.args.hipMemPoolTrimTo.min_bytes_to_hold = (size_t)min_bytes_to_hold; \ +}; +// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')] +#define INIT_hipMemPrefetchAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPrefetchAsync.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemPrefetchAsync.count = (size_t)count; \ + cb_data.args.hipMemPrefetchAsync.device = (int)device; \ + cb_data.args.hipMemPrefetchAsync.stream = (hipStream_t)stream; \ +}; +// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')] +#define INIT_hipMemPtrGetInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemPtrGetInfo.ptr = (void*)ptr; \ + cb_data.args.hipMemPtrGetInfo.size = (size_t*)size; \ +}; +// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')] +#define INIT_hipMemRangeGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRangeGetAttribute.data = (void*)data; \ + cb_data.args.hipMemRangeGetAttribute.data_size = (size_t)data_size; \ + cb_data.args.hipMemRangeGetAttribute.attribute = (hipMemRangeAttribute)attribute; \ + cb_data.args.hipMemRangeGetAttribute.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemRangeGetAttribute.count = (size_t)count; \ +}; +// hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')] +#define INIT_hipMemRangeGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRangeGetAttributes.data = 
(void**)data; \ + cb_data.args.hipMemRangeGetAttributes.data_sizes = (size_t*)data_sizes; \ + cb_data.args.hipMemRangeGetAttributes.attributes = (hipMemRangeAttribute*)attributes; \ + cb_data.args.hipMemRangeGetAttributes.num_attributes = (size_t)num_attributes; \ + cb_data.args.hipMemRangeGetAttributes.dev_ptr = (const void*)dev_ptr; \ + cb_data.args.hipMemRangeGetAttributes.count = (size_t)count; \ +}; +// hipMemRelease[('hipMemGenericAllocationHandle_t', 'handle')] +#define INIT_hipMemRelease_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRelease.handle = (hipMemGenericAllocationHandle_t)handle; \ +}; +// hipMemRetainAllocationHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'addr')] +#define INIT_hipMemRetainAllocationHandle_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemRetainAllocationHandle.handle = (hipMemGenericAllocationHandle_t*)handle; \ + cb_data.args.hipMemRetainAllocationHandle.addr = (void*)addr; \ +}; +// hipMemSetAccess[('void*', 'ptr'), ('size_t', 'size'), ('const hipMemAccessDesc*', 'desc'), ('size_t', 'count')] +#define INIT_hipMemSetAccess_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemSetAccess.ptr = (void*)ptr; \ + cb_data.args.hipMemSetAccess.size = (size_t)size; \ + cb_data.args.hipMemSetAccess.desc = (const hipMemAccessDesc*)desc; \ + cb_data.args.hipMemSetAccess.count = (size_t)count; \ +}; +// hipMemUnmap[('void*', 'ptr'), ('size_t', 'size')] +#define INIT_hipMemUnmap_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemUnmap.ptr = (void*)ptr; \ + cb_data.args.hipMemUnmap.size = (size_t)size; \ +}; +// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy.dst = (void*)dst; \ + cb_data.args.hipMemcpy.src = (const void*)src; \ + cb_data.args.hipMemcpy.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpy.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const 
void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2D.dst = (void*)dst; \ + cb_data.args.hipMemcpy2D.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2D.src = (const void*)src; \ + cb_data.args.hipMemcpy2D.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2D.width = (size_t)width; \ + cb_data.args.hipMemcpy2D.height = (size_t)height; \ + cb_data.args.hipMemcpy2D.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpy2DAsync.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2DAsync.src = (const void*)src; \ + cb_data.args.hipMemcpy2DAsync.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DAsync.width = (size_t)width; \ + cb_data.args.hipMemcpy2DAsync.height = (size_t)height; \ + cb_data.args.hipMemcpy2DAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpy2DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy2DFromArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DFromArray.dst = (void*)dst; \ + cb_data.args.hipMemcpy2DFromArray.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2DFromArray.src = (hipArray_const_t)src; \ + cb_data.args.hipMemcpy2DFromArray.wOffset = (size_t)wOffsetSrc; \ + cb_data.args.hipMemcpy2DFromArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpy2DFromArray.width = (size_t)width; \ + cb_data.args.hipMemcpy2DFromArray.height = (size_t)height; \ + 
cb_data.args.hipMemcpy2DFromArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy2DFromArrayAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DFromArrayAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpy2DFromArrayAsync.dpitch = (size_t)dpitch; \ + cb_data.args.hipMemcpy2DFromArrayAsync.src = (hipArray_const_t)src; \ + cb_data.args.hipMemcpy2DFromArrayAsync.wOffset = (size_t)wOffsetSrc; \ + cb_data.args.hipMemcpy2DFromArrayAsync.hOffset = (size_t)hOffsetSrc; \ + cb_data.args.hipMemcpy2DFromArrayAsync.width = (size_t)width; \ + cb_data.args.hipMemcpy2DFromArrayAsync.height = (size_t)height; \ + cb_data.args.hipMemcpy2DFromArrayAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpy2DFromArrayAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpy2DToArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DToArray.dst = (hipArray*)dst; \ + cb_data.args.hipMemcpy2DToArray.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpy2DToArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpy2DToArray.src = (const void*)src; \ + cb_data.args.hipMemcpy2DToArray.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DToArray.width = (size_t)width; \ + cb_data.args.hipMemcpy2DToArray.height = (size_t)height; \ + cb_data.args.hipMemcpy2DToArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpy2DToArrayAsync[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 
'stream')] +#define INIT_hipMemcpy2DToArrayAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy2DToArrayAsync.dst = (hipArray*)dst; \ + cb_data.args.hipMemcpy2DToArrayAsync.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpy2DToArrayAsync.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpy2DToArrayAsync.src = (const void*)src; \ + cb_data.args.hipMemcpy2DToArrayAsync.spitch = (size_t)spitch; \ + cb_data.args.hipMemcpy2DToArrayAsync.width = (size_t)width; \ + cb_data.args.hipMemcpy2DToArrayAsync.height = (size_t)height; \ + cb_data.args.hipMemcpy2DToArrayAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpy2DToArrayAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')] +#define INIT_hipMemcpy3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3D.p = (const hipMemcpy3DParms*)p; \ +}; +// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpy3DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpy3DAsync.p = (const hipMemcpy3DParms*)p; \ + cb_data.args.hipMemcpy3DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] +#define INIT_hipMemcpyAtoH_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyAtoH.dst = (void*)dstHost; \ + cb_data.args.hipMemcpyAtoH.srcArray = (hipArray*)srcArray; \ + cb_data.args.hipMemcpyAtoH.srcOffset = (size_t)srcOffset; \ + cb_data.args.hipMemcpyAtoH.count = 
(size_t)ByteCount; \ +}; +// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyDtoD_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoD.dst = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyDtoD.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoD.sizeBytes = (size_t)ByteCount; \ +}; +// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyDtoDAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoDAsync.dst = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyDtoDAsync.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoDAsync.sizeBytes = (size_t)ByteCount; \ + cb_data.args.hipMemcpyDtoDAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyDtoH_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoH.dst = (void*)dstHost; \ + cb_data.args.hipMemcpyDtoH.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoH.sizeBytes = (size_t)ByteCount; \ +}; +// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyDtoHAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyDtoHAsync.dst = (void*)dstHost; \ + cb_data.args.hipMemcpyDtoHAsync.src = (hipDeviceptr_t)srcDevice; \ + cb_data.args.hipMemcpyDtoHAsync.sizeBytes = (size_t)ByteCount; \ + cb_data.args.hipMemcpyDtoHAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyFromArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromArray.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromArray.srcArray = (hipArray_const_t)src; \ + cb_data.args.hipMemcpyFromArray.wOffset = 
(size_t)wOffsetSrc; \ + cb_data.args.hipMemcpyFromArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpyFromArray.count = (size_t)count; \ + cb_data.args.hipMemcpyFromArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyFromSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromSymbol.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyFromSymbol.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyFromSymbol.offset = (size_t)offset; \ + cb_data.args.hipMemcpyFromSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyFromSymbolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyFromSymbolAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyFromSymbolAsync.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyFromSymbolAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyFromSymbolAsync.offset = (size_t)offset; \ + cb_data.args.hipMemcpyFromSymbolAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyFromSymbolAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyHtoA[('hipArray*', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] +#define INIT_hipMemcpyHtoA_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoA.dstArray = (hipArray*)dstArray; \ + cb_data.args.hipMemcpyHtoA.dstOffset = (size_t)dstOffset; \ + cb_data.args.hipMemcpyHtoA.srcHost = (const void*)srcHost; \ + cb_data.args.hipMemcpyHtoA.count = (size_t)ByteCount; \ +}; +// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyHtoD_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoD.dst = 
(hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyHtoD.src = (void*)srcHost; \ + cb_data.args.hipMemcpyHtoD.sizeBytes = (size_t)ByteCount; \ +}; +// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyHtoDAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyHtoDAsync.dst = (hipDeviceptr_t)dstDevice; \ + cb_data.args.hipMemcpyHtoDAsync.src = (void*)srcHost; \ + cb_data.args.hipMemcpyHtoDAsync.sizeBytes = (size_t)ByteCount; \ + cb_data.args.hipMemcpyHtoDAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')] +#define INIT_hipMemcpyParam2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyParam2D.pCopy = (const hip_Memcpy2D*)pCopy; \ +}; +// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyParam2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyParam2DAsync.pCopy = (const hip_Memcpy2D*)pCopy; \ + cb_data.args.hipMemcpyParam2DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')] +#define INIT_hipMemcpyPeer_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyPeer.dst = (void*)dst; \ + cb_data.args.hipMemcpyPeer.dstDeviceId = (int)dstDevice; \ + cb_data.args.hipMemcpyPeer.src = (const void*)src; \ + cb_data.args.hipMemcpyPeer.srcDeviceId = (int)srcDevice; \ + cb_data.args.hipMemcpyPeer.sizeBytes = (size_t)sizeBytes; \ +}; +// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyPeerAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyPeerAsync.dst = (void*)dst; \ + cb_data.args.hipMemcpyPeerAsync.dstDeviceId = (int)dstDevice; \ + cb_data.args.hipMemcpyPeerAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyPeerAsync.srcDevice 
= (int)srcDevice; \ + cb_data.args.hipMemcpyPeerAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyPeerAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyToArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToArray.dst = (hipArray*)dst; \ + cb_data.args.hipMemcpyToArray.wOffset = (size_t)wOffset; \ + cb_data.args.hipMemcpyToArray.hOffset = (size_t)hOffset; \ + cb_data.args.hipMemcpyToArray.src = (const void*)src; \ + cb_data.args.hipMemcpyToArray.count = (size_t)count; \ + cb_data.args.hipMemcpyToArray.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] +#define INIT_hipMemcpyToSymbol_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToSymbol.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyToSymbol.src = (const void*)src; \ + cb_data.args.hipMemcpyToSymbol.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyToSymbol.offset = (size_t)offset; \ + cb_data.args.hipMemcpyToSymbol.kind = (hipMemcpyKind)kind; \ +}; +// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyToSymbolAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyToSymbolAsync.symbol = (const void*)symbol; \ + cb_data.args.hipMemcpyToSymbolAsync.src = (const void*)src; \ + cb_data.args.hipMemcpyToSymbolAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyToSymbolAsync.offset = (size_t)offset; \ + cb_data.args.hipMemcpyToSymbolAsync.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyToSymbolAsync.stream = (hipStream_t)stream; \ +}; +// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 
'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +#define INIT_hipMemcpyWithStream_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemcpyWithStream.dst = (void*)dst; \ + cb_data.args.hipMemcpyWithStream.src = (const void*)src; \ + cb_data.args.hipMemcpyWithStream.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemcpyWithStream.kind = (hipMemcpyKind)kind; \ + cb_data.args.hipMemcpyWithStream.stream = (hipStream_t)stream; \ +}; +// hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')] +#define INIT_hipMemset_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset.dst = (void*)dst; \ + cb_data.args.hipMemset.value = (int)value; \ + cb_data.args.hipMemset.sizeBytes = (size_t)sizeBytes; \ +}; +// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')] +#define INIT_hipMemset2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset2D.dst = (void*)dst; \ + cb_data.args.hipMemset2D.pitch = (size_t)pitch; \ + cb_data.args.hipMemset2D.value = (int)value; \ + cb_data.args.hipMemset2D.width = (size_t)width; \ + cb_data.args.hipMemset2D.height = (size_t)height; \ +}; +// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] +#define INIT_hipMemset2DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset2DAsync.dst = (void*)dst; \ + cb_data.args.hipMemset2DAsync.pitch = (size_t)pitch; \ + cb_data.args.hipMemset2DAsync.value = (int)value; \ + cb_data.args.hipMemset2DAsync.width = (size_t)width; \ + cb_data.args.hipMemset2DAsync.height = (size_t)height; \ + cb_data.args.hipMemset2DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')] +#define INIT_hipMemset3D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset3D.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ + cb_data.args.hipMemset3D.value = (int)value; \ + 
cb_data.args.hipMemset3D.extent = (hipExtent)extent; \ +}; +// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')] +#define INIT_hipMemset3DAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemset3DAsync.pitchedDevPtr = (hipPitchedPtr)pitchedDevPtr; \ + cb_data.args.hipMemset3DAsync.value = (int)value; \ + cb_data.args.hipMemset3DAsync.extent = (hipExtent)extent; \ + cb_data.args.hipMemset3DAsync.stream = (hipStream_t)stream; \ +}; +// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetAsync.dst = (void*)dst; \ + cb_data.args.hipMemsetAsync.value = (int)value; \ + cb_data.args.hipMemsetAsync.sizeBytes = (size_t)sizeBytes; \ + cb_data.args.hipMemsetAsync.stream = (hipStream_t)stream; \ +}; +// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')] +#define INIT_hipMemsetD16_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD16.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD16.value = (unsigned short)value; \ + cb_data.args.hipMemsetD16.count = (size_t)count; \ +}; +// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD16Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD16Async.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD16Async.value = (unsigned short)value; \ + cb_data.args.hipMemsetD16Async.count = (size_t)count; \ + cb_data.args.hipMemsetD16Async.stream = (hipStream_t)stream; \ +}; +// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')] +#define INIT_hipMemsetD32_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD32.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD32.value = (int)value; \ + cb_data.args.hipMemsetD32.count = (size_t)count; \ +}; +// 
hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD32Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD32Async.dst = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD32Async.value = (int)value; \ + cb_data.args.hipMemsetD32Async.count = (size_t)count; \ + cb_data.args.hipMemsetD32Async.stream = (hipStream_t)stream; \ +}; +// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')] +#define INIT_hipMemsetD8_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD8.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD8.value = (unsigned char)value; \ + cb_data.args.hipMemsetD8.count = (size_t)count; \ +}; +// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] +#define INIT_hipMemsetD8Async_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMemsetD8Async.dest = (hipDeviceptr_t)dst; \ + cb_data.args.hipMemsetD8Async.value = (unsigned char)value; \ + cb_data.args.hipMemsetD8Async.count = (size_t)count; \ + cb_data.args.hipMemsetD8Async.stream = (hipStream_t)stream; \ +}; +// hipMipmappedArrayCreate[('hipMipmappedArray_t*', 'pHandle'), ('HIP_ARRAY3D_DESCRIPTOR*', 'pMipmappedArrayDesc'), ('unsigned int', 'numMipmapLevels')] +#define INIT_hipMipmappedArrayCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMipmappedArrayCreate.pHandle = (hipMipmappedArray_t*)mipmapped_array_pptr; \ + cb_data.args.hipMipmappedArrayCreate.pMipmappedArrayDesc = (HIP_ARRAY3D_DESCRIPTOR*)mipmapped_array_desc_ptr; \ + cb_data.args.hipMipmappedArrayCreate.numMipmapLevels = (unsigned int)num_mipmap_levels; \ +}; +// hipMipmappedArrayDestroy[('hipMipmappedArray_t', 'hMipmappedArray')] +#define INIT_hipMipmappedArrayDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMipmappedArrayDestroy.hMipmappedArray = (hipMipmappedArray_t)mipmapped_array_ptr; \ +}; +// hipMipmappedArrayGetLevel[('hipArray_t*', 'pLevelArray'), 
('hipMipmappedArray_t', 'hMipMappedArray'), ('unsigned int', 'level')] +#define INIT_hipMipmappedArrayGetLevel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipMipmappedArrayGetLevel.pLevelArray = (hipArray_t*)level_array_pptr; \ + cb_data.args.hipMipmappedArrayGetLevel.hMipMappedArray = (hipMipmappedArray_t)mipmapped_array_ptr; \ + cb_data.args.hipMipmappedArrayGetLevel.level = (unsigned int)mip_level; \ +}; +// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')] +#define INIT_hipModuleGetFunction_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetFunction.function = (hipFunction_t*)hfunc; \ + cb_data.args.hipModuleGetFunction.module = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetFunction.kname = (name) ? strdup(name) : NULL; \ +}; +// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')] +#define INIT_hipModuleGetGlobal_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetGlobal.dptr = (hipDeviceptr_t*)dptr; \ + cb_data.args.hipModuleGetGlobal.bytes = (size_t*)bytes; \ + cb_data.args.hipModuleGetGlobal.hmod = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetGlobal.name = (name) ? strdup(name) : NULL; \ +}; +// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')] +#define INIT_hipModuleGetTexRef_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleGetTexRef.texRef = (textureReference**)texRef; \ + cb_data.args.hipModuleGetTexRef.hmod = (hipModule_t)hmod; \ + cb_data.args.hipModuleGetTexRef.name = (name) ? 
strdup(name) : NULL; \ +}; +// hipModuleLaunchCooperativeKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams')] +#define INIT_hipModuleLaunchCooperativeKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchCooperativeKernel.f = (hipFunction_t)f; \ + cb_data.args.hipModuleLaunchCooperativeKernel.gridDimX = (unsigned int)gridDimX; \ + cb_data.args.hipModuleLaunchCooperativeKernel.gridDimY = (unsigned int)gridDimY; \ + cb_data.args.hipModuleLaunchCooperativeKernel.gridDimZ = (unsigned int)gridDimZ; \ + cb_data.args.hipModuleLaunchCooperativeKernel.blockDimX = (unsigned int)blockDimX; \ + cb_data.args.hipModuleLaunchCooperativeKernel.blockDimY = (unsigned int)blockDimY; \ + cb_data.args.hipModuleLaunchCooperativeKernel.blockDimZ = (unsigned int)blockDimZ; \ + cb_data.args.hipModuleLaunchCooperativeKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ + cb_data.args.hipModuleLaunchCooperativeKernel.stream = (hipStream_t)stream; \ + cb_data.args.hipModuleLaunchCooperativeKernel.kernelParams = (void**)kernelParams; \ +}; +// hipModuleLaunchCooperativeKernelMultiDevice[('hipFunctionLaunchParams*', 'launchParamsList'), ('unsigned int', 'numDevices'), ('unsigned int', 'flags')] +#define INIT_hipModuleLaunchCooperativeKernelMultiDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList = (hipFunctionLaunchParams*)launchParamsList; \ + cb_data.args.hipModuleLaunchCooperativeKernelMultiDevice.numDevices = (unsigned int)numDevices; \ + cb_data.args.hipModuleLaunchCooperativeKernelMultiDevice.flags = (unsigned int)flags; \ +}; +// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), 
('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')] +#define INIT_hipModuleLaunchKernel_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLaunchKernel.f = (hipFunction_t)f; \ + cb_data.args.hipModuleLaunchKernel.gridDimX = (unsigned int)gridDimX; \ + cb_data.args.hipModuleLaunchKernel.gridDimY = (unsigned int)gridDimY; \ + cb_data.args.hipModuleLaunchKernel.gridDimZ = (unsigned int)gridDimZ; \ + cb_data.args.hipModuleLaunchKernel.blockDimX = (unsigned int)blockDimX; \ + cb_data.args.hipModuleLaunchKernel.blockDimY = (unsigned int)blockDimY; \ + cb_data.args.hipModuleLaunchKernel.blockDimZ = (unsigned int)blockDimZ; \ + cb_data.args.hipModuleLaunchKernel.sharedMemBytes = (unsigned int)sharedMemBytes; \ + cb_data.args.hipModuleLaunchKernel.stream = (hipStream_t)hStream; \ + cb_data.args.hipModuleLaunchKernel.kernelParams = (void**)kernelParams; \ + cb_data.args.hipModuleLaunchKernel.extra = (void**)extra; \ +}; +// hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')] +#define INIT_hipModuleLoad_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoad.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoad.fname = (fname) ? 
strdup(fname) : NULL; \ +}; +// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')] +#define INIT_hipModuleLoadData_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoadData.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoadData.image = (const void*)image; \ +}; +// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')] +#define INIT_hipModuleLoadDataEx_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleLoadDataEx.module = (hipModule_t*)module; \ + cb_data.args.hipModuleLoadDataEx.image = (const void*)image; \ + cb_data.args.hipModuleLoadDataEx.numOptions = (unsigned int)numOptions; \ + cb_data.args.hipModuleLoadDataEx.options = (hipJitOption*)options; \ + cb_data.args.hipModuleLoadDataEx.optionsValues = (void**)optionsValues; \ +}; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')] +#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f = (hipFunction_t)f; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ +}; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), ('unsigned int', 'flags')] +#define INIT_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \ + 
cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (hipFunction_t)f; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \ +}; +// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] +#define INIT_hipModuleOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.f = (hipFunction_t)f; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \ +}; +// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')] +#define INIT_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize = (int*)gridSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize = (int*)blockSize; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f = (hipFunction_t)f; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit = (int)blockSizeLimit; \ + 
cb_data.args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags = (unsigned int)flags; \ +}; +// hipModuleUnload[('hipModule_t', 'module')] +#define INIT_hipModuleUnload_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipModuleUnload.module = (hipModule_t)hmod; \ +}; +// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')] +#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessor_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks = (int*)numBlocks; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f = (const void*)f; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize = (int)blockSize; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize = (size_t)dynamicSMemSize; \ +}; +// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')] +#define INIT_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks = (int*)numBlocks; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f = (const void*)f; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize = (int)blockSize; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize = (size_t)dynamicSMemSize; \ + cb_data.args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags = (unsigned int)flags; \ +}; +// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] +#define INIT_hipOccupancyMaxPotentialBlockSize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.gridSize = (int*)gridSize; \ + 
cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSize = (int*)blockSize; \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.f = (const void*)f; \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk = (size_t)dynSharedMemPerBlk; \ + cb_data.args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit = (int)blockSizeLimit; \ +}; +// hipPeekAtLastError[] +#define INIT_hipPeekAtLastError_CB_ARGS_DATA(cb_data) { \ +}; +// hipPointerGetAttribute[('void*', 'data'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] +#define INIT_hipPointerGetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipPointerGetAttribute.data = (void*)data; \ + cb_data.args.hipPointerGetAttribute.attribute = (hipPointer_attribute)attribute; \ + cb_data.args.hipPointerGetAttribute.ptr = (hipDeviceptr_t)ptr; \ +}; +// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')] +#define INIT_hipPointerGetAttributes_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipPointerGetAttributes.attributes = (hipPointerAttribute_t*)attributes; \ + cb_data.args.hipPointerGetAttributes.ptr = (const void*)ptr; \ +}; +// hipPointerSetAttribute[('const void*', 'value'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] +#define INIT_hipPointerSetAttribute_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipPointerSetAttribute.value = (const void*)value; \ + cb_data.args.hipPointerSetAttribute.attribute = (hipPointer_attribute)attribute; \ + cb_data.args.hipPointerSetAttribute.ptr = (hipDeviceptr_t)ptr; \ +}; +// hipProfilerStart[] +#define INIT_hipProfilerStart_CB_ARGS_DATA(cb_data) { \ +}; +// hipProfilerStop[] +#define INIT_hipProfilerStop_CB_ARGS_DATA(cb_data) { \ +}; +// hipRuntimeGetVersion[('int*', 'runtimeVersion')] +#define INIT_hipRuntimeGetVersion_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipRuntimeGetVersion.runtimeVersion = (int*)runtimeVersion; \ +}; +// hipSetDevice[('int', 'deviceId')] +#define INIT_hipSetDevice_CB_ARGS_DATA(cb_data) { \ + 
cb_data.args.hipSetDevice.deviceId = (int)device; \ +}; +// hipSetDeviceFlags[('unsigned int', 'flags')] +#define INIT_hipSetDeviceFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetDeviceFlags.flags = (unsigned int)flags; \ +}; +// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')] +#define INIT_hipSetupArgument_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSetupArgument.arg = (const void*)arg; \ + cb_data.args.hipSetupArgument.size = (size_t)size; \ + cb_data.args.hipSetupArgument.offset = (size_t)offset; \ +}; +// hipSignalExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreSignalParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')] +#define INIT_hipSignalExternalSemaphoresAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipSignalExternalSemaphoresAsync.extSemArray = (const hipExternalSemaphore_t*)extSemArray; \ + cb_data.args.hipSignalExternalSemaphoresAsync.paramsArray = (const hipExternalSemaphoreSignalParams*)paramsArray; \ + cb_data.args.hipSignalExternalSemaphoresAsync.numExtSems = (unsigned int)numExtSems; \ + cb_data.args.hipSignalExternalSemaphoresAsync.stream = (hipStream_t)stream; \ +}; +// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')] +#define INIT_hipStreamAddCallback_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamAddCallback.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamAddCallback.callback = (hipStreamCallback_t)callback; \ + cb_data.args.hipStreamAddCallback.userData = (void*)userData; \ + cb_data.args.hipStreamAddCallback.flags = (unsigned int)flags; \ +}; +// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('void*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')] +#define INIT_hipStreamAttachMemAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamAttachMemAsync.stream = (hipStream_t)stream; \ + 
cb_data.args.hipStreamAttachMemAsync.dev_ptr = (void*)dev_ptr; \ + cb_data.args.hipStreamAttachMemAsync.length = (size_t)length; \ + cb_data.args.hipStreamAttachMemAsync.flags = (unsigned int)flags; \ +}; +// hipStreamBeginCapture[('hipStream_t', 'stream'), ('hipStreamCaptureMode', 'mode')] +#define INIT_hipStreamBeginCapture_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamBeginCapture.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamBeginCapture.mode = (hipStreamCaptureMode)mode; \ +}; +// hipStreamCreate[('hipStream_t*', 'stream')] +#define INIT_hipStreamCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamCreate.stream = (hipStream_t*)stream; \ +}; +// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')] +#define INIT_hipStreamCreateWithFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamCreateWithFlags.stream = (hipStream_t*)stream; \ + cb_data.args.hipStreamCreateWithFlags.flags = (unsigned int)flags; \ +}; +// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')] +#define INIT_hipStreamCreateWithPriority_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamCreateWithPriority.stream = (hipStream_t*)stream; \ + cb_data.args.hipStreamCreateWithPriority.flags = (unsigned int)flags; \ + cb_data.args.hipStreamCreateWithPriority.priority = (int)priority; \ +}; +// hipStreamDestroy[('hipStream_t', 'stream')] +#define INIT_hipStreamDestroy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamDestroy.stream = (hipStream_t)stream; \ +}; +// hipStreamEndCapture[('hipStream_t', 'stream'), ('hipGraph_t*', 'pGraph')] +#define INIT_hipStreamEndCapture_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamEndCapture.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamEndCapture.pGraph = (hipGraph_t*)pGraph; \ +}; +// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')] +#define 
INIT_hipStreamGetCaptureInfo_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamGetCaptureInfo.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamGetCaptureInfo.pCaptureStatus = (hipStreamCaptureStatus*)pCaptureStatus; \ + cb_data.args.hipStreamGetCaptureInfo.pId = (unsigned long long*)pId; \ +}; +// hipStreamGetCaptureInfo_v2[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'captureStatus_out'), ('unsigned long long*', 'id_out'), ('hipGraph_t*', 'graph_out'), ('const hipGraphNode_t**', 'dependencies_out'), ('size_t*', 'numDependencies_out')] +#define INIT_hipStreamGetCaptureInfo_v2_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamGetCaptureInfo_v2.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamGetCaptureInfo_v2.captureStatus_out = (hipStreamCaptureStatus*)captureStatus_out; \ + cb_data.args.hipStreamGetCaptureInfo_v2.id_out = (unsigned long long*)id_out; \ + cb_data.args.hipStreamGetCaptureInfo_v2.graph_out = (hipGraph_t*)graph_out; \ + cb_data.args.hipStreamGetCaptureInfo_v2.dependencies_out = (const hipGraphNode_t**)dependencies_out; \ + cb_data.args.hipStreamGetCaptureInfo_v2.numDependencies_out = (size_t*)numDependencies_out; \ +}; +// hipStreamGetDevice[('hipStream_t', 'stream'), ('hipDevice_t*', 'device')] +#define INIT_hipStreamGetDevice_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamGetDevice.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamGetDevice.device = (hipDevice_t*)device; \ +}; +// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')] +#define INIT_hipStreamGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamGetFlags.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamGetFlags.flags = (unsigned int*)flags; \ +}; +// hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')] +#define INIT_hipStreamGetPriority_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamGetPriority.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamGetPriority.priority = (int*)priority; \ +}; +// 
hipStreamIsCapturing[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus')] +#define INIT_hipStreamIsCapturing_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamIsCapturing.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamIsCapturing.pCaptureStatus = (hipStreamCaptureStatus*)pCaptureStatus; \ +}; +// hipStreamQuery[('hipStream_t', 'stream')] +#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \ +}; +// hipStreamSynchronize[('hipStream_t', 'stream')] +#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \ +}; +// hipStreamUpdateCaptureDependencies[('hipStream_t', 'stream'), ('hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('unsigned int', 'flags')] +#define INIT_hipStreamUpdateCaptureDependencies_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamUpdateCaptureDependencies.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamUpdateCaptureDependencies.dependencies = (hipGraphNode_t*)dependencies; \ + cb_data.args.hipStreamUpdateCaptureDependencies.numDependencies = (size_t)numDependencies; \ + cb_data.args.hipStreamUpdateCaptureDependencies.flags = (unsigned int)flags; \ +}; +// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')] +#define INIT_hipStreamWaitEvent_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamWaitEvent.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamWaitEvent.event = (hipEvent_t)event; \ + cb_data.args.hipStreamWaitEvent.flags = (unsigned int)flags; \ +}; +// hipStreamWaitValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags'), ('unsigned int', 'mask')] +#define INIT_hipStreamWaitValue32_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamWaitValue32.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamWaitValue32.ptr = (void*)ptr; \ + 
cb_data.args.hipStreamWaitValue32.value = (unsigned int)value; \ + cb_data.args.hipStreamWaitValue32.flags = (unsigned int)flags; \ + cb_data.args.hipStreamWaitValue32.mask = (unsigned int)mask; \ +}; +// hipStreamWaitValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags'), ('uint64_t', 'mask')] +#define INIT_hipStreamWaitValue64_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamWaitValue64.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamWaitValue64.ptr = (void*)ptr; \ + cb_data.args.hipStreamWaitValue64.value = (uint64_t)value; \ + cb_data.args.hipStreamWaitValue64.flags = (unsigned int)flags; \ + cb_data.args.hipStreamWaitValue64.mask = (uint64_t)mask; \ +}; +// hipStreamWriteValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags')] +#define INIT_hipStreamWriteValue32_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamWriteValue32.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamWriteValue32.ptr = (void*)ptr; \ + cb_data.args.hipStreamWriteValue32.value = (unsigned int)value; \ + cb_data.args.hipStreamWriteValue32.flags = (unsigned int)flags; \ +}; +// hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')] +#define INIT_hipStreamWriteValue64_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipStreamWriteValue64.stream = (hipStream_t)stream; \ + cb_data.args.hipStreamWriteValue64.ptr = (void*)ptr; \ + cb_data.args.hipStreamWriteValue64.value = (uint64_t)value; \ + cb_data.args.hipStreamWriteValue64.flags = (unsigned int)flags; \ +}; +// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')] +#define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefGetAddress.dev_ptr = (hipDeviceptr_t*)dptr; \ + cb_data.args.hipTexRefGetAddress.texRef = (const textureReference*)texRef; \ +}; +// hipTexRefGetFlags[('unsigned int*', 'pFlags'), ('const 
textureReference*', 'texRef')] +#define INIT_hipTexRefGetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefGetFlags.pFlags = (unsigned int*)pFlags; \ + cb_data.args.hipTexRefGetFlags.texRef = (const textureReference*)texRef; \ +}; +// hipTexRefGetFormat[('hipArray_Format*', 'pFormat'), ('int*', 'pNumChannels'), ('const textureReference*', 'texRef')] +#define INIT_hipTexRefGetFormat_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefGetFormat.pFormat = (hipArray_Format*)pFormat; \ + cb_data.args.hipTexRefGetFormat.pNumChannels = (int*)pNumChannels; \ + cb_data.args.hipTexRefGetFormat.texRef = (const textureReference*)texRef; \ +}; +// hipTexRefGetMaxAnisotropy[('int*', 'pmaxAnsio'), ('const textureReference*', 'texRef')] +#define INIT_hipTexRefGetMaxAnisotropy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefGetMaxAnisotropy.pmaxAnsio = (int*)pmaxAnsio; \ + cb_data.args.hipTexRefGetMaxAnisotropy.texRef = (const textureReference*)texRef; \ +}; +// hipTexRefGetMipMappedArray[('hipMipmappedArray_t*', 'pArray'), ('const textureReference*', 'texRef')] +#define INIT_hipTexRefGetMipMappedArray_CB_ARGS_DATA(cb_data) { \ +}; +// hipTexRefGetMipmapLevelBias[('float*', 'pbias'), ('const textureReference*', 'texRef')] +#define INIT_hipTexRefGetMipmapLevelBias_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefGetMipmapLevelBias.pbias = (float*)pbias; \ + cb_data.args.hipTexRefGetMipmapLevelBias.texRef = (const textureReference*)texRef; \ +}; +// hipTexRefGetMipmapLevelClamp[('float*', 'pminMipmapLevelClamp'), ('float*', 'pmaxMipmapLevelClamp'), ('const textureReference*', 'texRef')] +#define INIT_hipTexRefGetMipmapLevelClamp_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp = (float*)pminMipmapLevelClamp; \ + cb_data.args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp = (float*)pmaxMipmapLevelClamp; \ + cb_data.args.hipTexRefGetMipmapLevelClamp.texRef = (const textureReference*)texRef; \ +}; +// hipTexRefSetAddress[('size_t*', 
'ByteOffset'), ('textureReference*', 'texRef'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'bytes')] +#define INIT_hipTexRefSetAddress_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetAddress.ByteOffset = (size_t*)ByteOffset; \ + cb_data.args.hipTexRefSetAddress.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetAddress.dptr = (hipDeviceptr_t)dptr; \ + cb_data.args.hipTexRefSetAddress.bytes = (size_t)bytes; \ +}; +// hipTexRefSetAddress2D[('textureReference*', 'texRef'), ('const HIP_ARRAY_DESCRIPTOR*', 'desc'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'Pitch')] +#define INIT_hipTexRefSetAddress2D_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetAddress2D.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetAddress2D.desc = (const HIP_ARRAY_DESCRIPTOR*)desc; \ + cb_data.args.hipTexRefSetAddress2D.dptr = (hipDeviceptr_t)dptr; \ + cb_data.args.hipTexRefSetAddress2D.Pitch = (size_t)Pitch; \ +}; +// hipTexRefSetArray[('textureReference*', 'tex'), ('hipArray_const_t', 'array'), ('unsigned int', 'flags')] +#define INIT_hipTexRefSetArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetArray.tex = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetArray.array = (hipArray_const_t)array; \ + cb_data.args.hipTexRefSetArray.flags = (unsigned int)flags; \ +}; +// hipTexRefSetBorderColor[('textureReference*', 'texRef'), ('float*', 'pBorderColor')] +#define INIT_hipTexRefSetBorderColor_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetBorderColor.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetBorderColor.pBorderColor = (float*)pBorderColor; \ +}; +// hipTexRefSetFlags[('textureReference*', 'texRef'), ('unsigned int', 'Flags')] +#define INIT_hipTexRefSetFlags_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetFlags.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetFlags.Flags = (unsigned int)Flags; \ +}; +// hipTexRefSetFormat[('textureReference*', 'texRef'), ('hipArray_Format', 'fmt'), ('int', 
'NumPackedComponents')] +#define INIT_hipTexRefSetFormat_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetFormat.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetFormat.fmt = (hipArray_Format)fmt; \ + cb_data.args.hipTexRefSetFormat.NumPackedComponents = (int)NumPackedComponents; \ +}; +// hipTexRefSetMaxAnisotropy[('textureReference*', 'texRef'), ('unsigned int', 'maxAniso')] +#define INIT_hipTexRefSetMaxAnisotropy_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetMaxAnisotropy.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetMaxAnisotropy.maxAniso = (unsigned int)maxAniso; \ +}; +// hipTexRefSetMipmapLevelBias[('textureReference*', 'texRef'), ('float', 'bias')] +#define INIT_hipTexRefSetMipmapLevelBias_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetMipmapLevelBias.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetMipmapLevelBias.bias = (float)bias; \ +}; +// hipTexRefSetMipmapLevelClamp[('textureReference*', 'texRef'), ('float', 'minMipMapLevelClamp'), ('float', 'maxMipMapLevelClamp')] +#define INIT_hipTexRefSetMipmapLevelClamp_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetMipmapLevelClamp.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetMipmapLevelClamp.minMipMapLevelClamp = (float)minMipMapLevelClamp; \ + cb_data.args.hipTexRefSetMipmapLevelClamp.maxMipMapLevelClamp = (float)maxMipMapLevelClamp; \ +}; +// hipTexRefSetMipmappedArray[('textureReference*', 'texRef'), ('hipMipmappedArray*', 'mipmappedArray'), ('unsigned int', 'Flags')] +#define INIT_hipTexRefSetMipmappedArray_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipTexRefSetMipmappedArray.texRef = (textureReference*)texRef; \ + cb_data.args.hipTexRefSetMipmappedArray.mipmappedArray = (hipMipmappedArray*)mipmappedArray; \ + cb_data.args.hipTexRefSetMipmappedArray.Flags = (unsigned int)Flags; \ +}; +// hipThreadExchangeStreamCaptureMode[('hipStreamCaptureMode*', 'mode')] +#define 
INIT_hipThreadExchangeStreamCaptureMode_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipThreadExchangeStreamCaptureMode.mode = (hipStreamCaptureMode*)mode; \ +}; +// hipUserObjectCreate[('hipUserObject_t*', 'object_out'), ('void*', 'ptr'), ('hipHostFn_t', 'destroy'), ('unsigned int', 'initialRefcount'), ('unsigned int', 'flags')] +#define INIT_hipUserObjectCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipUserObjectCreate.object_out = (hipUserObject_t*)object_out; \ + cb_data.args.hipUserObjectCreate.ptr = (void*)ptr; \ + cb_data.args.hipUserObjectCreate.destroy = (hipHostFn_t)destroy; \ + cb_data.args.hipUserObjectCreate.initialRefcount = (unsigned int)initialRefcount; \ + cb_data.args.hipUserObjectCreate.flags = (unsigned int)flags; \ +}; +// hipUserObjectRelease[('hipUserObject_t', 'object'), ('unsigned int', 'count')] +#define INIT_hipUserObjectRelease_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipUserObjectRelease.object = (hipUserObject_t)object; \ + cb_data.args.hipUserObjectRelease.count = (unsigned int)count; \ +}; +// hipUserObjectRetain[('hipUserObject_t', 'object'), ('unsigned int', 'count')] +#define INIT_hipUserObjectRetain_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipUserObjectRetain.object = (hipUserObject_t)object; \ + cb_data.args.hipUserObjectRetain.count = (unsigned int)count; \ +}; +// hipWaitExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreWaitParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')] +#define INIT_hipWaitExternalSemaphoresAsync_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipWaitExternalSemaphoresAsync.extSemArray = (const hipExternalSemaphore_t*)extSemArray; \ + cb_data.args.hipWaitExternalSemaphoresAsync.paramsArray = (const hipExternalSemaphoreWaitParams*)paramsArray; \ + cb_data.args.hipWaitExternalSemaphoresAsync.numExtSems = (unsigned int)numExtSems; \ + cb_data.args.hipWaitExternalSemaphoresAsync.stream = (hipStream_t)stream; \ +}; +#define 
INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data) + +// Macros for non-public API primitives +// hipBindTexture() +#define INIT_hipBindTexture_CB_ARGS_DATA(cb_data) {}; +// hipBindTexture2D() +#define INIT_hipBindTexture2D_CB_ARGS_DATA(cb_data) {}; +// hipBindTextureToArray() +#define INIT_hipBindTextureToArray_CB_ARGS_DATA(cb_data) {}; +// hipBindTextureToMipmappedArray() +#define INIT_hipBindTextureToMipmappedArray_CB_ARGS_DATA(cb_data) {}; +// hipCreateTextureObject() +#define INIT_hipCreateTextureObject_CB_ARGS_DATA(cb_data) {}; +// hipDestroyTextureObject() +#define INIT_hipDestroyTextureObject_CB_ARGS_DATA(cb_data) {}; +// hipDeviceGetCount() +#define INIT_hipDeviceGetCount_CB_ARGS_DATA(cb_data) {}; +// hipGetTextureAlignmentOffset() +#define INIT_hipGetTextureAlignmentOffset_CB_ARGS_DATA(cb_data) {}; +// hipGetTextureObjectResourceDesc() +#define INIT_hipGetTextureObjectResourceDesc_CB_ARGS_DATA(cb_data) {}; +// hipGetTextureObjectResourceViewDesc() +#define INIT_hipGetTextureObjectResourceViewDesc_CB_ARGS_DATA(cb_data) {}; +// hipGetTextureObjectTextureDesc() +#define INIT_hipGetTextureObjectTextureDesc_CB_ARGS_DATA(cb_data) {}; +// hipGetTextureReference() +#define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {}; +// hipMemcpy2DArrayToArray() +#define INIT_hipMemcpy2DArrayToArray_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyArrayToArray() +#define INIT_hipMemcpyArrayToArray_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyAtoA() +#define INIT_hipMemcpyAtoA_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyAtoD() +#define INIT_hipMemcpyAtoD_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyAtoHAsync() +#define INIT_hipMemcpyAtoHAsync_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyDtoA() +#define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyFromArrayAsync() +#define INIT_hipMemcpyFromArrayAsync_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyHtoAAsync() +#define INIT_hipMemcpyHtoAAsync_CB_ARGS_DATA(cb_data) {}; +// hipMemcpyToArrayAsync() +#define 
INIT_hipMemcpyToArrayAsync_CB_ARGS_DATA(cb_data) {}; +// hipModuleLaunchKernelExt() +#define INIT_hipModuleLaunchKernelExt_CB_ARGS_DATA(cb_data) {}; +// hipSetValidDevices() +#define INIT_hipSetValidDevices_CB_ARGS_DATA(cb_data) {}; +// hipTexObjectCreate() +#define INIT_hipTexObjectCreate_CB_ARGS_DATA(cb_data) {}; +// hipTexObjectDestroy() +#define INIT_hipTexObjectDestroy_CB_ARGS_DATA(cb_data) {}; +// hipTexObjectGetResourceDesc() +#define INIT_hipTexObjectGetResourceDesc_CB_ARGS_DATA(cb_data) {}; +// hipTexObjectGetResourceViewDesc() +#define INIT_hipTexObjectGetResourceViewDesc_CB_ARGS_DATA(cb_data) {}; +// hipTexObjectGetTextureDesc() +#define INIT_hipTexObjectGetTextureDesc_CB_ARGS_DATA(cb_data) {}; +// hipTexRefGetAddressMode() +#define INIT_hipTexRefGetAddressMode_CB_ARGS_DATA(cb_data) {}; +// hipTexRefGetArray() +#define INIT_hipTexRefGetArray_CB_ARGS_DATA(cb_data) {}; +// hipTexRefGetBorderColor() +#define INIT_hipTexRefGetBorderColor_CB_ARGS_DATA(cb_data) {}; +// hipTexRefGetFilterMode() +#define INIT_hipTexRefGetFilterMode_CB_ARGS_DATA(cb_data) {}; +// hipTexRefGetMipmapFilterMode() +#define INIT_hipTexRefGetMipmapFilterMode_CB_ARGS_DATA(cb_data) {}; +// hipTexRefGetMipmappedArray() +#define INIT_hipTexRefGetMipmappedArray_CB_ARGS_DATA(cb_data) {}; +// hipTexRefSetAddressMode() +#define INIT_hipTexRefSetAddressMode_CB_ARGS_DATA(cb_data) {}; +// hipTexRefSetFilterMode() +#define INIT_hipTexRefSetFilterMode_CB_ARGS_DATA(cb_data) {}; +// hipTexRefSetMipmapFilterMode() +#define INIT_hipTexRefSetMipmapFilterMode_CB_ARGS_DATA(cb_data) {}; +// hipUnbindTexture() +#define INIT_hipUnbindTexture_CB_ARGS_DATA(cb_data) {}; + +#define INIT_NONE_CB_ARGS_DATA(cb_data) {}; + +#if HIP_PROF_HIP_API_STRING +// HIP API args filling helper +static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { + switch (id) { +// __hipPopCallConfiguration[('dim3*', 'gridDim'), ('dim3*', 'blockDim'), ('size_t*', 'sharedMem'), ('hipStream_t*', 'stream')] + case 
HIP_API_ID___hipPopCallConfiguration: + if (data->args.__hipPopCallConfiguration.gridDim) data->args.__hipPopCallConfiguration.gridDim__val = *(data->args.__hipPopCallConfiguration.gridDim); + if (data->args.__hipPopCallConfiguration.blockDim) data->args.__hipPopCallConfiguration.blockDim__val = *(data->args.__hipPopCallConfiguration.blockDim); + if (data->args.__hipPopCallConfiguration.sharedMem) data->args.__hipPopCallConfiguration.sharedMem__val = *(data->args.__hipPopCallConfiguration.sharedMem); + if (data->args.__hipPopCallConfiguration.stream) data->args.__hipPopCallConfiguration.stream__val = *(data->args.__hipPopCallConfiguration.stream); + break; +// __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] + case HIP_API_ID___hipPushCallConfiguration: + break; +// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')] + case HIP_API_ID_hipArray3DCreate: + if (data->args.hipArray3DCreate.array) data->args.hipArray3DCreate.array__val = *(data->args.hipArray3DCreate.array); + if (data->args.hipArray3DCreate.pAllocateArray) data->args.hipArray3DCreate.pAllocateArray__val = *(data->args.hipArray3DCreate.pAllocateArray); + break; +// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray*', 'array')] + case HIP_API_ID_hipArray3DGetDescriptor: + if (data->args.hipArray3DGetDescriptor.pArrayDescriptor) data->args.hipArray3DGetDescriptor.pArrayDescriptor__val = *(data->args.hipArray3DGetDescriptor.pArrayDescriptor); + if (data->args.hipArray3DGetDescriptor.array) data->args.hipArray3DGetDescriptor.array__val = *(data->args.hipArray3DGetDescriptor.array); + break; +// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')] + case HIP_API_ID_hipArrayCreate: + if (data->args.hipArrayCreate.pHandle) data->args.hipArrayCreate.pHandle__val = *(data->args.hipArrayCreate.pHandle); + if 
(data->args.hipArrayCreate.pAllocateArray) data->args.hipArrayCreate.pAllocateArray__val = *(data->args.hipArrayCreate.pAllocateArray); + break; +// hipArrayDestroy[('hipArray*', 'array')] + case HIP_API_ID_hipArrayDestroy: + if (data->args.hipArrayDestroy.array) data->args.hipArrayDestroy.array__val = *(data->args.hipArrayDestroy.array); + break; +// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray*', 'array')] + case HIP_API_ID_hipArrayGetDescriptor: + if (data->args.hipArrayGetDescriptor.pArrayDescriptor) data->args.hipArrayGetDescriptor.pArrayDescriptor__val = *(data->args.hipArrayGetDescriptor.pArrayDescriptor); + if (data->args.hipArrayGetDescriptor.array) data->args.hipArrayGetDescriptor.array__val = *(data->args.hipArrayGetDescriptor.array); + break; +// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray*', 'array')] + case HIP_API_ID_hipArrayGetInfo: + if (data->args.hipArrayGetInfo.desc) data->args.hipArrayGetInfo.desc__val = *(data->args.hipArrayGetInfo.desc); + if (data->args.hipArrayGetInfo.extent) data->args.hipArrayGetInfo.extent__val = *(data->args.hipArrayGetInfo.extent); + if (data->args.hipArrayGetInfo.flags) data->args.hipArrayGetInfo.flags__val = *(data->args.hipArrayGetInfo.flags); + if (data->args.hipArrayGetInfo.array) data->args.hipArrayGetInfo.array__val = *(data->args.hipArrayGetInfo.array); + break; +// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')] + case HIP_API_ID_hipChooseDevice: + if (data->args.hipChooseDevice.device) data->args.hipChooseDevice.device__val = *(data->args.hipChooseDevice.device); + if (data->args.hipChooseDevice.prop) data->args.hipChooseDevice.prop__val = *(data->args.hipChooseDevice.prop); + break; +// hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipConfigureCall: + break; +// 
hipCreateSurfaceObject[('hipSurfaceObject_t*', 'pSurfObject'), ('const hipResourceDesc*', 'pResDesc')] + case HIP_API_ID_hipCreateSurfaceObject: + if (data->args.hipCreateSurfaceObject.pSurfObject) data->args.hipCreateSurfaceObject.pSurfObject__val = *(data->args.hipCreateSurfaceObject.pSurfObject); + if (data->args.hipCreateSurfaceObject.pResDesc) data->args.hipCreateSurfaceObject.pResDesc__val = *(data->args.hipCreateSurfaceObject.pResDesc); + break; +// hipCtxCreate[('hipCtx_t*', 'ctx'), ('unsigned int', 'flags'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipCtxCreate: + if (data->args.hipCtxCreate.ctx) data->args.hipCtxCreate.ctx__val = *(data->args.hipCtxCreate.ctx); + break; +// hipCtxDestroy[('hipCtx_t', 'ctx')] + case HIP_API_ID_hipCtxDestroy: + break; +// hipCtxDisablePeerAccess[('hipCtx_t', 'peerCtx')] + case HIP_API_ID_hipCtxDisablePeerAccess: + break; +// hipCtxEnablePeerAccess[('hipCtx_t', 'peerCtx'), ('unsigned int', 'flags')] + case HIP_API_ID_hipCtxEnablePeerAccess: + break; +// hipCtxGetApiVersion[('hipCtx_t', 'ctx'), ('int*', 'apiVersion')] + case HIP_API_ID_hipCtxGetApiVersion: + if (data->args.hipCtxGetApiVersion.apiVersion) data->args.hipCtxGetApiVersion.apiVersion__val = *(data->args.hipCtxGetApiVersion.apiVersion); + break; +// hipCtxGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')] + case HIP_API_ID_hipCtxGetCacheConfig: + if (data->args.hipCtxGetCacheConfig.cacheConfig) data->args.hipCtxGetCacheConfig.cacheConfig__val = *(data->args.hipCtxGetCacheConfig.cacheConfig); + break; +// hipCtxGetCurrent[('hipCtx_t*', 'ctx')] + case HIP_API_ID_hipCtxGetCurrent: + if (data->args.hipCtxGetCurrent.ctx) data->args.hipCtxGetCurrent.ctx__val = *(data->args.hipCtxGetCurrent.ctx); + break; +// hipCtxGetDevice[('hipDevice_t*', 'device')] + case HIP_API_ID_hipCtxGetDevice: + if (data->args.hipCtxGetDevice.device) data->args.hipCtxGetDevice.device__val = *(data->args.hipCtxGetDevice.device); + break; +// hipCtxGetFlags[('unsigned int*', 'flags')] + case 
HIP_API_ID_hipCtxGetFlags: + if (data->args.hipCtxGetFlags.flags) data->args.hipCtxGetFlags.flags__val = *(data->args.hipCtxGetFlags.flags); + break; +// hipCtxGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')] + case HIP_API_ID_hipCtxGetSharedMemConfig: + if (data->args.hipCtxGetSharedMemConfig.pConfig) data->args.hipCtxGetSharedMemConfig.pConfig__val = *(data->args.hipCtxGetSharedMemConfig.pConfig); + break; +// hipCtxPopCurrent[('hipCtx_t*', 'ctx')] + case HIP_API_ID_hipCtxPopCurrent: + if (data->args.hipCtxPopCurrent.ctx) data->args.hipCtxPopCurrent.ctx__val = *(data->args.hipCtxPopCurrent.ctx); + break; +// hipCtxPushCurrent[('hipCtx_t', 'ctx')] + case HIP_API_ID_hipCtxPushCurrent: + break; +// hipCtxSetCacheConfig[('hipFuncCache_t', 'cacheConfig')] + case HIP_API_ID_hipCtxSetCacheConfig: + break; +// hipCtxSetCurrent[('hipCtx_t', 'ctx')] + case HIP_API_ID_hipCtxSetCurrent: + break; +// hipCtxSetSharedMemConfig[('hipSharedMemConfig', 'config')] + case HIP_API_ID_hipCtxSetSharedMemConfig: + break; +// hipCtxSynchronize[] + case HIP_API_ID_hipCtxSynchronize: + break; +// hipDestroyExternalMemory[('hipExternalMemory_t', 'extMem')] + case HIP_API_ID_hipDestroyExternalMemory: + break; +// hipDestroyExternalSemaphore[('hipExternalSemaphore_t', 'extSem')] + case HIP_API_ID_hipDestroyExternalSemaphore: + break; +// hipDestroySurfaceObject[('hipSurfaceObject_t', 'surfaceObject')] + case HIP_API_ID_hipDestroySurfaceObject: + break; +// hipDeviceCanAccessPeer[('int*', 'canAccessPeer'), ('int', 'deviceId'), ('int', 'peerDeviceId')] + case HIP_API_ID_hipDeviceCanAccessPeer: + if (data->args.hipDeviceCanAccessPeer.canAccessPeer) data->args.hipDeviceCanAccessPeer.canAccessPeer__val = *(data->args.hipDeviceCanAccessPeer.canAccessPeer); + break; +// hipDeviceComputeCapability[('int*', 'major'), ('int*', 'minor'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceComputeCapability: + if (data->args.hipDeviceComputeCapability.major) 
data->args.hipDeviceComputeCapability.major__val = *(data->args.hipDeviceComputeCapability.major); + if (data->args.hipDeviceComputeCapability.minor) data->args.hipDeviceComputeCapability.minor__val = *(data->args.hipDeviceComputeCapability.minor); + break; +// hipDeviceDisablePeerAccess[('int', 'peerDeviceId')] + case HIP_API_ID_hipDeviceDisablePeerAccess: + break; +// hipDeviceEnablePeerAccess[('int', 'peerDeviceId'), ('unsigned int', 'flags')] + case HIP_API_ID_hipDeviceEnablePeerAccess: + break; +// hipDeviceGet[('hipDevice_t*', 'device'), ('int', 'ordinal')] + case HIP_API_ID_hipDeviceGet: + if (data->args.hipDeviceGet.device) data->args.hipDeviceGet.device__val = *(data->args.hipDeviceGet.device); + break; +// hipDeviceGetAttribute[('int*', 'pi'), ('hipDeviceAttribute_t', 'attr'), ('int', 'deviceId')] + case HIP_API_ID_hipDeviceGetAttribute: + if (data->args.hipDeviceGetAttribute.pi) data->args.hipDeviceGetAttribute.pi__val = *(data->args.hipDeviceGetAttribute.pi); + break; +// hipDeviceGetByPCIBusId[('int*', 'device'), ('const char*', 'pciBusId')] + case HIP_API_ID_hipDeviceGetByPCIBusId: + if (data->args.hipDeviceGetByPCIBusId.device) data->args.hipDeviceGetByPCIBusId.device__val = *(data->args.hipDeviceGetByPCIBusId.device); + if (data->args.hipDeviceGetByPCIBusId.pciBusId) data->args.hipDeviceGetByPCIBusId.pciBusId__val = *(data->args.hipDeviceGetByPCIBusId.pciBusId); + break; +// hipDeviceGetCacheConfig[('hipFuncCache_t*', 'cacheConfig')] + case HIP_API_ID_hipDeviceGetCacheConfig: + if (data->args.hipDeviceGetCacheConfig.cacheConfig) data->args.hipDeviceGetCacheConfig.cacheConfig__val = *(data->args.hipDeviceGetCacheConfig.cacheConfig); + break; +// hipDeviceGetDefaultMemPool[('hipMemPool_t*', 'mem_pool'), ('int', 'device')] + case HIP_API_ID_hipDeviceGetDefaultMemPool: + if (data->args.hipDeviceGetDefaultMemPool.mem_pool) data->args.hipDeviceGetDefaultMemPool.mem_pool__val = *(data->args.hipDeviceGetDefaultMemPool.mem_pool); + break; +// 
hipDeviceGetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipDeviceGetGraphMemAttribute: + break; +// hipDeviceGetLimit[('size_t*', 'pValue'), ('hipLimit_t', 'limit')] + case HIP_API_ID_hipDeviceGetLimit: + if (data->args.hipDeviceGetLimit.pValue) data->args.hipDeviceGetLimit.pValue__val = *(data->args.hipDeviceGetLimit.pValue); + break; +// hipDeviceGetMemPool[('hipMemPool_t*', 'mem_pool'), ('int', 'device')] + case HIP_API_ID_hipDeviceGetMemPool: + if (data->args.hipDeviceGetMemPool.mem_pool) data->args.hipDeviceGetMemPool.mem_pool__val = *(data->args.hipDeviceGetMemPool.mem_pool); + break; +// hipDeviceGetName[('char*', 'name'), ('int', 'len'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceGetName: + data->args.hipDeviceGetName.name = (data->args.hipDeviceGetName.name) ? strdup(data->args.hipDeviceGetName.name) : NULL; + break; +// hipDeviceGetP2PAttribute[('int*', 'value'), ('hipDeviceP2PAttr', 'attr'), ('int', 'srcDevice'), ('int', 'dstDevice')] + case HIP_API_ID_hipDeviceGetP2PAttribute: + if (data->args.hipDeviceGetP2PAttribute.value) data->args.hipDeviceGetP2PAttribute.value__val = *(data->args.hipDeviceGetP2PAttribute.value); + break; +// hipDeviceGetPCIBusId[('char*', 'pciBusId'), ('int', 'len'), ('int', 'device')] + case HIP_API_ID_hipDeviceGetPCIBusId: + data->args.hipDeviceGetPCIBusId.pciBusId = (data->args.hipDeviceGetPCIBusId.pciBusId) ? 
strdup(data->args.hipDeviceGetPCIBusId.pciBusId) : NULL; + break; +// hipDeviceGetSharedMemConfig[('hipSharedMemConfig*', 'pConfig')] + case HIP_API_ID_hipDeviceGetSharedMemConfig: + if (data->args.hipDeviceGetSharedMemConfig.pConfig) data->args.hipDeviceGetSharedMemConfig.pConfig__val = *(data->args.hipDeviceGetSharedMemConfig.pConfig); + break; +// hipDeviceGetStreamPriorityRange[('int*', 'leastPriority'), ('int*', 'greatestPriority')] + case HIP_API_ID_hipDeviceGetStreamPriorityRange: + if (data->args.hipDeviceGetStreamPriorityRange.leastPriority) data->args.hipDeviceGetStreamPriorityRange.leastPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.leastPriority); + if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority) data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val = *(data->args.hipDeviceGetStreamPriorityRange.greatestPriority); + break; +// hipDeviceGetUuid[('hipUUID*', 'uuid'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceGetUuid: + if (data->args.hipDeviceGetUuid.uuid) data->args.hipDeviceGetUuid.uuid__val = *(data->args.hipDeviceGetUuid.uuid); + break; +// hipDeviceGraphMemTrim[('int', 'device')] + case HIP_API_ID_hipDeviceGraphMemTrim: + break; +// hipDevicePrimaryCtxGetState[('hipDevice_t', 'dev'), ('unsigned int*', 'flags'), ('int*', 'active')] + case HIP_API_ID_hipDevicePrimaryCtxGetState: + if (data->args.hipDevicePrimaryCtxGetState.flags) data->args.hipDevicePrimaryCtxGetState.flags__val = *(data->args.hipDevicePrimaryCtxGetState.flags); + if (data->args.hipDevicePrimaryCtxGetState.active) data->args.hipDevicePrimaryCtxGetState.active__val = *(data->args.hipDevicePrimaryCtxGetState.active); + break; +// hipDevicePrimaryCtxRelease[('hipDevice_t', 'dev')] + case HIP_API_ID_hipDevicePrimaryCtxRelease: + break; +// hipDevicePrimaryCtxReset[('hipDevice_t', 'dev')] + case HIP_API_ID_hipDevicePrimaryCtxReset: + break; +// hipDevicePrimaryCtxRetain[('hipCtx_t*', 'pctx'), ('hipDevice_t', 'dev')] + case 
HIP_API_ID_hipDevicePrimaryCtxRetain: + if (data->args.hipDevicePrimaryCtxRetain.pctx) data->args.hipDevicePrimaryCtxRetain.pctx__val = *(data->args.hipDevicePrimaryCtxRetain.pctx); + break; +// hipDevicePrimaryCtxSetFlags[('hipDevice_t', 'dev'), ('unsigned int', 'flags')] + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: + break; +// hipDeviceReset[] + case HIP_API_ID_hipDeviceReset: + break; +// hipDeviceSetCacheConfig[('hipFuncCache_t', 'cacheConfig')] + case HIP_API_ID_hipDeviceSetCacheConfig: + break; +// hipDeviceSetGraphMemAttribute[('int', 'device'), ('hipGraphMemAttributeType', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipDeviceSetGraphMemAttribute: + break; +// hipDeviceSetLimit[('hipLimit_t', 'limit'), ('size_t', 'value')] + case HIP_API_ID_hipDeviceSetLimit: + break; +// hipDeviceSetMemPool[('int', 'device'), ('hipMemPool_t', 'mem_pool')] + case HIP_API_ID_hipDeviceSetMemPool: + break; +// hipDeviceSetSharedMemConfig[('hipSharedMemConfig', 'config')] + case HIP_API_ID_hipDeviceSetSharedMemConfig: + break; +// hipDeviceSynchronize[] + case HIP_API_ID_hipDeviceSynchronize: + break; +// hipDeviceTotalMem[('size_t*', 'bytes'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipDeviceTotalMem: + if (data->args.hipDeviceTotalMem.bytes) data->args.hipDeviceTotalMem.bytes__val = *(data->args.hipDeviceTotalMem.bytes); + break; +// hipDriverGetVersion[('int*', 'driverVersion')] + case HIP_API_ID_hipDriverGetVersion: + if (data->args.hipDriverGetVersion.driverVersion) data->args.hipDriverGetVersion.driverVersion__val = *(data->args.hipDriverGetVersion.driverVersion); + break; +// hipDrvMemcpy2DUnaligned[('const hip_Memcpy2D*', 'pCopy')] + case HIP_API_ID_hipDrvMemcpy2DUnaligned: + if (data->args.hipDrvMemcpy2DUnaligned.pCopy) data->args.hipDrvMemcpy2DUnaligned.pCopy__val = *(data->args.hipDrvMemcpy2DUnaligned.pCopy); + break; +// hipDrvMemcpy3D[('const HIP_MEMCPY3D*', 'pCopy')] + case HIP_API_ID_hipDrvMemcpy3D: + if (data->args.hipDrvMemcpy3D.pCopy) 
data->args.hipDrvMemcpy3D.pCopy__val = *(data->args.hipDrvMemcpy3D.pCopy); + break; +// hipDrvMemcpy3DAsync[('const HIP_MEMCPY3D*', 'pCopy'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipDrvMemcpy3DAsync: + if (data->args.hipDrvMemcpy3DAsync.pCopy) data->args.hipDrvMemcpy3DAsync.pCopy__val = *(data->args.hipDrvMemcpy3DAsync.pCopy); + break; +// hipDrvPointerGetAttributes[('unsigned int', 'numAttributes'), ('hipPointer_attribute*', 'attributes'), ('void**', 'data'), ('hipDeviceptr_t', 'ptr')] + case HIP_API_ID_hipDrvPointerGetAttributes: + if (data->args.hipDrvPointerGetAttributes.attributes) data->args.hipDrvPointerGetAttributes.attributes__val = *(data->args.hipDrvPointerGetAttributes.attributes); + if (data->args.hipDrvPointerGetAttributes.data) data->args.hipDrvPointerGetAttributes.data__val = *(data->args.hipDrvPointerGetAttributes.data); + break; +// hipEventCreate[('hipEvent_t*', 'event')] + case HIP_API_ID_hipEventCreate: + if (data->args.hipEventCreate.event) data->args.hipEventCreate.event__val = *(data->args.hipEventCreate.event); + break; +// hipEventCreateWithFlags[('hipEvent_t*', 'event'), ('unsigned int', 'flags')] + case HIP_API_ID_hipEventCreateWithFlags: + if (data->args.hipEventCreateWithFlags.event) data->args.hipEventCreateWithFlags.event__val = *(data->args.hipEventCreateWithFlags.event); + break; +// hipEventDestroy[('hipEvent_t', 'event')] + case HIP_API_ID_hipEventDestroy: + break; +// hipEventElapsedTime[('float*', 'ms'), ('hipEvent_t', 'start'), ('hipEvent_t', 'stop')] + case HIP_API_ID_hipEventElapsedTime: + if (data->args.hipEventElapsedTime.ms) data->args.hipEventElapsedTime.ms__val = *(data->args.hipEventElapsedTime.ms); + break; +// hipEventQuery[('hipEvent_t', 'event')] + case HIP_API_ID_hipEventQuery: + break; +// hipEventRecord[('hipEvent_t', 'event'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipEventRecord: + break; +// hipEventSynchronize[('hipEvent_t', 'event')] + case HIP_API_ID_hipEventSynchronize: + break; +// 
hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')] + case HIP_API_ID_hipExtGetLinkTypeAndHopCount: + if (data->args.hipExtGetLinkTypeAndHopCount.linktype) data->args.hipExtGetLinkTypeAndHopCount.linktype__val = *(data->args.hipExtGetLinkTypeAndHopCount.linktype); + if (data->args.hipExtGetLinkTypeAndHopCount.hopcount) data->args.hipExtGetLinkTypeAndHopCount.hopcount__val = *(data->args.hipExtGetLinkTypeAndHopCount.hopcount); + break; +// hipExtLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('int', 'flags')] + case HIP_API_ID_hipExtLaunchKernel: + if (data->args.hipExtLaunchKernel.args) data->args.hipExtLaunchKernel.args__val = *(data->args.hipExtLaunchKernel.args); + break; +// hipExtLaunchMultiKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')] + case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: + if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList) data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val = *(data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList); + break; +// hipExtMallocWithFlags[('void**', 'ptr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] + case HIP_API_ID_hipExtMallocWithFlags: + if (data->args.hipExtMallocWithFlags.ptr) data->args.hipExtMallocWithFlags.ptr__val = *(data->args.hipExtMallocWithFlags.ptr); + break; +// hipExtModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'localWorkSizeX'), ('unsigned int', 'localWorkSizeY'), ('unsigned int', 'localWorkSizeZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 
'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent'), ('unsigned int', 'flags')] + case HIP_API_ID_hipExtModuleLaunchKernel: + if (data->args.hipExtModuleLaunchKernel.kernelParams) data->args.hipExtModuleLaunchKernel.kernelParams__val = *(data->args.hipExtModuleLaunchKernel.kernelParams); + if (data->args.hipExtModuleLaunchKernel.extra) data->args.hipExtModuleLaunchKernel.extra__val = *(data->args.hipExtModuleLaunchKernel.extra); + break; +// hipExtStreamCreateWithCUMask[('hipStream_t*', 'stream'), ('unsigned int', 'cuMaskSize'), ('const unsigned int*', 'cuMask')] + case HIP_API_ID_hipExtStreamCreateWithCUMask: + if (data->args.hipExtStreamCreateWithCUMask.stream) data->args.hipExtStreamCreateWithCUMask.stream__val = *(data->args.hipExtStreamCreateWithCUMask.stream); + if (data->args.hipExtStreamCreateWithCUMask.cuMask) data->args.hipExtStreamCreateWithCUMask.cuMask__val = *(data->args.hipExtStreamCreateWithCUMask.cuMask); + break; +// hipExtStreamGetCUMask[('hipStream_t', 'stream'), ('unsigned int', 'cuMaskSize'), ('unsigned int*', 'cuMask')] + case HIP_API_ID_hipExtStreamGetCUMask: + if (data->args.hipExtStreamGetCUMask.cuMask) data->args.hipExtStreamGetCUMask.cuMask__val = *(data->args.hipExtStreamGetCUMask.cuMask); + break; +// hipExternalMemoryGetMappedBuffer[('void**', 'devPtr'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryBufferDesc*', 'bufferDesc')] + case HIP_API_ID_hipExternalMemoryGetMappedBuffer: + if (data->args.hipExternalMemoryGetMappedBuffer.devPtr) data->args.hipExternalMemoryGetMappedBuffer.devPtr__val = *(data->args.hipExternalMemoryGetMappedBuffer.devPtr); + if (data->args.hipExternalMemoryGetMappedBuffer.bufferDesc) data->args.hipExternalMemoryGetMappedBuffer.bufferDesc__val = *(data->args.hipExternalMemoryGetMappedBuffer.bufferDesc); + break; +// hipFree[('void*', 'ptr')] + case HIP_API_ID_hipFree: + break; +// hipFreeArray[('hipArray*', 'array')] + case HIP_API_ID_hipFreeArray: + if 
(data->args.hipFreeArray.array) data->args.hipFreeArray.array__val = *(data->args.hipFreeArray.array); + break; +// hipFreeAsync[('void*', 'dev_ptr'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipFreeAsync: + break; +// hipFreeHost[('void*', 'ptr')] + case HIP_API_ID_hipFreeHost: + break; +// hipFreeMipmappedArray[('hipMipmappedArray_t', 'mipmappedArray')] + case HIP_API_ID_hipFreeMipmappedArray: + break; +// hipFuncGetAttribute[('int*', 'value'), ('hipFunction_attribute', 'attrib'), ('hipFunction_t', 'hfunc')] + case HIP_API_ID_hipFuncGetAttribute: + if (data->args.hipFuncGetAttribute.value) data->args.hipFuncGetAttribute.value__val = *(data->args.hipFuncGetAttribute.value); + break; +// hipFuncGetAttributes[('hipFuncAttributes*', 'attr'), ('const void*', 'func')] + case HIP_API_ID_hipFuncGetAttributes: + if (data->args.hipFuncGetAttributes.attr) data->args.hipFuncGetAttributes.attr__val = *(data->args.hipFuncGetAttributes.attr); + break; +// hipFuncSetAttribute[('const void*', 'func'), ('hipFuncAttribute', 'attr'), ('int', 'value')] + case HIP_API_ID_hipFuncSetAttribute: + break; +// hipFuncSetCacheConfig[('const void*', 'func'), ('hipFuncCache_t', 'config')] + case HIP_API_ID_hipFuncSetCacheConfig: + break; +// hipFuncSetSharedMemConfig[('const void*', 'func'), ('hipSharedMemConfig', 'config')] + case HIP_API_ID_hipFuncSetSharedMemConfig: + break; +// hipGLGetDevices[('unsigned int*', 'pHipDeviceCount'), ('int*', 'pHipDevices'), ('unsigned int', 'hipDeviceCount'), ('hipGLDeviceList', 'deviceList')] + case HIP_API_ID_hipGLGetDevices: + if (data->args.hipGLGetDevices.pHipDeviceCount) data->args.hipGLGetDevices.pHipDeviceCount__val = *(data->args.hipGLGetDevices.pHipDeviceCount); + if (data->args.hipGLGetDevices.pHipDevices) data->args.hipGLGetDevices.pHipDevices__val = *(data->args.hipGLGetDevices.pHipDevices); + break; +// hipGetChannelDesc[('hipChannelFormatDesc*', 'desc'), ('hipArray_const_t', 'array')] + case HIP_API_ID_hipGetChannelDesc: + if 
(data->args.hipGetChannelDesc.desc) data->args.hipGetChannelDesc.desc__val = *(data->args.hipGetChannelDesc.desc); + break; +// hipGetDevice[('int*', 'deviceId')] + case HIP_API_ID_hipGetDevice: + if (data->args.hipGetDevice.deviceId) data->args.hipGetDevice.deviceId__val = *(data->args.hipGetDevice.deviceId); + break; +// hipGetDeviceCount[('int*', 'count')] + case HIP_API_ID_hipGetDeviceCount: + if (data->args.hipGetDeviceCount.count) data->args.hipGetDeviceCount.count__val = *(data->args.hipGetDeviceCount.count); + break; +// hipGetDeviceFlags[('unsigned int*', 'flags')] + case HIP_API_ID_hipGetDeviceFlags: + if (data->args.hipGetDeviceFlags.flags) data->args.hipGetDeviceFlags.flags__val = *(data->args.hipGetDeviceFlags.flags); + break; +// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')] + case HIP_API_ID_hipGetDeviceProperties: + if (data->args.hipGetDeviceProperties.props) data->args.hipGetDeviceProperties.props__val = *(data->args.hipGetDeviceProperties.props); + break; +// hipGetErrorString[] + case HIP_API_ID_hipGetErrorString: + break; +// hipGetLastError[] + case HIP_API_ID_hipGetLastError: + break; +// hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')] + case HIP_API_ID_hipGetMipmappedArrayLevel: + if (data->args.hipGetMipmappedArrayLevel.levelArray) data->args.hipGetMipmappedArrayLevel.levelArray__val = *(data->args.hipGetMipmappedArrayLevel.levelArray); + break; +// hipGetSymbolAddress[('void**', 'devPtr'), ('const void*', 'symbol')] + case HIP_API_ID_hipGetSymbolAddress: + if (data->args.hipGetSymbolAddress.devPtr) data->args.hipGetSymbolAddress.devPtr__val = *(data->args.hipGetSymbolAddress.devPtr); + break; +// hipGetSymbolSize[('size_t*', 'size'), ('const void*', 'symbol')] + case HIP_API_ID_hipGetSymbolSize: + if (data->args.hipGetSymbolSize.size) data->args.hipGetSymbolSize.size__val = *(data->args.hipGetSymbolSize.size); + break; 
+// hipGraphAddChildGraphNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipGraph_t', 'childGraph')] + case HIP_API_ID_hipGraphAddChildGraphNode: + if (data->args.hipGraphAddChildGraphNode.pGraphNode) data->args.hipGraphAddChildGraphNode.pGraphNode__val = *(data->args.hipGraphAddChildGraphNode.pGraphNode); + if (data->args.hipGraphAddChildGraphNode.pDependencies) data->args.hipGraphAddChildGraphNode.pDependencies__val = *(data->args.hipGraphAddChildGraphNode.pDependencies); + break; +// hipGraphAddDependencies[('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] + case HIP_API_ID_hipGraphAddDependencies: + if (data->args.hipGraphAddDependencies.from) data->args.hipGraphAddDependencies.from__val = *(data->args.hipGraphAddDependencies.from); + if (data->args.hipGraphAddDependencies.to) data->args.hipGraphAddDependencies.to__val = *(data->args.hipGraphAddDependencies.to); + break; +// hipGraphAddEmptyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies')] + case HIP_API_ID_hipGraphAddEmptyNode: + if (data->args.hipGraphAddEmptyNode.pGraphNode) data->args.hipGraphAddEmptyNode.pGraphNode__val = *(data->args.hipGraphAddEmptyNode.pGraphNode); + if (data->args.hipGraphAddEmptyNode.pDependencies) data->args.hipGraphAddEmptyNode.pDependencies__val = *(data->args.hipGraphAddEmptyNode.pDependencies); + break; +// hipGraphAddEventRecordNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphAddEventRecordNode: + if (data->args.hipGraphAddEventRecordNode.pGraphNode) data->args.hipGraphAddEventRecordNode.pGraphNode__val = *(data->args.hipGraphAddEventRecordNode.pGraphNode); + if 
(data->args.hipGraphAddEventRecordNode.pDependencies) data->args.hipGraphAddEventRecordNode.pDependencies__val = *(data->args.hipGraphAddEventRecordNode.pDependencies); + break; +// hipGraphAddEventWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphAddEventWaitNode: + if (data->args.hipGraphAddEventWaitNode.pGraphNode) data->args.hipGraphAddEventWaitNode.pGraphNode__val = *(data->args.hipGraphAddEventWaitNode.pGraphNode); + if (data->args.hipGraphAddEventWaitNode.pDependencies) data->args.hipGraphAddEventWaitNode.pDependencies__val = *(data->args.hipGraphAddEventWaitNode.pDependencies); + break; +// hipGraphAddHostNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphAddHostNode: + if (data->args.hipGraphAddHostNode.pGraphNode) data->args.hipGraphAddHostNode.pGraphNode__val = *(data->args.hipGraphAddHostNode.pGraphNode); + if (data->args.hipGraphAddHostNode.pDependencies) data->args.hipGraphAddHostNode.pDependencies__val = *(data->args.hipGraphAddHostNode.pDependencies); + if (data->args.hipGraphAddHostNode.pNodeParams) data->args.hipGraphAddHostNode.pNodeParams__val = *(data->args.hipGraphAddHostNode.pNodeParams); + break; +// hipGraphAddKernelNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphAddKernelNode: + if (data->args.hipGraphAddKernelNode.pGraphNode) data->args.hipGraphAddKernelNode.pGraphNode__val = *(data->args.hipGraphAddKernelNode.pGraphNode); + if (data->args.hipGraphAddKernelNode.pDependencies) data->args.hipGraphAddKernelNode.pDependencies__val = 
*(data->args.hipGraphAddKernelNode.pDependencies); + if (data->args.hipGraphAddKernelNode.pNodeParams) data->args.hipGraphAddKernelNode.pNodeParams__val = *(data->args.hipGraphAddKernelNode.pNodeParams); + break; +// hipGraphAddMemAllocNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('hipMemAllocNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphAddMemAllocNode: + if (data->args.hipGraphAddMemAllocNode.pGraphNode) data->args.hipGraphAddMemAllocNode.pGraphNode__val = *(data->args.hipGraphAddMemAllocNode.pGraphNode); + if (data->args.hipGraphAddMemAllocNode.pDependencies) data->args.hipGraphAddMemAllocNode.pDependencies__val = *(data->args.hipGraphAddMemAllocNode.pDependencies); + if (data->args.hipGraphAddMemAllocNode.pNodeParams) data->args.hipGraphAddMemAllocNode.pNodeParams__val = *(data->args.hipGraphAddMemAllocNode.pNodeParams); + break; +// hipGraphAddMemFreeNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dev_ptr')] + case HIP_API_ID_hipGraphAddMemFreeNode: + if (data->args.hipGraphAddMemFreeNode.pGraphNode) data->args.hipGraphAddMemFreeNode.pGraphNode__val = *(data->args.hipGraphAddMemFreeNode.pGraphNode); + if (data->args.hipGraphAddMemFreeNode.pDependencies) data->args.hipGraphAddMemFreeNode.pDependencies__val = *(data->args.hipGraphAddMemFreeNode.pDependencies); + break; +// hipGraphAddMemcpyNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemcpy3DParms*', 'pCopyParams')] + case HIP_API_ID_hipGraphAddMemcpyNode: + if (data->args.hipGraphAddMemcpyNode.pGraphNode) data->args.hipGraphAddMemcpyNode.pGraphNode__val = *(data->args.hipGraphAddMemcpyNode.pGraphNode); + if (data->args.hipGraphAddMemcpyNode.pDependencies) 
data->args.hipGraphAddMemcpyNode.pDependencies__val = *(data->args.hipGraphAddMemcpyNode.pDependencies); + if (data->args.hipGraphAddMemcpyNode.pCopyParams) data->args.hipGraphAddMemcpyNode.pCopyParams__val = *(data->args.hipGraphAddMemcpyNode.pCopyParams); + break; +// hipGraphAddMemcpyNode1D[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphAddMemcpyNode1D: + if (data->args.hipGraphAddMemcpyNode1D.pGraphNode) data->args.hipGraphAddMemcpyNode1D.pGraphNode__val = *(data->args.hipGraphAddMemcpyNode1D.pGraphNode); + if (data->args.hipGraphAddMemcpyNode1D.pDependencies) data->args.hipGraphAddMemcpyNode1D.pDependencies__val = *(data->args.hipGraphAddMemcpyNode1D.pDependencies); + break; +// hipGraphAddMemcpyNodeFromSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol: + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode) data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode__val = *(data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode); + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies) data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies__val = *(data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies); + break; +// hipGraphAddMemcpyNodeToSymbol[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphAddMemcpyNodeToSymbol: + if 
(data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode) data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode__val = *(data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode); + if (data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies) data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies__val = *(data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies); + break; +// hipGraphAddMemsetNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipMemsetParams*', 'pMemsetParams')] + case HIP_API_ID_hipGraphAddMemsetNode: + if (data->args.hipGraphAddMemsetNode.pGraphNode) data->args.hipGraphAddMemsetNode.pGraphNode__val = *(data->args.hipGraphAddMemsetNode.pGraphNode); + if (data->args.hipGraphAddMemsetNode.pDependencies) data->args.hipGraphAddMemsetNode.pDependencies__val = *(data->args.hipGraphAddMemsetNode.pDependencies); + if (data->args.hipGraphAddMemsetNode.pMemsetParams) data->args.hipGraphAddMemsetNode.pMemsetParams__val = *(data->args.hipGraphAddMemsetNode.pMemsetParams); + break; +// hipGraphChildGraphNodeGetGraph[('hipGraphNode_t', 'node'), ('hipGraph_t*', 'pGraph')] + case HIP_API_ID_hipGraphChildGraphNodeGetGraph: + if (data->args.hipGraphChildGraphNodeGetGraph.pGraph) data->args.hipGraphChildGraphNodeGetGraph.pGraph__val = *(data->args.hipGraphChildGraphNodeGetGraph.pGraph); + break; +// hipGraphClone[('hipGraph_t*', 'pGraphClone'), ('hipGraph_t', 'originalGraph')] + case HIP_API_ID_hipGraphClone: + if (data->args.hipGraphClone.pGraphClone) data->args.hipGraphClone.pGraphClone__val = *(data->args.hipGraphClone.pGraphClone); + break; +// hipGraphCreate[('hipGraph_t*', 'pGraph'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphCreate: + if (data->args.hipGraphCreate.pGraph) data->args.hipGraphCreate.pGraph__val = *(data->args.hipGraphCreate.pGraph); + break; +// hipGraphDebugDotPrint[('hipGraph_t', 'graph'), ('const char*', 'path'), ('unsigned int', 'flags')] + case 
HIP_API_ID_hipGraphDebugDotPrint: + if (data->args.hipGraphDebugDotPrint.path) data->args.hipGraphDebugDotPrint.path__val = *(data->args.hipGraphDebugDotPrint.path); + break; +// hipGraphDestroy[('hipGraph_t', 'graph')] + case HIP_API_ID_hipGraphDestroy: + break; +// hipGraphDestroyNode[('hipGraphNode_t', 'node')] + case HIP_API_ID_hipGraphDestroyNode: + break; +// hipGraphEventRecordNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] + case HIP_API_ID_hipGraphEventRecordNodeGetEvent: + if (data->args.hipGraphEventRecordNodeGetEvent.event_out) data->args.hipGraphEventRecordNodeGetEvent.event_out__val = *(data->args.hipGraphEventRecordNodeGetEvent.event_out); + break; +// hipGraphEventRecordNodeSetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphEventRecordNodeSetEvent: + break; +// hipGraphEventWaitNodeGetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t*', 'event_out')] + case HIP_API_ID_hipGraphEventWaitNodeGetEvent: + if (data->args.hipGraphEventWaitNodeGetEvent.event_out) data->args.hipGraphEventWaitNodeGetEvent.event_out__val = *(data->args.hipGraphEventWaitNodeGetEvent.event_out); + break; +// hipGraphEventWaitNodeSetEvent[('hipGraphNode_t', 'node'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphEventWaitNodeSetEvent: + break; +// hipGraphExecChildGraphNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipGraph_t', 'childGraph')] + case HIP_API_ID_hipGraphExecChildGraphNodeSetParams: + break; +// hipGraphExecDestroy[('hipGraphExec_t', 'graphExec')] + case HIP_API_ID_hipGraphExecDestroy: + break; +// hipGraphExecEventRecordNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphExecEventRecordNodeSetEvent: + break; +// hipGraphExecEventWaitNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: + break; +// 
hipGraphExecHostNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecHostNodeSetParams: + if (data->args.hipGraphExecHostNodeSetParams.pNodeParams) data->args.hipGraphExecHostNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecHostNodeSetParams.pNodeParams); + break; +// hipGraphExecKernelNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecKernelNodeSetParams: + if (data->args.hipGraphExecKernelNodeSetParams.pNodeParams) data->args.hipGraphExecKernelNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecKernelNodeSetParams.pNodeParams); + break; +// hipGraphExecMemcpyNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams: + if (data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams) data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams); + break; +// hipGraphExecMemcpyNodeSetParams1D[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D: + break; +// hipGraphExecMemcpyNodeSetParamsFromSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol: + break; +// hipGraphExecMemcpyNodeSetParamsToSymbol[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol: + break; +// 
hipGraphExecMemsetNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphExecMemsetNodeSetParams: + if (data->args.hipGraphExecMemsetNodeSetParams.pNodeParams) data->args.hipGraphExecMemsetNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecMemsetNodeSetParams.pNodeParams); + break; +// hipGraphExecUpdate[('hipGraphExec_t', 'hGraphExec'), ('hipGraph_t', 'hGraph'), ('hipGraphNode_t*', 'hErrorNode_out'), ('hipGraphExecUpdateResult*', 'updateResult_out')] + case HIP_API_ID_hipGraphExecUpdate: + if (data->args.hipGraphExecUpdate.hErrorNode_out) data->args.hipGraphExecUpdate.hErrorNode_out__val = *(data->args.hipGraphExecUpdate.hErrorNode_out); + if (data->args.hipGraphExecUpdate.updateResult_out) data->args.hipGraphExecUpdate.updateResult_out__val = *(data->args.hipGraphExecUpdate.updateResult_out); + break; +// hipGraphGetEdges[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'from'), ('hipGraphNode_t*', 'to'), ('size_t*', 'numEdges')] + case HIP_API_ID_hipGraphGetEdges: + if (data->args.hipGraphGetEdges.from) data->args.hipGraphGetEdges.from__val = *(data->args.hipGraphGetEdges.from); + if (data->args.hipGraphGetEdges.to) data->args.hipGraphGetEdges.to__val = *(data->args.hipGraphGetEdges.to); + if (data->args.hipGraphGetEdges.numEdges) data->args.hipGraphGetEdges.numEdges__val = *(data->args.hipGraphGetEdges.numEdges); + break; +// hipGraphGetNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'nodes'), ('size_t*', 'numNodes')] + case HIP_API_ID_hipGraphGetNodes: + if (data->args.hipGraphGetNodes.nodes) data->args.hipGraphGetNodes.nodes__val = *(data->args.hipGraphGetNodes.nodes); + if (data->args.hipGraphGetNodes.numNodes) data->args.hipGraphGetNodes.numNodes__val = *(data->args.hipGraphGetNodes.numNodes); + break; +// hipGraphGetRootNodes[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pRootNodes'), ('size_t*', 'pNumRootNodes')] + case HIP_API_ID_hipGraphGetRootNodes: + if 
(data->args.hipGraphGetRootNodes.pRootNodes) data->args.hipGraphGetRootNodes.pRootNodes__val = *(data->args.hipGraphGetRootNodes.pRootNodes); + if (data->args.hipGraphGetRootNodes.pNumRootNodes) data->args.hipGraphGetRootNodes.pNumRootNodes__val = *(data->args.hipGraphGetRootNodes.pNumRootNodes); + break; +// hipGraphHostNodeGetParams[('hipGraphNode_t', 'node'), ('hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphHostNodeGetParams: + if (data->args.hipGraphHostNodeGetParams.pNodeParams) data->args.hipGraphHostNodeGetParams.pNodeParams__val = *(data->args.hipGraphHostNodeGetParams.pNodeParams); + break; +// hipGraphHostNodeSetParams[('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphHostNodeSetParams: + if (data->args.hipGraphHostNodeSetParams.pNodeParams) data->args.hipGraphHostNodeSetParams.pNodeParams__val = *(data->args.hipGraphHostNodeSetParams.pNodeParams); + break; +// hipGraphInstantiate[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'pErrorNode'), ('char*', 'pLogBuffer'), ('size_t', 'bufferSize')] + case HIP_API_ID_hipGraphInstantiate: + if (data->args.hipGraphInstantiate.pGraphExec) data->args.hipGraphInstantiate.pGraphExec__val = *(data->args.hipGraphInstantiate.pGraphExec); + if (data->args.hipGraphInstantiate.pErrorNode) data->args.hipGraphInstantiate.pErrorNode__val = *(data->args.hipGraphInstantiate.pErrorNode); + data->args.hipGraphInstantiate.pLogBuffer = (data->args.hipGraphInstantiate.pLogBuffer) ? 
strdup(data->args.hipGraphInstantiate.pLogBuffer) : NULL; + break; +// hipGraphInstantiateWithFlags[('hipGraphExec_t*', 'pGraphExec'), ('hipGraph_t', 'graph'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipGraphInstantiateWithFlags: + if (data->args.hipGraphInstantiateWithFlags.pGraphExec) data->args.hipGraphInstantiateWithFlags.pGraphExec__val = *(data->args.hipGraphInstantiateWithFlags.pGraphExec); + break; +// hipGraphKernelNodeCopyAttributes[('hipGraphNode_t', 'hSrc'), ('hipGraphNode_t', 'hDst')] + case HIP_API_ID_hipGraphKernelNodeCopyAttributes: + break; +// hipGraphKernelNodeGetAttribute[('hipGraphNode_t', 'hNode'), ('hipKernelNodeAttrID', 'attr'), ('hipKernelNodeAttrValue*', 'value')] + case HIP_API_ID_hipGraphKernelNodeGetAttribute: + if (data->args.hipGraphKernelNodeGetAttribute.value) data->args.hipGraphKernelNodeGetAttribute.value__val = *(data->args.hipGraphKernelNodeGetAttribute.value); + break; +// hipGraphKernelNodeGetParams[('hipGraphNode_t', 'node'), ('hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphKernelNodeGetParams: + if (data->args.hipGraphKernelNodeGetParams.pNodeParams) data->args.hipGraphKernelNodeGetParams.pNodeParams__val = *(data->args.hipGraphKernelNodeGetParams.pNodeParams); + break; +// hipGraphKernelNodeSetAttribute[('hipGraphNode_t', 'hNode'), ('hipKernelNodeAttrID', 'attr'), ('const hipKernelNodeAttrValue*', 'value')] + case HIP_API_ID_hipGraphKernelNodeSetAttribute: + if (data->args.hipGraphKernelNodeSetAttribute.value) data->args.hipGraphKernelNodeSetAttribute.value__val = *(data->args.hipGraphKernelNodeSetAttribute.value); + break; +// hipGraphKernelNodeSetParams[('hipGraphNode_t', 'node'), ('const hipKernelNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphKernelNodeSetParams: + if (data->args.hipGraphKernelNodeSetParams.pNodeParams) data->args.hipGraphKernelNodeSetParams.pNodeParams__val = *(data->args.hipGraphKernelNodeSetParams.pNodeParams); + break; +// hipGraphLaunch[('hipGraphExec_t', 
'graphExec'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphLaunch: + break; +// hipGraphMemAllocNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemAllocNodeParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemAllocNodeGetParams: + if (data->args.hipGraphMemAllocNodeGetParams.pNodeParams) data->args.hipGraphMemAllocNodeGetParams.pNodeParams__val = *(data->args.hipGraphMemAllocNodeGetParams.pNodeParams); + break; +// hipGraphMemFreeNodeGetParams[('hipGraphNode_t', 'node'), ('void*', 'dev_ptr')] + case HIP_API_ID_hipGraphMemFreeNodeGetParams: + break; +// hipGraphMemcpyNodeGetParams[('hipGraphNode_t', 'node'), ('hipMemcpy3DParms*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemcpyNodeGetParams: + if (data->args.hipGraphMemcpyNodeGetParams.pNodeParams) data->args.hipGraphMemcpyNodeGetParams.pNodeParams__val = *(data->args.hipGraphMemcpyNodeGetParams.pNodeParams); + break; +// hipGraphMemcpyNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemcpy3DParms*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemcpyNodeSetParams: + if (data->args.hipGraphMemcpyNodeSetParams.pNodeParams) data->args.hipGraphMemcpyNodeSetParams.pNodeParams__val = *(data->args.hipGraphMemcpyNodeSetParams.pNodeParams); + break; +// hipGraphMemcpyNodeSetParams1D[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphMemcpyNodeSetParams1D: + break; +// hipGraphMemcpyNodeSetParamsFromSymbol[('hipGraphNode_t', 'node'), ('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol: + break; +// hipGraphMemcpyNodeSetParamsToSymbol[('hipGraphNode_t', 'node'), ('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'count'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol: + break; +// hipGraphMemsetNodeGetParams[('hipGraphNode_t', 'node'), 
('hipMemsetParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemsetNodeGetParams: + if (data->args.hipGraphMemsetNodeGetParams.pNodeParams) data->args.hipGraphMemsetNodeGetParams.pNodeParams__val = *(data->args.hipGraphMemsetNodeGetParams.pNodeParams); + break; +// hipGraphMemsetNodeSetParams[('hipGraphNode_t', 'node'), ('const hipMemsetParams*', 'pNodeParams')] + case HIP_API_ID_hipGraphMemsetNodeSetParams: + if (data->args.hipGraphMemsetNodeSetParams.pNodeParams) data->args.hipGraphMemsetNodeSetParams.pNodeParams__val = *(data->args.hipGraphMemsetNodeSetParams.pNodeParams); + break; +// hipGraphNodeFindInClone[('hipGraphNode_t*', 'pNode'), ('hipGraphNode_t', 'originalNode'), ('hipGraph_t', 'clonedGraph')] + case HIP_API_ID_hipGraphNodeFindInClone: + if (data->args.hipGraphNodeFindInClone.pNode) data->args.hipGraphNodeFindInClone.pNode__val = *(data->args.hipGraphNodeFindInClone.pNode); + break; +// hipGraphNodeGetDependencies[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependencies'), ('size_t*', 'pNumDependencies')] + case HIP_API_ID_hipGraphNodeGetDependencies: + if (data->args.hipGraphNodeGetDependencies.pDependencies) data->args.hipGraphNodeGetDependencies.pDependencies__val = *(data->args.hipGraphNodeGetDependencies.pDependencies); + if (data->args.hipGraphNodeGetDependencies.pNumDependencies) data->args.hipGraphNodeGetDependencies.pNumDependencies__val = *(data->args.hipGraphNodeGetDependencies.pNumDependencies); + break; +// hipGraphNodeGetDependentNodes[('hipGraphNode_t', 'node'), ('hipGraphNode_t*', 'pDependentNodes'), ('size_t*', 'pNumDependentNodes')] + case HIP_API_ID_hipGraphNodeGetDependentNodes: + if (data->args.hipGraphNodeGetDependentNodes.pDependentNodes) data->args.hipGraphNodeGetDependentNodes.pDependentNodes__val = *(data->args.hipGraphNodeGetDependentNodes.pDependentNodes); + if (data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes) data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes__val = 
*(data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes); + break; +// hipGraphNodeGetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int*', 'isEnabled')] + case HIP_API_ID_hipGraphNodeGetEnabled: + if (data->args.hipGraphNodeGetEnabled.isEnabled) data->args.hipGraphNodeGetEnabled.isEnabled__val = *(data->args.hipGraphNodeGetEnabled.isEnabled); + break; +// hipGraphNodeGetType[('hipGraphNode_t', 'node'), ('hipGraphNodeType*', 'pType')] + case HIP_API_ID_hipGraphNodeGetType: + if (data->args.hipGraphNodeGetType.pType) data->args.hipGraphNodeGetType.pType__val = *(data->args.hipGraphNodeGetType.pType); + break; +// hipGraphNodeSetEnabled[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('unsigned int', 'isEnabled')] + case HIP_API_ID_hipGraphNodeSetEnabled: + break; +// hipGraphReleaseUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count')] + case HIP_API_ID_hipGraphReleaseUserObject: + break; +// hipGraphRemoveDependencies[('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'from'), ('const hipGraphNode_t*', 'to'), ('size_t', 'numDependencies')] + case HIP_API_ID_hipGraphRemoveDependencies: + if (data->args.hipGraphRemoveDependencies.from) data->args.hipGraphRemoveDependencies.from__val = *(data->args.hipGraphRemoveDependencies.from); + if (data->args.hipGraphRemoveDependencies.to) data->args.hipGraphRemoveDependencies.to__val = *(data->args.hipGraphRemoveDependencies.to); + break; +// hipGraphRetainUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphRetainUserObject: + break; +// hipGraphUpload[('hipGraphExec_t', 'graphExec'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphUpload: + break; +// hipGraphicsGLRegisterBuffer[('hipGraphicsResource**', 'resource'), ('GLuint', 'buffer'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphicsGLRegisterBuffer: + if 
(data->args.hipGraphicsGLRegisterBuffer.resource) data->args.hipGraphicsGLRegisterBuffer.resource__val = *(data->args.hipGraphicsGLRegisterBuffer.resource); + break; +// hipGraphicsGLRegisterImage[('hipGraphicsResource**', 'resource'), ('GLuint', 'image'), ('GLenum', 'target'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphicsGLRegisterImage: + if (data->args.hipGraphicsGLRegisterImage.resource) data->args.hipGraphicsGLRegisterImage.resource__val = *(data->args.hipGraphicsGLRegisterImage.resource); + break; +// hipGraphicsMapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphicsMapResources: + if (data->args.hipGraphicsMapResources.resources) data->args.hipGraphicsMapResources.resources__val = *(data->args.hipGraphicsMapResources.resources); + break; +// hipGraphicsResourceGetMappedPointer[('void**', 'devPtr'), ('size_t*', 'size'), ('hipGraphicsResource_t', 'resource')] + case HIP_API_ID_hipGraphicsResourceGetMappedPointer: + if (data->args.hipGraphicsResourceGetMappedPointer.devPtr) data->args.hipGraphicsResourceGetMappedPointer.devPtr__val = *(data->args.hipGraphicsResourceGetMappedPointer.devPtr); + if (data->args.hipGraphicsResourceGetMappedPointer.size) data->args.hipGraphicsResourceGetMappedPointer.size__val = *(data->args.hipGraphicsResourceGetMappedPointer.size); + break; +// hipGraphicsSubResourceGetMappedArray[('hipArray_t*', 'array'), ('hipGraphicsResource_t', 'resource'), ('unsigned int', 'arrayIndex'), ('unsigned int', 'mipLevel')] + case HIP_API_ID_hipGraphicsSubResourceGetMappedArray: + if (data->args.hipGraphicsSubResourceGetMappedArray.array) data->args.hipGraphicsSubResourceGetMappedArray.array__val = *(data->args.hipGraphicsSubResourceGetMappedArray.array); + break; +// hipGraphicsUnmapResources[('int', 'count'), ('hipGraphicsResource_t*', 'resources'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipGraphicsUnmapResources: + if 
(data->args.hipGraphicsUnmapResources.resources) data->args.hipGraphicsUnmapResources.resources__val = *(data->args.hipGraphicsUnmapResources.resources); + break; +// hipGraphicsUnregisterResource[('hipGraphicsResource_t', 'resource')] + case HIP_API_ID_hipGraphicsUnregisterResource: + break; +// hipHccModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'globalWorkSizeX'), ('unsigned int', 'globalWorkSizeY'), ('unsigned int', 'globalWorkSizeZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'hStream'), ('void**', 'kernelParams'), ('void**', 'extra'), ('hipEvent_t', 'startEvent'), ('hipEvent_t', 'stopEvent')] + case HIP_API_ID_hipHccModuleLaunchKernel: + if (data->args.hipHccModuleLaunchKernel.kernelParams) data->args.hipHccModuleLaunchKernel.kernelParams__val = *(data->args.hipHccModuleLaunchKernel.kernelParams); + if (data->args.hipHccModuleLaunchKernel.extra) data->args.hipHccModuleLaunchKernel.extra__val = *(data->args.hipHccModuleLaunchKernel.extra); + break; +// hipHostAlloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] + case HIP_API_ID_hipHostAlloc: + if (data->args.hipHostAlloc.ptr) data->args.hipHostAlloc.ptr__val = *(data->args.hipHostAlloc.ptr); + break; +// hipHostFree[('void*', 'ptr')] + case HIP_API_ID_hipHostFree: + break; +// hipHostGetDevicePointer[('void**', 'devPtr'), ('void*', 'hstPtr'), ('unsigned int', 'flags')] + case HIP_API_ID_hipHostGetDevicePointer: + if (data->args.hipHostGetDevicePointer.devPtr) data->args.hipHostGetDevicePointer.devPtr__val = *(data->args.hipHostGetDevicePointer.devPtr); + break; +// hipHostGetFlags[('unsigned int*', 'flagsPtr'), ('void*', 'hostPtr')] + case HIP_API_ID_hipHostGetFlags: + if (data->args.hipHostGetFlags.flagsPtr) data->args.hipHostGetFlags.flagsPtr__val = *(data->args.hipHostGetFlags.flagsPtr); + break; +// hipHostMalloc[('void**', 'ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] 
+ case HIP_API_ID_hipHostMalloc: + if (data->args.hipHostMalloc.ptr) data->args.hipHostMalloc.ptr__val = *(data->args.hipHostMalloc.ptr); + break; +// hipHostRegister[('void*', 'hostPtr'), ('size_t', 'sizeBytes'), ('unsigned int', 'flags')] + case HIP_API_ID_hipHostRegister: + break; +// hipHostUnregister[('void*', 'hostPtr')] + case HIP_API_ID_hipHostUnregister: + break; +// hipImportExternalMemory[('hipExternalMemory_t*', 'extMem_out'), ('const hipExternalMemoryHandleDesc*', 'memHandleDesc')] + case HIP_API_ID_hipImportExternalMemory: + if (data->args.hipImportExternalMemory.extMem_out) data->args.hipImportExternalMemory.extMem_out__val = *(data->args.hipImportExternalMemory.extMem_out); + if (data->args.hipImportExternalMemory.memHandleDesc) data->args.hipImportExternalMemory.memHandleDesc__val = *(data->args.hipImportExternalMemory.memHandleDesc); + break; +// hipImportExternalSemaphore[('hipExternalSemaphore_t*', 'extSem_out'), ('const hipExternalSemaphoreHandleDesc*', 'semHandleDesc')] + case HIP_API_ID_hipImportExternalSemaphore: + if (data->args.hipImportExternalSemaphore.extSem_out) data->args.hipImportExternalSemaphore.extSem_out__val = *(data->args.hipImportExternalSemaphore.extSem_out); + if (data->args.hipImportExternalSemaphore.semHandleDesc) data->args.hipImportExternalSemaphore.semHandleDesc__val = *(data->args.hipImportExternalSemaphore.semHandleDesc); + break; +// hipInit[('unsigned int', 'flags')] + case HIP_API_ID_hipInit: + break; +// hipIpcCloseMemHandle[('void*', 'devPtr')] + case HIP_API_ID_hipIpcCloseMemHandle: + break; +// hipIpcGetEventHandle[('hipIpcEventHandle_t*', 'handle'), ('hipEvent_t', 'event')] + case HIP_API_ID_hipIpcGetEventHandle: + if (data->args.hipIpcGetEventHandle.handle) data->args.hipIpcGetEventHandle.handle__val = *(data->args.hipIpcGetEventHandle.handle); + break; +// hipIpcGetMemHandle[('hipIpcMemHandle_t*', 'handle'), ('void*', 'devPtr')] + case HIP_API_ID_hipIpcGetMemHandle: + if 
(data->args.hipIpcGetMemHandle.handle) data->args.hipIpcGetMemHandle.handle__val = *(data->args.hipIpcGetMemHandle.handle); + break; +// hipIpcOpenEventHandle[('hipEvent_t*', 'event'), ('hipIpcEventHandle_t', 'handle')] + case HIP_API_ID_hipIpcOpenEventHandle: + if (data->args.hipIpcOpenEventHandle.event) data->args.hipIpcOpenEventHandle.event__val = *(data->args.hipIpcOpenEventHandle.event); + break; +// hipIpcOpenMemHandle[('void**', 'devPtr'), ('hipIpcMemHandle_t', 'handle'), ('unsigned int', 'flags')] + case HIP_API_ID_hipIpcOpenMemHandle: + if (data->args.hipIpcOpenMemHandle.devPtr) data->args.hipIpcOpenMemHandle.devPtr__val = *(data->args.hipIpcOpenMemHandle.devPtr); + break; +// hipLaunchByPtr[('const void*', 'hostFunction')] + case HIP_API_ID_hipLaunchByPtr: + break; +// hipLaunchCooperativeKernel[('const void*', 'f'), ('dim3', 'gridDim'), ('dim3', 'blockDimX'), ('void**', 'kernelParams'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipLaunchCooperativeKernel: + if (data->args.hipLaunchCooperativeKernel.kernelParams) data->args.hipLaunchCooperativeKernel.kernelParams__val = *(data->args.hipLaunchCooperativeKernel.kernelParams); + break; +// hipLaunchCooperativeKernelMultiDevice[('hipLaunchParams*', 'launchParamsList'), ('int', 'numDevices'), ('unsigned int', 'flags')] + case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: + if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList) data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val = *(data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList); + break; +// hipLaunchHostFunc[('hipStream_t', 'stream'), ('hipHostFn_t', 'fn'), ('void*', 'userData')] + case HIP_API_ID_hipLaunchHostFunc: + break; +// hipLaunchKernel[('const void*', 'function_address'), ('dim3', 'numBlocks'), ('dim3', 'dimBlocks'), ('void**', 'args'), ('size_t', 'sharedMemBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipLaunchKernel: + if 
(data->args.hipLaunchKernel.args) data->args.hipLaunchKernel.args__val = *(data->args.hipLaunchKernel.args); + break; +// hipMalloc[('void**', 'ptr'), ('size_t', 'size')] + case HIP_API_ID_hipMalloc: + if (data->args.hipMalloc.ptr) data->args.hipMalloc.ptr__val = *(data->args.hipMalloc.ptr); + break; +// hipMalloc3D[('hipPitchedPtr*', 'pitchedDevPtr'), ('hipExtent', 'extent')] + case HIP_API_ID_hipMalloc3D: + if (data->args.hipMalloc3D.pitchedDevPtr) data->args.hipMalloc3D.pitchedDevPtr__val = *(data->args.hipMalloc3D.pitchedDevPtr); + break; +// hipMalloc3DArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMalloc3DArray: + if (data->args.hipMalloc3DArray.array) data->args.hipMalloc3DArray.array__val = *(data->args.hipMalloc3DArray.array); + if (data->args.hipMalloc3DArray.desc) data->args.hipMalloc3DArray.desc__val = *(data->args.hipMalloc3DArray.desc); + break; +// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMallocArray: + if (data->args.hipMallocArray.array) data->args.hipMallocArray.array__val = *(data->args.hipMallocArray.array); + if (data->args.hipMallocArray.desc) data->args.hipMallocArray.desc__val = *(data->args.hipMallocArray.desc); + break; +// hipMallocAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMallocAsync: + if (data->args.hipMallocAsync.dev_ptr) data->args.hipMallocAsync.dev_ptr__val = *(data->args.hipMallocAsync.dev_ptr); + break; +// hipMallocFromPoolAsync[('void**', 'dev_ptr'), ('size_t', 'size'), ('hipMemPool_t', 'mem_pool'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMallocFromPoolAsync: + if (data->args.hipMallocFromPoolAsync.dev_ptr) data->args.hipMallocFromPoolAsync.dev_ptr__val = *(data->args.hipMallocFromPoolAsync.dev_ptr); + break; +// hipMallocHost[('void**', 
'ptr'), ('size_t', 'size')] + case HIP_API_ID_hipMallocHost: + if (data->args.hipMallocHost.ptr) data->args.hipMallocHost.ptr__val = *(data->args.hipMallocHost.ptr); + break; +// hipMallocManaged[('void**', 'dev_ptr'), ('size_t', 'size'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMallocManaged: + if (data->args.hipMallocManaged.dev_ptr) data->args.hipMallocManaged.dev_ptr__val = *(data->args.hipMallocManaged.dev_ptr); + break; +// hipMallocMipmappedArray[('hipMipmappedArray_t*', 'mipmappedArray'), ('const hipChannelFormatDesc*', 'desc'), ('hipExtent', 'extent'), ('unsigned int', 'numLevels'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMallocMipmappedArray: + if (data->args.hipMallocMipmappedArray.mipmappedArray) data->args.hipMallocMipmappedArray.mipmappedArray__val = *(data->args.hipMallocMipmappedArray.mipmappedArray); + if (data->args.hipMallocMipmappedArray.desc) data->args.hipMallocMipmappedArray.desc__val = *(data->args.hipMallocMipmappedArray.desc); + break; +// hipMallocPitch[('void**', 'ptr'), ('size_t*', 'pitch'), ('size_t', 'width'), ('size_t', 'height')] + case HIP_API_ID_hipMallocPitch: + if (data->args.hipMallocPitch.ptr) data->args.hipMallocPitch.ptr__val = *(data->args.hipMallocPitch.ptr); + if (data->args.hipMallocPitch.pitch) data->args.hipMallocPitch.pitch__val = *(data->args.hipMallocPitch.pitch); + break; +// hipMemAddressFree[('void*', 'devPtr'), ('size_t', 'size')] + case HIP_API_ID_hipMemAddressFree: + break; +// hipMemAddressReserve[('void**', 'ptr'), ('size_t', 'size'), ('size_t', 'alignment'), ('void*', 'addr'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemAddressReserve: + if (data->args.hipMemAddressReserve.ptr) data->args.hipMemAddressReserve.ptr__val = *(data->args.hipMemAddressReserve.ptr); + break; +// hipMemAdvise[('const void*', 'dev_ptr'), ('size_t', 'count'), ('hipMemoryAdvise', 'advice'), ('int', 'device')] + case HIP_API_ID_hipMemAdvise: + break; +// hipMemAllocHost[('void**', 'ptr'), ('size_t', 'size')] 
+ case HIP_API_ID_hipMemAllocHost: + if (data->args.hipMemAllocHost.ptr) data->args.hipMemAllocHost.ptr__val = *(data->args.hipMemAllocHost.ptr); + break; +// hipMemAllocPitch[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'pitch'), ('size_t', 'widthInBytes'), ('size_t', 'height'), ('unsigned int', 'elementSizeBytes')] + case HIP_API_ID_hipMemAllocPitch: + if (data->args.hipMemAllocPitch.dptr) data->args.hipMemAllocPitch.dptr__val = *(data->args.hipMemAllocPitch.dptr); + if (data->args.hipMemAllocPitch.pitch) data->args.hipMemAllocPitch.pitch__val = *(data->args.hipMemAllocPitch.pitch); + break; +// hipMemCreate[('hipMemGenericAllocationHandle_t*', 'handle'), ('size_t', 'size'), ('const hipMemAllocationProp*', 'prop'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemCreate: + if (data->args.hipMemCreate.handle) data->args.hipMemCreate.handle__val = *(data->args.hipMemCreate.handle); + if (data->args.hipMemCreate.prop) data->args.hipMemCreate.prop__val = *(data->args.hipMemCreate.prop); + break; +// hipMemExportToShareableHandle[('void*', 'shareableHandle'), ('hipMemGenericAllocationHandle_t', 'handle'), ('hipMemAllocationHandleType', 'handleType'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemExportToShareableHandle: + break; +// hipMemGetAccess[('unsigned long long*', 'flags'), ('const hipMemLocation*', 'location'), ('void*', 'ptr')] + case HIP_API_ID_hipMemGetAccess: + if (data->args.hipMemGetAccess.flags) data->args.hipMemGetAccess.flags__val = *(data->args.hipMemGetAccess.flags); + if (data->args.hipMemGetAccess.location) data->args.hipMemGetAccess.location__val = *(data->args.hipMemGetAccess.location); + break; +// hipMemGetAddressRange[('hipDeviceptr_t*', 'pbase'), ('size_t*', 'psize'), ('hipDeviceptr_t', 'dptr')] + case HIP_API_ID_hipMemGetAddressRange: + if (data->args.hipMemGetAddressRange.pbase) data->args.hipMemGetAddressRange.pbase__val = *(data->args.hipMemGetAddressRange.pbase); + if (data->args.hipMemGetAddressRange.psize) 
data->args.hipMemGetAddressRange.psize__val = *(data->args.hipMemGetAddressRange.psize); + break; +// hipMemGetAllocationGranularity[('size_t*', 'granularity'), ('const hipMemAllocationProp*', 'prop'), ('hipMemAllocationGranularity_flags', 'option')] + case HIP_API_ID_hipMemGetAllocationGranularity: + if (data->args.hipMemGetAllocationGranularity.granularity) data->args.hipMemGetAllocationGranularity.granularity__val = *(data->args.hipMemGetAllocationGranularity.granularity); + if (data->args.hipMemGetAllocationGranularity.prop) data->args.hipMemGetAllocationGranularity.prop__val = *(data->args.hipMemGetAllocationGranularity.prop); + break; +// hipMemGetAllocationPropertiesFromHandle[('hipMemAllocationProp*', 'prop'), ('hipMemGenericAllocationHandle_t', 'handle')] + case HIP_API_ID_hipMemGetAllocationPropertiesFromHandle: + if (data->args.hipMemGetAllocationPropertiesFromHandle.prop) data->args.hipMemGetAllocationPropertiesFromHandle.prop__val = *(data->args.hipMemGetAllocationPropertiesFromHandle.prop); + break; +// hipMemGetInfo[('size_t*', 'free'), ('size_t*', 'total')] + case HIP_API_ID_hipMemGetInfo: + if (data->args.hipMemGetInfo.free) data->args.hipMemGetInfo.free__val = *(data->args.hipMemGetInfo.free); + if (data->args.hipMemGetInfo.total) data->args.hipMemGetInfo.total__val = *(data->args.hipMemGetInfo.total); + break; +// hipMemImportFromShareableHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'osHandle'), ('hipMemAllocationHandleType', 'shHandleType')] + case HIP_API_ID_hipMemImportFromShareableHandle: + if (data->args.hipMemImportFromShareableHandle.handle) data->args.hipMemImportFromShareableHandle.handle__val = *(data->args.hipMemImportFromShareableHandle.handle); + break; +// hipMemMap[('void*', 'ptr'), ('size_t', 'size'), ('size_t', 'offset'), ('hipMemGenericAllocationHandle_t', 'handle'), ('unsigned long long', 'flags')] + case HIP_API_ID_hipMemMap: + break; +// hipMemMapArrayAsync[('hipArrayMapInfo*', 'mapInfoList'), ('unsigned 
int', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemMapArrayAsync: + if (data->args.hipMemMapArrayAsync.mapInfoList) data->args.hipMemMapArrayAsync.mapInfoList__val = *(data->args.hipMemMapArrayAsync.mapInfoList); + break; +// hipMemPoolCreate[('hipMemPool_t*', 'mem_pool'), ('const hipMemPoolProps*', 'pool_props')] + case HIP_API_ID_hipMemPoolCreate: + if (data->args.hipMemPoolCreate.mem_pool) data->args.hipMemPoolCreate.mem_pool__val = *(data->args.hipMemPoolCreate.mem_pool); + if (data->args.hipMemPoolCreate.pool_props) data->args.hipMemPoolCreate.pool_props__val = *(data->args.hipMemPoolCreate.pool_props); + break; +// hipMemPoolDestroy[('hipMemPool_t', 'mem_pool')] + case HIP_API_ID_hipMemPoolDestroy: + break; +// hipMemPoolExportPointer[('hipMemPoolPtrExportData*', 'export_data'), ('void*', 'dev_ptr')] + case HIP_API_ID_hipMemPoolExportPointer: + if (data->args.hipMemPoolExportPointer.export_data) data->args.hipMemPoolExportPointer.export_data__val = *(data->args.hipMemPoolExportPointer.export_data); + break; +// hipMemPoolExportToShareableHandle[('void*', 'shared_handle'), ('hipMemPool_t', 'mem_pool'), ('hipMemAllocationHandleType', 'handle_type'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMemPoolExportToShareableHandle: + break; +// hipMemPoolGetAccess[('hipMemAccessFlags*', 'flags'), ('hipMemPool_t', 'mem_pool'), ('hipMemLocation*', 'location')] + case HIP_API_ID_hipMemPoolGetAccess: + if (data->args.hipMemPoolGetAccess.flags) data->args.hipMemPoolGetAccess.flags__val = *(data->args.hipMemPoolGetAccess.flags); + if (data->args.hipMemPoolGetAccess.location) data->args.hipMemPoolGetAccess.location__val = *(data->args.hipMemPoolGetAccess.location); + break; +// hipMemPoolGetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipMemPoolGetAttribute: + break; +// hipMemPoolImportFromShareableHandle[('hipMemPool_t*', 'mem_pool'), ('void*', 'shared_handle'), ('hipMemAllocationHandleType', 
'handle_type'), ('unsigned int', 'flags')] + case HIP_API_ID_hipMemPoolImportFromShareableHandle: + if (data->args.hipMemPoolImportFromShareableHandle.mem_pool) data->args.hipMemPoolImportFromShareableHandle.mem_pool__val = *(data->args.hipMemPoolImportFromShareableHandle.mem_pool); + break; +// hipMemPoolImportPointer[('void**', 'dev_ptr'), ('hipMemPool_t', 'mem_pool'), ('hipMemPoolPtrExportData*', 'export_data')] + case HIP_API_ID_hipMemPoolImportPointer: + if (data->args.hipMemPoolImportPointer.dev_ptr) data->args.hipMemPoolImportPointer.dev_ptr__val = *(data->args.hipMemPoolImportPointer.dev_ptr); + if (data->args.hipMemPoolImportPointer.export_data) data->args.hipMemPoolImportPointer.export_data__val = *(data->args.hipMemPoolImportPointer.export_data); + break; +// hipMemPoolSetAccess[('hipMemPool_t', 'mem_pool'), ('const hipMemAccessDesc*', 'desc_list'), ('size_t', 'count')] + case HIP_API_ID_hipMemPoolSetAccess: + if (data->args.hipMemPoolSetAccess.desc_list) data->args.hipMemPoolSetAccess.desc_list__val = *(data->args.hipMemPoolSetAccess.desc_list); + break; +// hipMemPoolSetAttribute[('hipMemPool_t', 'mem_pool'), ('hipMemPoolAttr', 'attr'), ('void*', 'value')] + case HIP_API_ID_hipMemPoolSetAttribute: + break; +// hipMemPoolTrimTo[('hipMemPool_t', 'mem_pool'), ('size_t', 'min_bytes_to_hold')] + case HIP_API_ID_hipMemPoolTrimTo: + break; +// hipMemPrefetchAsync[('const void*', 'dev_ptr'), ('size_t', 'count'), ('int', 'device'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemPrefetchAsync: + break; +// hipMemPtrGetInfo[('void*', 'ptr'), ('size_t*', 'size')] + case HIP_API_ID_hipMemPtrGetInfo: + if (data->args.hipMemPtrGetInfo.size) data->args.hipMemPtrGetInfo.size__val = *(data->args.hipMemPtrGetInfo.size); + break; +// hipMemRangeGetAttribute[('void*', 'data'), ('size_t', 'data_size'), ('hipMemRangeAttribute', 'attribute'), ('const void*', 'dev_ptr'), ('size_t', 'count')] + case HIP_API_ID_hipMemRangeGetAttribute: + break; +// 
hipMemRangeGetAttributes[('void**', 'data'), ('size_t*', 'data_sizes'), ('hipMemRangeAttribute*', 'attributes'), ('size_t', 'num_attributes'), ('const void*', 'dev_ptr'), ('size_t', 'count')] + case HIP_API_ID_hipMemRangeGetAttributes: + if (data->args.hipMemRangeGetAttributes.data) data->args.hipMemRangeGetAttributes.data__val = *(data->args.hipMemRangeGetAttributes.data); + if (data->args.hipMemRangeGetAttributes.data_sizes) data->args.hipMemRangeGetAttributes.data_sizes__val = *(data->args.hipMemRangeGetAttributes.data_sizes); + if (data->args.hipMemRangeGetAttributes.attributes) data->args.hipMemRangeGetAttributes.attributes__val = *(data->args.hipMemRangeGetAttributes.attributes); + break; +// hipMemRelease[('hipMemGenericAllocationHandle_t', 'handle')] + case HIP_API_ID_hipMemRelease: + break; +// hipMemRetainAllocationHandle[('hipMemGenericAllocationHandle_t*', 'handle'), ('void*', 'addr')] + case HIP_API_ID_hipMemRetainAllocationHandle: + if (data->args.hipMemRetainAllocationHandle.handle) data->args.hipMemRetainAllocationHandle.handle__val = *(data->args.hipMemRetainAllocationHandle.handle); + break; +// hipMemSetAccess[('void*', 'ptr'), ('size_t', 'size'), ('const hipMemAccessDesc*', 'desc'), ('size_t', 'count')] + case HIP_API_ID_hipMemSetAccess: + if (data->args.hipMemSetAccess.desc) data->args.hipMemSetAccess.desc__val = *(data->args.hipMemSetAccess.desc); + break; +// hipMemUnmap[('void*', 'ptr'), ('size_t', 'size')] + case HIP_API_ID_hipMemUnmap: + break; +// hipMemcpy[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy: + break; +// hipMemcpy2D[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy2D: + break; +// hipMemcpy2DAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 
'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy2DAsync: + break; +// hipMemcpy2DFromArray[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy2DFromArray: + break; +// hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy2DFromArrayAsync: + break; +// hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpy2DToArray: + if (data->args.hipMemcpy2DToArray.dst) data->args.hipMemcpy2DToArray.dst__val = *(data->args.hipMemcpy2DToArray.dst); + break; +// hipMemcpy2DToArrayAsync[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy2DToArrayAsync: + if (data->args.hipMemcpy2DToArrayAsync.dst) data->args.hipMemcpy2DToArrayAsync.dst__val = *(data->args.hipMemcpy2DToArrayAsync.dst); + break; +// hipMemcpy3D[('const hipMemcpy3DParms*', 'p')] + case HIP_API_ID_hipMemcpy3D: + if (data->args.hipMemcpy3D.p) data->args.hipMemcpy3D.p__val = *(data->args.hipMemcpy3D.p); + break; +// hipMemcpy3DAsync[('const hipMemcpy3DParms*', 'p'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpy3DAsync: + if (data->args.hipMemcpy3DAsync.p) data->args.hipMemcpy3DAsync.p__val = *(data->args.hipMemcpy3DAsync.p); + break; +// hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), 
('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyAsync: + break; +// hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] + case HIP_API_ID_hipMemcpyAtoH: + if (data->args.hipMemcpyAtoH.srcArray) data->args.hipMemcpyAtoH.srcArray__val = *(data->args.hipMemcpyAtoH.srcArray); + break; +// hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyDtoD: + break; +// hipMemcpyDtoDAsync[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyDtoDAsync: + break; +// hipMemcpyDtoH[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyDtoH: + break; +// hipMemcpyDtoHAsync[('void*', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyDtoHAsync: + break; +// hipMemcpyFromArray[('void*', 'dst'), ('hipArray_const_t', 'srcArray'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyFromArray: + break; +// hipMemcpyFromSymbol[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyFromSymbol: + break; +// hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyFromSymbolAsync: + break; +// hipMemcpyHtoA[('hipArray*', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] + case HIP_API_ID_hipMemcpyHtoA: + if (data->args.hipMemcpyHtoA.dstArray) data->args.hipMemcpyHtoA.dstArray__val = *(data->args.hipMemcpyHtoA.dstArray); + break; +// hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyHtoD: + break; 
+// hipMemcpyHtoDAsync[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyHtoDAsync: + break; +// hipMemcpyParam2D[('const hip_Memcpy2D*', 'pCopy')] + case HIP_API_ID_hipMemcpyParam2D: + if (data->args.hipMemcpyParam2D.pCopy) data->args.hipMemcpyParam2D.pCopy__val = *(data->args.hipMemcpyParam2D.pCopy); + break; +// hipMemcpyParam2DAsync[('const hip_Memcpy2D*', 'pCopy'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyParam2DAsync: + if (data->args.hipMemcpyParam2DAsync.pCopy) data->args.hipMemcpyParam2DAsync.pCopy__val = *(data->args.hipMemcpyParam2DAsync.pCopy); + break; +// hipMemcpyPeer[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDeviceId'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemcpyPeer: + break; +// hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyPeerAsync: + break; +// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyToArray: + if (data->args.hipMemcpyToArray.dst) data->args.hipMemcpyToArray.dst__val = *(data->args.hipMemcpyToArray.dst); + break; +// hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] + case HIP_API_ID_hipMemcpyToSymbol: + break; +// hipMemcpyToSymbolAsync[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyToSymbolAsync: + break; +// hipMemcpyWithStream[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemcpyWithStream: + break; +// 
hipMemset[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes')] + case HIP_API_ID_hipMemset: + break; +// hipMemset2D[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height')] + case HIP_API_ID_hipMemset2D: + break; +// hipMemset2DAsync[('void*', 'dst'), ('size_t', 'pitch'), ('int', 'value'), ('size_t', 'width'), ('size_t', 'height'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemset2DAsync: + break; +// hipMemset3D[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent')] + case HIP_API_ID_hipMemset3D: + break; +// hipMemset3DAsync[('hipPitchedPtr', 'pitchedDevPtr'), ('int', 'value'), ('hipExtent', 'extent'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemset3DAsync: + break; +// hipMemsetAsync[('void*', 'dst'), ('int', 'value'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetAsync: + break; +// hipMemsetD16[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count')] + case HIP_API_ID_hipMemsetD16: + break; +// hipMemsetD16Async[('hipDeviceptr_t', 'dest'), ('unsigned short', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD16Async: + break; +// hipMemsetD32[('hipDeviceptr_t', 'dest'), ('int', 'value'), ('size_t', 'count')] + case HIP_API_ID_hipMemsetD32: + break; +// hipMemsetD32Async[('hipDeviceptr_t', 'dst'), ('int', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD32Async: + break; +// hipMemsetD8[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count')] + case HIP_API_ID_hipMemsetD8: + break; +// hipMemsetD8Async[('hipDeviceptr_t', 'dest'), ('unsigned char', 'value'), ('size_t', 'count'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipMemsetD8Async: + break; +// hipMipmappedArrayCreate[('hipMipmappedArray_t*', 'pHandle'), ('HIP_ARRAY3D_DESCRIPTOR*', 'pMipmappedArrayDesc'), ('unsigned int', 'numMipmapLevels')] + case 
HIP_API_ID_hipMipmappedArrayCreate: + if (data->args.hipMipmappedArrayCreate.pHandle) data->args.hipMipmappedArrayCreate.pHandle__val = *(data->args.hipMipmappedArrayCreate.pHandle); + if (data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc) data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc__val = *(data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc); + break; +// hipMipmappedArrayDestroy[('hipMipmappedArray_t', 'hMipmappedArray')] + case HIP_API_ID_hipMipmappedArrayDestroy: + break; +// hipMipmappedArrayGetLevel[('hipArray_t*', 'pLevelArray'), ('hipMipmappedArray_t', 'hMipMappedArray'), ('unsigned int', 'level')] + case HIP_API_ID_hipMipmappedArrayGetLevel: + if (data->args.hipMipmappedArrayGetLevel.pLevelArray) data->args.hipMipmappedArrayGetLevel.pLevelArray__val = *(data->args.hipMipmappedArrayGetLevel.pLevelArray); + break; +// hipModuleGetFunction[('hipFunction_t*', 'function'), ('hipModule_t', 'module'), ('const char*', 'kname')] + case HIP_API_ID_hipModuleGetFunction: + if (data->args.hipModuleGetFunction.function) data->args.hipModuleGetFunction.function__val = *(data->args.hipModuleGetFunction.function); + if (data->args.hipModuleGetFunction.kname) data->args.hipModuleGetFunction.kname__val = *(data->args.hipModuleGetFunction.kname); + break; +// hipModuleGetGlobal[('hipDeviceptr_t*', 'dptr'), ('size_t*', 'bytes'), ('hipModule_t', 'hmod'), ('const char*', 'name')] + case HIP_API_ID_hipModuleGetGlobal: + if (data->args.hipModuleGetGlobal.dptr) data->args.hipModuleGetGlobal.dptr__val = *(data->args.hipModuleGetGlobal.dptr); + if (data->args.hipModuleGetGlobal.bytes) data->args.hipModuleGetGlobal.bytes__val = *(data->args.hipModuleGetGlobal.bytes); + if (data->args.hipModuleGetGlobal.name) data->args.hipModuleGetGlobal.name__val = *(data->args.hipModuleGetGlobal.name); + break; +// hipModuleGetTexRef[('textureReference**', 'texRef'), ('hipModule_t', 'hmod'), ('const char*', 'name')] + case HIP_API_ID_hipModuleGetTexRef: + if 
(data->args.hipModuleGetTexRef.texRef) data->args.hipModuleGetTexRef.texRef__val = *(data->args.hipModuleGetTexRef.texRef); + if (data->args.hipModuleGetTexRef.name) data->args.hipModuleGetTexRef.name__val = *(data->args.hipModuleGetTexRef.name); + break; +// hipModuleLaunchCooperativeKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams')] + case HIP_API_ID_hipModuleLaunchCooperativeKernel: + if (data->args.hipModuleLaunchCooperativeKernel.kernelParams) data->args.hipModuleLaunchCooperativeKernel.kernelParams__val = *(data->args.hipModuleLaunchCooperativeKernel.kernelParams); + break; +// hipModuleLaunchCooperativeKernelMultiDevice[('hipFunctionLaunchParams*', 'launchParamsList'), ('unsigned int', 'numDevices'), ('unsigned int', 'flags')] + case HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice: + if (data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList) data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList__val = *(data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList); + break; +// hipModuleLaunchKernel[('hipFunction_t', 'f'), ('unsigned int', 'gridDimX'), ('unsigned int', 'gridDimY'), ('unsigned int', 'gridDimZ'), ('unsigned int', 'blockDimX'), ('unsigned int', 'blockDimY'), ('unsigned int', 'blockDimZ'), ('unsigned int', 'sharedMemBytes'), ('hipStream_t', 'stream'), ('void**', 'kernelParams'), ('void**', 'extra')] + case HIP_API_ID_hipModuleLaunchKernel: + if (data->args.hipModuleLaunchKernel.kernelParams) data->args.hipModuleLaunchKernel.kernelParams__val = *(data->args.hipModuleLaunchKernel.kernelParams); + if (data->args.hipModuleLaunchKernel.extra) data->args.hipModuleLaunchKernel.extra__val = *(data->args.hipModuleLaunchKernel.extra); + break; +// 
hipModuleLoad[('hipModule_t*', 'module'), ('const char*', 'fname')] + case HIP_API_ID_hipModuleLoad: + if (data->args.hipModuleLoad.module) data->args.hipModuleLoad.module__val = *(data->args.hipModuleLoad.module); + if (data->args.hipModuleLoad.fname) data->args.hipModuleLoad.fname__val = *(data->args.hipModuleLoad.fname); + break; +// hipModuleLoadData[('hipModule_t*', 'module'), ('const void*', 'image')] + case HIP_API_ID_hipModuleLoadData: + if (data->args.hipModuleLoadData.module) data->args.hipModuleLoadData.module__val = *(data->args.hipModuleLoadData.module); + break; +// hipModuleLoadDataEx[('hipModule_t*', 'module'), ('const void*', 'image'), ('unsigned int', 'numOptions'), ('hipJitOption*', 'options'), ('void**', 'optionsValues')] + case HIP_API_ID_hipModuleLoadDataEx: + if (data->args.hipModuleLoadDataEx.module) data->args.hipModuleLoadDataEx.module__val = *(data->args.hipModuleLoadDataEx.module); + if (data->args.hipModuleLoadDataEx.options) data->args.hipModuleLoadDataEx.options__val = *(data->args.hipModuleLoadDataEx.options); + if (data->args.hipModuleLoadDataEx.optionsValues) data->args.hipModuleLoadDataEx.optionsValues__val = *(data->args.hipModuleLoadDataEx.optionsValues); + break; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk')] + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: + if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks); + break; +// hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('hipFunction_t', 'f'), ('int', 'blockSize'), ('size_t', 'dynSharedMemPerBlk'), ('unsigned int', 'flags')] + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + if 
(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks); + break; +// hipModuleOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: + if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize); + if (data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize); + break; +// hipModuleOccupancyMaxPotentialBlockSizeWithFlags[('int*', 'gridSize'), ('int*', 'blockSize'), ('hipFunction_t', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit'), ('unsigned int', 'flags')] + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize); + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize) data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val = *(data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize); + break; +// hipModuleUnload[('hipModule_t', 'module')] + case HIP_API_ID_hipModuleUnload: + break; +// hipOccupancyMaxActiveBlocksPerMultiprocessor[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize')] + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: + if 
(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks); + break; +// hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags[('int*', 'numBlocks'), ('const void*', 'f'), ('int', 'blockSize'), ('size_t', 'dynamicSMemSize'), ('unsigned int', 'flags')] + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks) data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val = *(data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks); + break; +// hipOccupancyMaxPotentialBlockSize[('int*', 'gridSize'), ('int*', 'blockSize'), ('const void*', 'f'), ('size_t', 'dynSharedMemPerBlk'), ('int', 'blockSizeLimit')] + case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: + if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize) data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.gridSize); + if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize) data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val = *(data->args.hipOccupancyMaxPotentialBlockSize.blockSize); + break; +// hipPeekAtLastError[] + case HIP_API_ID_hipPeekAtLastError: + break; +// hipPointerGetAttribute[('void*', 'data'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] + case HIP_API_ID_hipPointerGetAttribute: + break; +// hipPointerGetAttributes[('hipPointerAttribute_t*', 'attributes'), ('const void*', 'ptr')] + case HIP_API_ID_hipPointerGetAttributes: + if (data->args.hipPointerGetAttributes.attributes) data->args.hipPointerGetAttributes.attributes__val = *(data->args.hipPointerGetAttributes.attributes); + break; +// hipPointerSetAttribute[('const void*', 'value'), ('hipPointer_attribute', 'attribute'), ('hipDeviceptr_t', 'ptr')] + case 
HIP_API_ID_hipPointerSetAttribute: + break; +// hipProfilerStart[] + case HIP_API_ID_hipProfilerStart: + break; +// hipProfilerStop[] + case HIP_API_ID_hipProfilerStop: + break; +// hipRuntimeGetVersion[('int*', 'runtimeVersion')] + case HIP_API_ID_hipRuntimeGetVersion: + if (data->args.hipRuntimeGetVersion.runtimeVersion) data->args.hipRuntimeGetVersion.runtimeVersion__val = *(data->args.hipRuntimeGetVersion.runtimeVersion); + break; +// hipSetDevice[('int', 'deviceId')] + case HIP_API_ID_hipSetDevice: + break; +// hipSetDeviceFlags[('unsigned int', 'flags')] + case HIP_API_ID_hipSetDeviceFlags: + break; +// hipSetupArgument[('const void*', 'arg'), ('size_t', 'size'), ('size_t', 'offset')] + case HIP_API_ID_hipSetupArgument: + break; +// hipSignalExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreSignalParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipSignalExternalSemaphoresAsync: + if (data->args.hipSignalExternalSemaphoresAsync.extSemArray) data->args.hipSignalExternalSemaphoresAsync.extSemArray__val = *(data->args.hipSignalExternalSemaphoresAsync.extSemArray); + if (data->args.hipSignalExternalSemaphoresAsync.paramsArray) data->args.hipSignalExternalSemaphoresAsync.paramsArray__val = *(data->args.hipSignalExternalSemaphoresAsync.paramsArray); + break; +// hipStreamAddCallback[('hipStream_t', 'stream'), ('hipStreamCallback_t', 'callback'), ('void*', 'userData'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamAddCallback: + break; +// hipStreamAttachMemAsync[('hipStream_t', 'stream'), ('void*', 'dev_ptr'), ('size_t', 'length'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamAttachMemAsync: + break; +// hipStreamBeginCapture[('hipStream_t', 'stream'), ('hipStreamCaptureMode', 'mode')] + case HIP_API_ID_hipStreamBeginCapture: + break; +// hipStreamCreate[('hipStream_t*', 'stream')] + case HIP_API_ID_hipStreamCreate: + if 
(data->args.hipStreamCreate.stream) data->args.hipStreamCreate.stream__val = *(data->args.hipStreamCreate.stream); + break; +// hipStreamCreateWithFlags[('hipStream_t*', 'stream'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamCreateWithFlags: + if (data->args.hipStreamCreateWithFlags.stream) data->args.hipStreamCreateWithFlags.stream__val = *(data->args.hipStreamCreateWithFlags.stream); + break; +// hipStreamCreateWithPriority[('hipStream_t*', 'stream'), ('unsigned int', 'flags'), ('int', 'priority')] + case HIP_API_ID_hipStreamCreateWithPriority: + if (data->args.hipStreamCreateWithPriority.stream) data->args.hipStreamCreateWithPriority.stream__val = *(data->args.hipStreamCreateWithPriority.stream); + break; +// hipStreamDestroy[('hipStream_t', 'stream')] + case HIP_API_ID_hipStreamDestroy: + break; +// hipStreamEndCapture[('hipStream_t', 'stream'), ('hipGraph_t*', 'pGraph')] + case HIP_API_ID_hipStreamEndCapture: + if (data->args.hipStreamEndCapture.pGraph) data->args.hipStreamEndCapture.pGraph__val = *(data->args.hipStreamEndCapture.pGraph); + break; +// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')] + case HIP_API_ID_hipStreamGetCaptureInfo: + if (data->args.hipStreamGetCaptureInfo.pCaptureStatus) data->args.hipStreamGetCaptureInfo.pCaptureStatus__val = *(data->args.hipStreamGetCaptureInfo.pCaptureStatus); + if (data->args.hipStreamGetCaptureInfo.pId) data->args.hipStreamGetCaptureInfo.pId__val = *(data->args.hipStreamGetCaptureInfo.pId); + break; +// hipStreamGetCaptureInfo_v2[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'captureStatus_out'), ('unsigned long long*', 'id_out'), ('hipGraph_t*', 'graph_out'), ('const hipGraphNode_t**', 'dependencies_out'), ('size_t*', 'numDependencies_out')] + case HIP_API_ID_hipStreamGetCaptureInfo_v2: + if (data->args.hipStreamGetCaptureInfo_v2.captureStatus_out) data->args.hipStreamGetCaptureInfo_v2.captureStatus_out__val = 
*(data->args.hipStreamGetCaptureInfo_v2.captureStatus_out); + if (data->args.hipStreamGetCaptureInfo_v2.id_out) data->args.hipStreamGetCaptureInfo_v2.id_out__val = *(data->args.hipStreamGetCaptureInfo_v2.id_out); + if (data->args.hipStreamGetCaptureInfo_v2.graph_out) data->args.hipStreamGetCaptureInfo_v2.graph_out__val = *(data->args.hipStreamGetCaptureInfo_v2.graph_out); + if (data->args.hipStreamGetCaptureInfo_v2.dependencies_out) data->args.hipStreamGetCaptureInfo_v2.dependencies_out__val = *(data->args.hipStreamGetCaptureInfo_v2.dependencies_out); + if (data->args.hipStreamGetCaptureInfo_v2.numDependencies_out) data->args.hipStreamGetCaptureInfo_v2.numDependencies_out__val = *(data->args.hipStreamGetCaptureInfo_v2.numDependencies_out); + break; +// hipStreamGetDevice[('hipStream_t', 'stream'), ('hipDevice_t*', 'device')] + case HIP_API_ID_hipStreamGetDevice: + if (data->args.hipStreamGetDevice.device) data->args.hipStreamGetDevice.device__val = *(data->args.hipStreamGetDevice.device); + break; +// hipStreamGetFlags[('hipStream_t', 'stream'), ('unsigned int*', 'flags')] + case HIP_API_ID_hipStreamGetFlags: + if (data->args.hipStreamGetFlags.flags) data->args.hipStreamGetFlags.flags__val = *(data->args.hipStreamGetFlags.flags); + break; +// hipStreamGetPriority[('hipStream_t', 'stream'), ('int*', 'priority')] + case HIP_API_ID_hipStreamGetPriority: + if (data->args.hipStreamGetPriority.priority) data->args.hipStreamGetPriority.priority__val = *(data->args.hipStreamGetPriority.priority); + break; +// hipStreamIsCapturing[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus')] + case HIP_API_ID_hipStreamIsCapturing: + if (data->args.hipStreamIsCapturing.pCaptureStatus) data->args.hipStreamIsCapturing.pCaptureStatus__val = *(data->args.hipStreamIsCapturing.pCaptureStatus); + break; +// hipStreamQuery[('hipStream_t', 'stream')] + case HIP_API_ID_hipStreamQuery: + break; +// hipStreamSynchronize[('hipStream_t', 'stream')] + case 
HIP_API_ID_hipStreamSynchronize: + break; +// hipStreamUpdateCaptureDependencies[('hipStream_t', 'stream'), ('hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamUpdateCaptureDependencies: + if (data->args.hipStreamUpdateCaptureDependencies.dependencies) data->args.hipStreamUpdateCaptureDependencies.dependencies__val = *(data->args.hipStreamUpdateCaptureDependencies.dependencies); + break; +// hipStreamWaitEvent[('hipStream_t', 'stream'), ('hipEvent_t', 'event'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamWaitEvent: + break; +// hipStreamWaitValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags'), ('unsigned int', 'mask')] + case HIP_API_ID_hipStreamWaitValue32: + break; +// hipStreamWaitValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags'), ('uint64_t', 'mask')] + case HIP_API_ID_hipStreamWaitValue64: + break; +// hipStreamWriteValue32[('hipStream_t', 'stream'), ('void*', 'ptr'), ('unsigned int', 'value'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamWriteValue32: + break; +// hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')] + case HIP_API_ID_hipStreamWriteValue64: + break; +// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetAddress: + if (data->args.hipTexRefGetAddress.dev_ptr) data->args.hipTexRefGetAddress.dev_ptr__val = *(data->args.hipTexRefGetAddress.dev_ptr); + if (data->args.hipTexRefGetAddress.texRef) data->args.hipTexRefGetAddress.texRef__val = *(data->args.hipTexRefGetAddress.texRef); + break; +// hipTexRefGetFlags[('unsigned int*', 'pFlags'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetFlags: + if (data->args.hipTexRefGetFlags.pFlags) data->args.hipTexRefGetFlags.pFlags__val = 
*(data->args.hipTexRefGetFlags.pFlags); + if (data->args.hipTexRefGetFlags.texRef) data->args.hipTexRefGetFlags.texRef__val = *(data->args.hipTexRefGetFlags.texRef); + break; +// hipTexRefGetFormat[('hipArray_Format*', 'pFormat'), ('int*', 'pNumChannels'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetFormat: + if (data->args.hipTexRefGetFormat.pFormat) data->args.hipTexRefGetFormat.pFormat__val = *(data->args.hipTexRefGetFormat.pFormat); + if (data->args.hipTexRefGetFormat.pNumChannels) data->args.hipTexRefGetFormat.pNumChannels__val = *(data->args.hipTexRefGetFormat.pNumChannels); + if (data->args.hipTexRefGetFormat.texRef) data->args.hipTexRefGetFormat.texRef__val = *(data->args.hipTexRefGetFormat.texRef); + break; +// hipTexRefGetMaxAnisotropy[('int*', 'pmaxAnsio'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMaxAnisotropy: + if (data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio) data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio__val = *(data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio); + if (data->args.hipTexRefGetMaxAnisotropy.texRef) data->args.hipTexRefGetMaxAnisotropy.texRef__val = *(data->args.hipTexRefGetMaxAnisotropy.texRef); + break; +// hipTexRefGetMipMappedArray[('hipMipmappedArray_t*', 'pArray'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMipMappedArray: + if (data->args.hipTexRefGetMipMappedArray.pArray) data->args.hipTexRefGetMipMappedArray.pArray__val = *(data->args.hipTexRefGetMipMappedArray.pArray); + if (data->args.hipTexRefGetMipMappedArray.texRef) data->args.hipTexRefGetMipMappedArray.texRef__val = *(data->args.hipTexRefGetMipMappedArray.texRef); + break; +// hipTexRefGetMipmapLevelBias[('float*', 'pbias'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMipmapLevelBias: + if (data->args.hipTexRefGetMipmapLevelBias.pbias) data->args.hipTexRefGetMipmapLevelBias.pbias__val = *(data->args.hipTexRefGetMipmapLevelBias.pbias); + if 
(data->args.hipTexRefGetMipmapLevelBias.texRef) data->args.hipTexRefGetMipmapLevelBias.texRef__val = *(data->args.hipTexRefGetMipmapLevelBias.texRef); + break; +// hipTexRefGetMipmapLevelClamp[('float*', 'pminMipmapLevelClamp'), ('float*', 'pmaxMipmapLevelClamp'), ('const textureReference*', 'texRef')] + case HIP_API_ID_hipTexRefGetMipmapLevelClamp: + if (data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp) data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp__val = *(data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp); + if (data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp) data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp__val = *(data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp); + if (data->args.hipTexRefGetMipmapLevelClamp.texRef) data->args.hipTexRefGetMipmapLevelClamp.texRef__val = *(data->args.hipTexRefGetMipmapLevelClamp.texRef); + break; +// hipTexRefSetAddress[('size_t*', 'ByteOffset'), ('textureReference*', 'texRef'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'bytes')] + case HIP_API_ID_hipTexRefSetAddress: + if (data->args.hipTexRefSetAddress.ByteOffset) data->args.hipTexRefSetAddress.ByteOffset__val = *(data->args.hipTexRefSetAddress.ByteOffset); + if (data->args.hipTexRefSetAddress.texRef) data->args.hipTexRefSetAddress.texRef__val = *(data->args.hipTexRefSetAddress.texRef); + break; +// hipTexRefSetAddress2D[('textureReference*', 'texRef'), ('const HIP_ARRAY_DESCRIPTOR*', 'desc'), ('hipDeviceptr_t', 'dptr'), ('size_t', 'Pitch')] + case HIP_API_ID_hipTexRefSetAddress2D: + if (data->args.hipTexRefSetAddress2D.texRef) data->args.hipTexRefSetAddress2D.texRef__val = *(data->args.hipTexRefSetAddress2D.texRef); + if (data->args.hipTexRefSetAddress2D.desc) data->args.hipTexRefSetAddress2D.desc__val = *(data->args.hipTexRefSetAddress2D.desc); + break; +// hipTexRefSetArray[('textureReference*', 'tex'), ('hipArray_const_t', 'array'), ('unsigned int', 'flags')] + case HIP_API_ID_hipTexRefSetArray: + if 
(data->args.hipTexRefSetArray.tex) data->args.hipTexRefSetArray.tex__val = *(data->args.hipTexRefSetArray.tex); + break; +// hipTexRefSetBorderColor[('textureReference*', 'texRef'), ('float*', 'pBorderColor')] + case HIP_API_ID_hipTexRefSetBorderColor: + if (data->args.hipTexRefSetBorderColor.texRef) data->args.hipTexRefSetBorderColor.texRef__val = *(data->args.hipTexRefSetBorderColor.texRef); + if (data->args.hipTexRefSetBorderColor.pBorderColor) data->args.hipTexRefSetBorderColor.pBorderColor__val = *(data->args.hipTexRefSetBorderColor.pBorderColor); + break; +// hipTexRefSetFlags[('textureReference*', 'texRef'), ('unsigned int', 'Flags')] + case HIP_API_ID_hipTexRefSetFlags: + if (data->args.hipTexRefSetFlags.texRef) data->args.hipTexRefSetFlags.texRef__val = *(data->args.hipTexRefSetFlags.texRef); + break; +// hipTexRefSetFormat[('textureReference*', 'texRef'), ('hipArray_Format', 'fmt'), ('int', 'NumPackedComponents')] + case HIP_API_ID_hipTexRefSetFormat: + if (data->args.hipTexRefSetFormat.texRef) data->args.hipTexRefSetFormat.texRef__val = *(data->args.hipTexRefSetFormat.texRef); + break; +// hipTexRefSetMaxAnisotropy[('textureReference*', 'texRef'), ('unsigned int', 'maxAniso')] + case HIP_API_ID_hipTexRefSetMaxAnisotropy: + if (data->args.hipTexRefSetMaxAnisotropy.texRef) data->args.hipTexRefSetMaxAnisotropy.texRef__val = *(data->args.hipTexRefSetMaxAnisotropy.texRef); + break; +// hipTexRefSetMipmapLevelBias[('textureReference*', 'texRef'), ('float', 'bias')] + case HIP_API_ID_hipTexRefSetMipmapLevelBias: + if (data->args.hipTexRefSetMipmapLevelBias.texRef) data->args.hipTexRefSetMipmapLevelBias.texRef__val = *(data->args.hipTexRefSetMipmapLevelBias.texRef); + break; +// hipTexRefSetMipmapLevelClamp[('textureReference*', 'texRef'), ('float', 'minMipMapLevelClamp'), ('float', 'maxMipMapLevelClamp')] + case HIP_API_ID_hipTexRefSetMipmapLevelClamp: + if (data->args.hipTexRefSetMipmapLevelClamp.texRef) data->args.hipTexRefSetMipmapLevelClamp.texRef__val = 
*(data->args.hipTexRefSetMipmapLevelClamp.texRef); + break; +// hipTexRefSetMipmappedArray[('textureReference*', 'texRef'), ('hipMipmappedArray*', 'mipmappedArray'), ('unsigned int', 'Flags')] + case HIP_API_ID_hipTexRefSetMipmappedArray: + if (data->args.hipTexRefSetMipmappedArray.texRef) data->args.hipTexRefSetMipmappedArray.texRef__val = *(data->args.hipTexRefSetMipmappedArray.texRef); + if (data->args.hipTexRefSetMipmappedArray.mipmappedArray) data->args.hipTexRefSetMipmappedArray.mipmappedArray__val = *(data->args.hipTexRefSetMipmappedArray.mipmappedArray); + break; +// hipThreadExchangeStreamCaptureMode[('hipStreamCaptureMode*', 'mode')] + case HIP_API_ID_hipThreadExchangeStreamCaptureMode: + if (data->args.hipThreadExchangeStreamCaptureMode.mode) data->args.hipThreadExchangeStreamCaptureMode.mode__val = *(data->args.hipThreadExchangeStreamCaptureMode.mode); + break; +// hipUserObjectCreate[('hipUserObject_t*', 'object_out'), ('void*', 'ptr'), ('hipHostFn_t', 'destroy'), ('unsigned int', 'initialRefcount'), ('unsigned int', 'flags')] + case HIP_API_ID_hipUserObjectCreate: + if (data->args.hipUserObjectCreate.object_out) data->args.hipUserObjectCreate.object_out__val = *(data->args.hipUserObjectCreate.object_out); + break; +// hipUserObjectRelease[('hipUserObject_t', 'object'), ('unsigned int', 'count')] + case HIP_API_ID_hipUserObjectRelease: + break; +// hipUserObjectRetain[('hipUserObject_t', 'object'), ('unsigned int', 'count')] + case HIP_API_ID_hipUserObjectRetain: + break; +// hipWaitExternalSemaphoresAsync[('const hipExternalSemaphore_t*', 'extSemArray'), ('const hipExternalSemaphoreWaitParams*', 'paramsArray'), ('unsigned int', 'numExtSems'), ('hipStream_t', 'stream')] + case HIP_API_ID_hipWaitExternalSemaphoresAsync: + if (data->args.hipWaitExternalSemaphoresAsync.extSemArray) data->args.hipWaitExternalSemaphoresAsync.extSemArray__val = *(data->args.hipWaitExternalSemaphoresAsync.extSemArray); + if 
(data->args.hipWaitExternalSemaphoresAsync.paramsArray) data->args.hipWaitExternalSemaphoresAsync.paramsArray__val = *(data->args.hipWaitExternalSemaphoresAsync.paramsArray); + break; + default: break; + }; +} + +#include +#include +// HIP API string method, method name and parameters +static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) { + std::ostringstream oss; + switch (id) { + case HIP_API_ID___hipPopCallConfiguration: + oss << "__hipPopCallConfiguration("; + if (data->args.__hipPopCallConfiguration.gridDim == NULL) oss << "gridDim=NULL"; + else { oss << "gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPopCallConfiguration.gridDim__val); } + if (data->args.__hipPopCallConfiguration.blockDim == NULL) oss << ", blockDim=NULL"; + else { oss << ", blockDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPopCallConfiguration.blockDim__val); } + if (data->args.__hipPopCallConfiguration.sharedMem == NULL) oss << ", sharedMem=NULL"; + else { oss << ", sharedMem="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPopCallConfiguration.sharedMem__val); } + if (data->args.__hipPopCallConfiguration.stream == NULL) oss << ", stream=NULL"; + else { oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPopCallConfiguration.stream__val); } + oss << ")"; + break; + case HIP_API_ID___hipPushCallConfiguration: + oss << "__hipPushCallConfiguration("; + oss << "gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.gridDim); + oss << ", blockDim="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.blockDim); + oss << ", sharedMem="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.sharedMem); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.__hipPushCallConfiguration.stream); + oss << ")"; + break; + case 
HIP_API_ID_hipArray3DCreate: + oss << "hipArray3DCreate("; + if (data->args.hipArray3DCreate.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipArray3DCreate.array__val); } + if (data->args.hipArray3DCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL"; + else { oss << ", pAllocateArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DCreate.pAllocateArray__val); } + oss << ")"; + break; + case HIP_API_ID_hipArray3DGetDescriptor: + oss << "hipArray3DGetDescriptor("; + if (data->args.hipArray3DGetDescriptor.pArrayDescriptor == NULL) oss << "pArrayDescriptor=NULL"; + else { oss << "pArrayDescriptor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DGetDescriptor.pArrayDescriptor__val); } + if (data->args.hipArray3DGetDescriptor.array == NULL) oss << ", array=NULL"; + else { oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DGetDescriptor.array__val); } + oss << ")"; + break; + case HIP_API_ID_hipArrayCreate: + oss << "hipArrayCreate("; + if (data->args.hipArrayCreate.pHandle == NULL) oss << "pHandle=NULL"; + else { oss << "pHandle="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipArrayCreate.pHandle__val); } + if (data->args.hipArrayCreate.pAllocateArray == NULL) oss << ", pAllocateArray=NULL"; + else { oss << ", pAllocateArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayCreate.pAllocateArray__val); } + oss << ")"; + break; + case HIP_API_ID_hipArrayDestroy: + oss << "hipArrayDestroy("; + if (data->args.hipArrayDestroy.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayDestroy.array__val); } + oss << ")"; + break; + case HIP_API_ID_hipArrayGetDescriptor: + oss << "hipArrayGetDescriptor("; + if (data->args.hipArrayGetDescriptor.pArrayDescriptor == NULL) oss << 
"pArrayDescriptor=NULL"; + else { oss << "pArrayDescriptor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetDescriptor.pArrayDescriptor__val); } + if (data->args.hipArrayGetDescriptor.array == NULL) oss << ", array=NULL"; + else { oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetDescriptor.array__val); } + oss << ")"; + break; + case HIP_API_ID_hipArrayGetInfo: + oss << "hipArrayGetInfo("; + if (data->args.hipArrayGetInfo.desc == NULL) oss << "desc=NULL"; + else { oss << "desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.desc__val); } + if (data->args.hipArrayGetInfo.extent == NULL) oss << ", extent=NULL"; + else { oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.extent__val); } + if (data->args.hipArrayGetInfo.flags == NULL) oss << ", flags=NULL"; + else { oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.flags__val); } + if (data->args.hipArrayGetInfo.array == NULL) oss << ", array=NULL"; + else { oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.array__val); } + oss << ")"; + break; + case HIP_API_ID_hipChooseDevice: + oss << "hipChooseDevice("; + if (data->args.hipChooseDevice.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDevice.device__val); } + if (data->args.hipChooseDevice.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDevice.prop__val); } + oss << ")"; + break; + case HIP_API_ID_hipConfigureCall: + oss << "hipConfigureCall("; + oss << "gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.gridDim); + oss << ", blockDim="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.blockDim); + oss << ", 
sharedMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.sharedMem); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipConfigureCall.stream); + oss << ")"; + break; + case HIP_API_ID_hipCreateSurfaceObject: + oss << "hipCreateSurfaceObject("; + if (data->args.hipCreateSurfaceObject.pSurfObject == NULL) oss << "pSurfObject=NULL"; + else { oss << "pSurfObject="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCreateSurfaceObject.pSurfObject__val); } + if (data->args.hipCreateSurfaceObject.pResDesc == NULL) oss << ", pResDesc=NULL"; + else { oss << ", pResDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCreateSurfaceObject.pResDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxCreate: + oss << "hipCtxCreate("; + if (data->args.hipCtxCreate.ctx == NULL) oss << "ctx=NULL"; + else { oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxCreate.ctx__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxCreate.flags); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxCreate.device); + oss << ")"; + break; + case HIP_API_ID_hipCtxDestroy: + oss << "hipCtxDestroy("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxDestroy.ctx); + oss << ")"; + break; + case HIP_API_ID_hipCtxDisablePeerAccess: + oss << "hipCtxDisablePeerAccess("; + oss << "peerCtx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxDisablePeerAccess.peerCtx); + oss << ")"; + break; + case HIP_API_ID_hipCtxEnablePeerAccess: + oss << "hipCtxEnablePeerAccess("; + oss << "peerCtx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxEnablePeerAccess.peerCtx); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxEnablePeerAccess.flags); + oss << ")"; + break; + case HIP_API_ID_hipCtxGetApiVersion: + oss << 
"hipCtxGetApiVersion("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetApiVersion.ctx); + if (data->args.hipCtxGetApiVersion.apiVersion == NULL) oss << ", apiVersion=NULL"; + else { oss << ", apiVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetApiVersion.apiVersion__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetCacheConfig: + oss << "hipCtxGetCacheConfig("; + if (data->args.hipCtxGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL"; + else { oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetCacheConfig.cacheConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetCurrent: + oss << "hipCtxGetCurrent("; + if (data->args.hipCtxGetCurrent.ctx == NULL) oss << "ctx=NULL"; + else { oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetCurrent.ctx__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetDevice: + oss << "hipCtxGetDevice("; + if (data->args.hipCtxGetDevice.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetDevice.device__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetFlags: + oss << "hipCtxGetFlags("; + if (data->args.hipCtxGetFlags.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetFlags.flags__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxGetSharedMemConfig: + oss << "hipCtxGetSharedMemConfig("; + if (data->args.hipCtxGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL"; + else { oss << "pConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxGetSharedMemConfig.pConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxPopCurrent: + oss << "hipCtxPopCurrent("; + if (data->args.hipCtxPopCurrent.ctx == NULL) oss << "ctx=NULL"; + else { oss << "ctx="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxPopCurrent.ctx__val); } + oss << ")"; + break; + case HIP_API_ID_hipCtxPushCurrent: + oss << "hipCtxPushCurrent("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxPushCurrent.ctx); + oss << ")"; + break; + case HIP_API_ID_hipCtxSetCacheConfig: + oss << "hipCtxSetCacheConfig("; + oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxSetCacheConfig.cacheConfig); + oss << ")"; + break; + case HIP_API_ID_hipCtxSetCurrent: + oss << "hipCtxSetCurrent("; + oss << "ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxSetCurrent.ctx); + oss << ")"; + break; + case HIP_API_ID_hipCtxSetSharedMemConfig: + oss << "hipCtxSetSharedMemConfig("; + oss << "config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipCtxSetSharedMemConfig.config); + oss << ")"; + break; + case HIP_API_ID_hipCtxSynchronize: + oss << "hipCtxSynchronize("; + oss << ")"; + break; + case HIP_API_ID_hipDestroyExternalMemory: + oss << "hipDestroyExternalMemory("; + oss << "extMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDestroyExternalMemory.extMem); + oss << ")"; + break; + case HIP_API_ID_hipDestroyExternalSemaphore: + oss << "hipDestroyExternalSemaphore("; + oss << "extSem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDestroyExternalSemaphore.extSem); + oss << ")"; + break; + case HIP_API_ID_hipDestroySurfaceObject: + oss << "hipDestroySurfaceObject("; + oss << "surfaceObject="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDestroySurfaceObject.surfaceObject); + oss << ")"; + break; + case HIP_API_ID_hipDeviceCanAccessPeer: + oss << "hipDeviceCanAccessPeer("; + if (data->args.hipDeviceCanAccessPeer.canAccessPeer == NULL) oss << "canAccessPeer=NULL"; + else { oss << "canAccessPeer="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceCanAccessPeer.canAccessPeer__val); } 
+ oss << ", deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceCanAccessPeer.deviceId); + oss << ", peerDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceCanAccessPeer.peerDeviceId); + oss << ")"; + break; + case HIP_API_ID_hipDeviceComputeCapability: + oss << "hipDeviceComputeCapability("; + if (data->args.hipDeviceComputeCapability.major == NULL) oss << "major=NULL"; + else { oss << "major="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceComputeCapability.major__val); } + if (data->args.hipDeviceComputeCapability.minor == NULL) oss << ", minor=NULL"; + else { oss << ", minor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceComputeCapability.minor__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceComputeCapability.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceDisablePeerAccess: + oss << "hipDeviceDisablePeerAccess("; + oss << "peerDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceDisablePeerAccess.peerDeviceId); + oss << ")"; + break; + case HIP_API_ID_hipDeviceEnablePeerAccess: + oss << "hipDeviceEnablePeerAccess("; + oss << "peerDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceEnablePeerAccess.peerDeviceId); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceEnablePeerAccess.flags); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGet: + oss << "hipDeviceGet("; + if (data->args.hipDeviceGet.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGet.device__val); } + oss << ", ordinal="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGet.ordinal); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetAttribute: + oss << "hipDeviceGetAttribute("; + if (data->args.hipDeviceGetAttribute.pi == NULL) oss << 
"pi=NULL"; + else { oss << "pi="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetAttribute.pi__val); } + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetAttribute.attr); + oss << ", deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetAttribute.deviceId); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetByPCIBusId: + oss << "hipDeviceGetByPCIBusId("; + if (data->args.hipDeviceGetByPCIBusId.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetByPCIBusId.device__val); } + if (data->args.hipDeviceGetByPCIBusId.pciBusId == NULL) oss << ", pciBusId=NULL"; + else { oss << ", pciBusId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetByPCIBusId.pciBusId__val); } + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetCacheConfig: + oss << "hipDeviceGetCacheConfig("; + if (data->args.hipDeviceGetCacheConfig.cacheConfig == NULL) oss << "cacheConfig=NULL"; + else { oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetCacheConfig.cacheConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetDefaultMemPool: + oss << "hipDeviceGetDefaultMemPool("; + if (data->args.hipDeviceGetDefaultMemPool.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetDefaultMemPool.mem_pool__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetDefaultMemPool.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetGraphMemAttribute: + oss << "hipDeviceGetGraphMemAttribute("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetGraphMemAttribute.device); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetGraphMemAttribute.attr); + oss 
<< ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetGraphMemAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetLimit: + oss << "hipDeviceGetLimit("; + if (data->args.hipDeviceGetLimit.pValue == NULL) oss << "pValue=NULL"; + else { oss << "pValue="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetLimit.pValue__val); } + oss << ", limit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetLimit.limit); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetMemPool: + oss << "hipDeviceGetMemPool("; + if (data->args.hipDeviceGetMemPool.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetMemPool.mem_pool__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetMemPool.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetName: + oss << "hipDeviceGetName("; + if (data->args.hipDeviceGetName.name == NULL) oss << "name=NULL"; + else { oss << "name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetName.name__val); } + oss << ", len="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetName.len); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetName.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetP2PAttribute: + oss << "hipDeviceGetP2PAttribute("; + if (data->args.hipDeviceGetP2PAttribute.value == NULL) oss << "value=NULL"; + else { oss << "value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.value__val); } + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.attr); + oss << ", srcDevice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.srcDevice); + oss << ", dstDevice="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetP2PAttribute.dstDevice); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetPCIBusId: + oss << "hipDeviceGetPCIBusId("; + if (data->args.hipDeviceGetPCIBusId.pciBusId == NULL) oss << "pciBusId=NULL"; + else { oss << "pciBusId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetPCIBusId.pciBusId__val); } + oss << ", len="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetPCIBusId.len); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetPCIBusId.device); + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetSharedMemConfig: + oss << "hipDeviceGetSharedMemConfig("; + if (data->args.hipDeviceGetSharedMemConfig.pConfig == NULL) oss << "pConfig=NULL"; + else { oss << "pConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetSharedMemConfig.pConfig__val); } + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetStreamPriorityRange: + oss << "hipDeviceGetStreamPriorityRange("; + if (data->args.hipDeviceGetStreamPriorityRange.leastPriority == NULL) oss << "leastPriority=NULL"; + else { oss << "leastPriority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetStreamPriorityRange.leastPriority__val); } + if (data->args.hipDeviceGetStreamPriorityRange.greatestPriority == NULL) oss << ", greatestPriority=NULL"; + else { oss << ", greatestPriority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetStreamPriorityRange.greatestPriority__val); } + oss << ")"; + break; + case HIP_API_ID_hipDeviceGetUuid: + oss << "hipDeviceGetUuid("; + if (data->args.hipDeviceGetUuid.uuid == NULL) oss << "uuid=NULL"; + else { oss << "uuid="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetUuid.uuid__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGetUuid.device); + oss << ")"; + break; + case 
HIP_API_ID_hipDeviceGraphMemTrim: + oss << "hipDeviceGraphMemTrim("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceGraphMemTrim.device); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxGetState: + oss << "hipDevicePrimaryCtxGetState("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxGetState.dev); + if (data->args.hipDevicePrimaryCtxGetState.flags == NULL) oss << ", flags=NULL"; + else { oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxGetState.flags__val); } + if (data->args.hipDevicePrimaryCtxGetState.active == NULL) oss << ", active=NULL"; + else { oss << ", active="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxGetState.active__val); } + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxRelease: + oss << "hipDevicePrimaryCtxRelease("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxRelease.dev); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxReset: + oss << "hipDevicePrimaryCtxReset("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxReset.dev); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxRetain: + oss << "hipDevicePrimaryCtxRetain("; + if (data->args.hipDevicePrimaryCtxRetain.pctx == NULL) oss << "pctx=NULL"; + else { oss << "pctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxRetain.pctx__val); } + oss << ", dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxRetain.dev); + oss << ")"; + break; + case HIP_API_ID_hipDevicePrimaryCtxSetFlags: + oss << "hipDevicePrimaryCtxSetFlags("; + oss << "dev="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDevicePrimaryCtxSetFlags.dev); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipDevicePrimaryCtxSetFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipDeviceReset: + oss << "hipDeviceReset("; + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetCacheConfig: + oss << "hipDeviceSetCacheConfig("; + oss << "cacheConfig="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetCacheConfig.cacheConfig); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetGraphMemAttribute: + oss << "hipDeviceSetGraphMemAttribute("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetGraphMemAttribute.device); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetGraphMemAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetGraphMemAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetLimit: + oss << "hipDeviceSetLimit("; + oss << "limit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetLimit.limit); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetLimit.value); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetMemPool: + oss << "hipDeviceSetMemPool("; + oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetMemPool.device); + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetMemPool.mem_pool); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSetSharedMemConfig: + oss << "hipDeviceSetSharedMemConfig("; + oss << "config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceSetSharedMemConfig.config); + oss << ")"; + break; + case HIP_API_ID_hipDeviceSynchronize: + oss << "hipDeviceSynchronize("; + oss << ")"; + break; + case HIP_API_ID_hipDeviceTotalMem: + oss << "hipDeviceTotalMem("; + if (data->args.hipDeviceTotalMem.bytes == NULL) oss << "bytes=NULL"; + else { oss << "bytes="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceTotalMem.bytes__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDeviceTotalMem.device); + oss << ")"; + break; + case HIP_API_ID_hipDriverGetVersion: + oss << "hipDriverGetVersion("; + if (data->args.hipDriverGetVersion.driverVersion == NULL) oss << "driverVersion=NULL"; + else { oss << "driverVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDriverGetVersion.driverVersion__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvMemcpy2DUnaligned: + oss << "hipDrvMemcpy2DUnaligned("; + if (data->args.hipDrvMemcpy2DUnaligned.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy2DUnaligned.pCopy__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvMemcpy3D: + oss << "hipDrvMemcpy3D("; + if (data->args.hipDrvMemcpy3D.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy3D.pCopy__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvMemcpy3DAsync: + oss << "hipDrvMemcpy3DAsync("; + if (data->args.hipDrvMemcpy3DAsync.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy3DAsync.pCopy__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvMemcpy3DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipDrvPointerGetAttributes: + oss << "hipDrvPointerGetAttributes("; + oss << "numAttributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.numAttributes); + if (data->args.hipDrvPointerGetAttributes.attributes == NULL) oss << ", attributes=NULL"; + else { oss << ", attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.attributes__val); } + if 
(data->args.hipDrvPointerGetAttributes.data == NULL) oss << ", data=NULL"; + else { oss << ", data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.data__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvPointerGetAttributes.ptr); + oss << ")"; + break; + case HIP_API_ID_hipEventCreate: + oss << "hipEventCreate("; + if (data->args.hipEventCreate.event == NULL) oss << "event=NULL"; + else { oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventCreate.event__val); } + oss << ")"; + break; + case HIP_API_ID_hipEventCreateWithFlags: + oss << "hipEventCreateWithFlags("; + if (data->args.hipEventCreateWithFlags.event == NULL) oss << "event=NULL"; + else { oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventCreateWithFlags.event__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventCreateWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipEventDestroy: + oss << "hipEventDestroy("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventDestroy.event); + oss << ")"; + break; + case HIP_API_ID_hipEventElapsedTime: + oss << "hipEventElapsedTime("; + if (data->args.hipEventElapsedTime.ms == NULL) oss << "ms=NULL"; + else { oss << "ms="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventElapsedTime.ms__val); } + oss << ", start="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventElapsedTime.start); + oss << ", stop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventElapsedTime.stop); + oss << ")"; + break; + case HIP_API_ID_hipEventQuery: + oss << "hipEventQuery("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventQuery.event); + oss << ")"; + break; + case HIP_API_ID_hipEventRecord: + oss << "hipEventRecord("; + oss << "event="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipEventRecord.event); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventRecord.stream); + oss << ")"; + break; + case HIP_API_ID_hipEventSynchronize: + oss << "hipEventSynchronize("; + oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventSynchronize.event); + oss << ")"; + break; + case HIP_API_ID_hipExtGetLinkTypeAndHopCount: + oss << "hipExtGetLinkTypeAndHopCount("; + oss << "device1="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.device1); + oss << ", device2="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.device2); + if (data->args.hipExtGetLinkTypeAndHopCount.linktype == NULL) oss << ", linktype=NULL"; + else { oss << ", linktype="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.linktype__val); } + if (data->args.hipExtGetLinkTypeAndHopCount.hopcount == NULL) oss << ", hopcount=NULL"; + else { oss << ", hopcount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.hopcount__val); } + oss << ")"; + break; + case HIP_API_ID_hipExtLaunchKernel: + oss << "hipExtLaunchKernel("; + oss << "function_address="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.function_address); + oss << ", numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.numBlocks); + oss << ", dimBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.dimBlocks); + if (data->args.hipExtLaunchKernel.args == NULL) oss << ", args=NULL"; + else { oss << ", args="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.args__val); } + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.sharedMemBytes); + oss << ", stream="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.stream); + oss << ", startEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.startEvent); + oss << ", stopEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.stopEvent); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchKernel.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: + oss << "hipExtLaunchMultiKernelMultiDevice("; + if (data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL"; + else { oss << "launchParamsList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList__val); } + oss << ", numDevices="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchMultiKernelMultiDevice.numDevices); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtLaunchMultiKernelMultiDevice.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtMallocWithFlags: + oss << "hipExtMallocWithFlags("; + if (data->args.hipExtMallocWithFlags.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtMallocWithFlags.ptr__val); } + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtMallocWithFlags.sizeBytes); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtMallocWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtModuleLaunchKernel: + oss << "hipExtModuleLaunchKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.f); + oss << ", globalWorkSizeX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.globalWorkSizeX); + oss << ", globalWorkSizeY="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.globalWorkSizeY); + oss << ", globalWorkSizeZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.globalWorkSizeZ); + oss << ", localWorkSizeX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.localWorkSizeX); + oss << ", localWorkSizeY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.localWorkSizeY); + oss << ", localWorkSizeZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.localWorkSizeZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.sharedMemBytes); + oss << ", hStream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.hStream); + if (data->args.hipExtModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.kernelParams__val); } + if (data->args.hipExtModuleLaunchKernel.extra == NULL) oss << ", extra=NULL"; + else { oss << ", extra="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.extra__val); } + oss << ", startEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.startEvent); + oss << ", stopEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.stopEvent); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtModuleLaunchKernel.flags); + oss << ")"; + break; + case HIP_API_ID_hipExtStreamCreateWithCUMask: + oss << "hipExtStreamCreateWithCUMask("; + if (data->args.hipExtStreamCreateWithCUMask.stream == NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipExtStreamCreateWithCUMask.stream__val); } + oss << ", cuMaskSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamCreateWithCUMask.cuMaskSize); + if (data->args.hipExtStreamCreateWithCUMask.cuMask == NULL) oss << ", cuMask=NULL"; + else { oss << ", cuMask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamCreateWithCUMask.cuMask__val); } + oss << ")"; + break; + case HIP_API_ID_hipExtStreamGetCUMask: + oss << "hipExtStreamGetCUMask("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamGetCUMask.stream); + oss << ", cuMaskSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamGetCUMask.cuMaskSize); + if (data->args.hipExtStreamGetCUMask.cuMask == NULL) oss << ", cuMask=NULL"; + else { oss << ", cuMask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtStreamGetCUMask.cuMask__val); } + oss << ")"; + break; + case HIP_API_ID_hipExternalMemoryGetMappedBuffer: + oss << "hipExternalMemoryGetMappedBuffer("; + if (data->args.hipExternalMemoryGetMappedBuffer.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedBuffer.devPtr__val); } + oss << ", extMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedBuffer.extMem); + if (data->args.hipExternalMemoryGetMappedBuffer.bufferDesc == NULL) oss << ", bufferDesc=NULL"; + else { oss << ", bufferDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedBuffer.bufferDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipFree: + oss << "hipFree("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFree.ptr); + oss << ")"; + break; + case HIP_API_ID_hipFreeArray: + oss << "hipFreeArray("; + if (data->args.hipFreeArray.array == NULL) oss << "array=NULL"; + else { oss << "array="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeArray.array__val); } + oss << ")"; + break; + case HIP_API_ID_hipFreeAsync: + oss << "hipFreeAsync("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeAsync.dev_ptr); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipFreeHost: + oss << "hipFreeHost("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeHost.ptr); + oss << ")"; + break; + case HIP_API_ID_hipFreeMipmappedArray: + oss << "hipFreeMipmappedArray("; + oss << "mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeMipmappedArray.mipmappedArray); + oss << ")"; + break; + case HIP_API_ID_hipFuncGetAttribute: + oss << "hipFuncGetAttribute("; + if (data->args.hipFuncGetAttribute.value == NULL) oss << "value=NULL"; + else { oss << "value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttribute.value__val); } + oss << ", attrib="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttribute.attrib); + oss << ", hfunc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttribute.hfunc); + oss << ")"; + break; + case HIP_API_ID_hipFuncGetAttributes: + oss << "hipFuncGetAttributes("; + if (data->args.hipFuncGetAttributes.attr == NULL) oss << "attr=NULL"; + else { oss << "attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttributes.attr__val); } + oss << ", func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncGetAttributes.func); + oss << ")"; + break; + case HIP_API_ID_hipFuncSetAttribute: + oss << "hipFuncSetAttribute("; + oss << "func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetAttribute.func); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetAttribute.attr); + oss << ", value="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipFuncSetCacheConfig: + oss << "hipFuncSetCacheConfig("; + oss << "func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetCacheConfig.func); + oss << ", config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetCacheConfig.config); + oss << ")"; + break; + case HIP_API_ID_hipFuncSetSharedMemConfig: + oss << "hipFuncSetSharedMemConfig("; + oss << "func="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetSharedMemConfig.func); + oss << ", config="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFuncSetSharedMemConfig.config); + oss << ")"; + break; + case HIP_API_ID_hipGLGetDevices: + oss << "hipGLGetDevices("; + if (data->args.hipGLGetDevices.pHipDeviceCount == NULL) oss << "pHipDeviceCount=NULL"; + else { oss << "pHipDeviceCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.pHipDeviceCount__val); } + if (data->args.hipGLGetDevices.pHipDevices == NULL) oss << ", pHipDevices=NULL"; + else { oss << ", pHipDevices="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.pHipDevices__val); } + oss << ", hipDeviceCount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.hipDeviceCount); + oss << ", deviceList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGLGetDevices.deviceList); + oss << ")"; + break; + case HIP_API_ID_hipGetChannelDesc: + oss << "hipGetChannelDesc("; + if (data->args.hipGetChannelDesc.desc == NULL) oss << "desc=NULL"; + else { oss << "desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetChannelDesc.desc__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetChannelDesc.array); + oss << ")"; + break; + case HIP_API_ID_hipGetDevice: + oss << "hipGetDevice("; + if (data->args.hipGetDevice.deviceId 
== NULL) oss << "deviceId=NULL"; + else { oss << "deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevice.deviceId__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetDeviceCount: + oss << "hipGetDeviceCount("; + if (data->args.hipGetDeviceCount.count == NULL) oss << "count=NULL"; + else { oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceCount.count__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetDeviceFlags: + oss << "hipGetDeviceFlags("; + if (data->args.hipGetDeviceFlags.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceFlags.flags__val); } + oss << ")"; + break; + case HIP_API_ID_hipGetDeviceProperties: + oss << "hipGetDeviceProperties("; + if (data->args.hipGetDeviceProperties.props == NULL) oss << "props=NULL"; + else { oss << "props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.props__val); } + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.device); + oss << ")"; + break; + case HIP_API_ID_hipGetErrorString: + oss << "hipGetErrorString("; + oss << ")"; + break; + case HIP_API_ID_hipGetLastError: + oss << "hipGetLastError("; + oss << ")"; + break; + case HIP_API_ID_hipGetMipmappedArrayLevel: + oss << "hipGetMipmappedArrayLevel("; + if (data->args.hipGetMipmappedArrayLevel.levelArray == NULL) oss << "levelArray=NULL"; + else { oss << "levelArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetMipmappedArrayLevel.levelArray__val); } + oss << ", mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetMipmappedArrayLevel.mipmappedArray); + oss << ", level="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetMipmappedArrayLevel.level); + oss << ")"; + break; + case HIP_API_ID_hipGetSymbolAddress: + oss << "hipGetSymbolAddress("; + if 
(data->args.hipGetSymbolAddress.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolAddress.devPtr__val); } + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolAddress.symbol); + oss << ")"; + break; + case HIP_API_ID_hipGetSymbolSize: + oss << "hipGetSymbolSize("; + if (data->args.hipGetSymbolSize.size == NULL) oss << "size=NULL"; + else { oss << "size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolSize.size__val); } + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetSymbolSize.symbol); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddChildGraphNode: + oss << "hipGraphAddChildGraphNode("; + if (data->args.hipGraphAddChildGraphNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.graph); + if (data->args.hipGraphAddChildGraphNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.numDependencies); + oss << ", childGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddChildGraphNode.childGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddDependencies: + oss << "hipGraphAddDependencies("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddDependencies.graph); + if (data->args.hipGraphAddDependencies.from == NULL) oss << ", from=NULL"; + else { oss << ", from="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphAddDependencies.from__val); } + if (data->args.hipGraphAddDependencies.to == NULL) oss << ", to=NULL"; + else { oss << ", to="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddDependencies.to__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddDependencies.numDependencies); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddEmptyNode: + oss << "hipGraphAddEmptyNode("; + if (data->args.hipGraphAddEmptyNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.graph); + if (data->args.hipGraphAddEmptyNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEmptyNode.numDependencies); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddEventRecordNode: + oss << "hipGraphAddEventRecordNode("; + if (data->args.hipGraphAddEventRecordNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.graph); + if (data->args.hipGraphAddEventRecordNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.numDependencies); + oss << ", 
event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventRecordNode.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddEventWaitNode: + oss << "hipGraphAddEventWaitNode("; + if (data->args.hipGraphAddEventWaitNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.graph); + if (data->args.hipGraphAddEventWaitNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.numDependencies); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddHostNode: + oss << "hipGraphAddHostNode("; + if (data->args.hipGraphAddHostNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.graph); + if (data->args.hipGraphAddHostNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddHostNode.numDependencies); + if (data->args.hipGraphAddHostNode.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphAddHostNode.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddKernelNode: + oss << "hipGraphAddKernelNode("; + if (data->args.hipGraphAddKernelNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.graph); + if (data->args.hipGraphAddKernelNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.numDependencies); + if (data->args.hipGraphAddKernelNode.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddKernelNode.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemAllocNode: + oss << "hipGraphAddMemAllocNode("; + if (data->args.hipGraphAddMemAllocNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.graph); + if (data->args.hipGraphAddMemAllocNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.numDependencies); + if (data->args.hipGraphAddMemAllocNode.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemAllocNode.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemFreeNode: + oss << "hipGraphAddMemFreeNode("; + if (data->args.hipGraphAddMemFreeNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.graph); + if (data->args.hipGraphAddMemFreeNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.numDependencies); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemFreeNode.dev_ptr); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNode: + oss << "hipGraphAddMemcpyNode("; + if (data->args.hipGraphAddMemcpyNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.graph); + if (data->args.hipGraphAddMemcpyNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode.numDependencies); + if (data->args.hipGraphAddMemcpyNode.pCopyParams == NULL) oss << ", pCopyParams=NULL"; + else { oss << ", pCopyParams="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphAddMemcpyNode.pCopyParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNode1D: + oss << "hipGraphAddMemcpyNode1D("; + if (data->args.hipGraphAddMemcpyNode1D.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.graph); + if (data->args.hipGraphAddMemcpyNode1D.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.numDependencies); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNode1D.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNodeFromSymbol: + oss << "hipGraphAddMemcpyNodeFromSymbol("; + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.graph); + if (data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphAddMemcpyNodeFromSymbol.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.numDependencies); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.symbol); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemcpyNodeToSymbol: + oss << "hipGraphAddMemcpyNodeToSymbol("; + if (data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.graph); + if (data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.numDependencies); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphAddMemcpyNodeToSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemcpyNodeToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphAddMemsetNode: + oss << "hipGraphAddMemsetNode("; + if (data->args.hipGraphAddMemsetNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.graph); + if (data->args.hipGraphAddMemsetNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.numDependencies); + if (data->args.hipGraphAddMemsetNode.pMemsetParams == NULL) oss << ", pMemsetParams=NULL"; + else { oss << ", pMemsetParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddMemsetNode.pMemsetParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphChildGraphNodeGetGraph: + oss << "hipGraphChildGraphNodeGetGraph("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphChildGraphNodeGetGraph.node); + if (data->args.hipGraphChildGraphNodeGetGraph.pGraph == NULL) oss << ", pGraph=NULL"; + else { oss << ", pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphChildGraphNodeGetGraph.pGraph__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphClone: + oss << "hipGraphClone("; + if (data->args.hipGraphClone.pGraphClone == NULL) oss << "pGraphClone=NULL"; + else { oss << "pGraphClone="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphClone.pGraphClone__val); } + oss << ", originalGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphClone.originalGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphCreate: + oss << "hipGraphCreate("; + if (data->args.hipGraphCreate.pGraph == NULL) oss << "pGraph=NULL"; + else { oss << "pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphCreate.pGraph__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphCreate.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphDebugDotPrint: + oss << "hipGraphDebugDotPrint("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDebugDotPrint.graph); + if (data->args.hipGraphDebugDotPrint.path == NULL) oss << ", path=NULL"; + else { oss << ", path="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDebugDotPrint.path__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDebugDotPrint.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphDestroy: + oss << "hipGraphDestroy("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDestroy.graph); + oss << ")"; + break; + case HIP_API_ID_hipGraphDestroyNode: + oss << "hipGraphDestroyNode("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphDestroyNode.node); + oss << ")"; + break; + case HIP_API_ID_hipGraphEventRecordNodeGetEvent: + oss << "hipGraphEventRecordNodeGetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeGetEvent.node); + if (data->args.hipGraphEventRecordNodeGetEvent.event_out == NULL) oss << ", event_out=NULL"; + else { oss << ", event_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeGetEvent.event_out__val); } + oss << ")"; + break; + case 
HIP_API_ID_hipGraphEventRecordNodeSetEvent: + oss << "hipGraphEventRecordNodeSetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeSetEvent.node); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventRecordNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphEventWaitNodeGetEvent: + oss << "hipGraphEventWaitNodeGetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeGetEvent.node); + if (data->args.hipGraphEventWaitNodeGetEvent.event_out == NULL) oss << ", event_out=NULL"; + else { oss << ", event_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeGetEvent.event_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphEventWaitNodeSetEvent: + oss << "hipGraphEventWaitNodeSetEvent("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeSetEvent.node); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphEventWaitNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecChildGraphNodeSetParams: + oss << "hipGraphExecChildGraphNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecChildGraphNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecChildGraphNodeSetParams.node); + oss << ", childGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecChildGraphNodeSetParams.childGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecDestroy: + oss << "hipGraphExecDestroy("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecDestroy.graphExec); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecEventRecordNodeSetEvent: + oss << "hipGraphExecEventRecordNodeSetEvent("; + oss << 
"hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventRecordNodeSetEvent.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventRecordNodeSetEvent.hNode); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventRecordNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: + oss << "hipGraphExecEventWaitNodeSetEvent("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventWaitNodeSetEvent.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventWaitNodeSetEvent.hNode); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventWaitNodeSetEvent.event); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecHostNodeSetParams: + oss << "hipGraphExecHostNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecHostNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecHostNodeSetParams.node); + if (data->args.hipGraphExecHostNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecHostNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecKernelNodeSetParams: + oss << "hipGraphExecKernelNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecKernelNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecKernelNodeSetParams.node); + if (data->args.hipGraphExecKernelNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecKernelNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams: + oss << "hipGraphExecMemcpyNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams.node); + if (data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParams1D: + oss << "hipGraphExecMemcpyNodeSetParams1D("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.node); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParams1D.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsFromSymbol: + oss << "hipGraphExecMemcpyNodeSetParamsFromSymbol("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.node); + oss << ", dst="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.symbol); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol: + oss << "hipGraphExecMemcpyNodeSetParamsToSymbol("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.node); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemcpyNodeSetParamsToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphExecMemsetNodeSetParams: + oss << "hipGraphExecMemsetNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemsetNodeSetParams.hGraphExec); + oss << ", node="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphExecMemsetNodeSetParams.node); + if (data->args.hipGraphExecMemsetNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecMemsetNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecUpdate: + oss << "hipGraphExecUpdate("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.hGraphExec); + oss << ", hGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.hGraph); + if (data->args.hipGraphExecUpdate.hErrorNode_out == NULL) oss << ", hErrorNode_out=NULL"; + else { oss << ", hErrorNode_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.hErrorNode_out__val); } + if (data->args.hipGraphExecUpdate.updateResult_out == NULL) oss << ", updateResult_out=NULL"; + else { oss << ", updateResult_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.updateResult_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphGetEdges: + oss << "hipGraphGetEdges("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.graph); + if (data->args.hipGraphGetEdges.from == NULL) oss << ", from=NULL"; + else { oss << ", from="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.from__val); } + if (data->args.hipGraphGetEdges.to == NULL) oss << ", to=NULL"; + else { oss << ", to="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.to__val); } + if (data->args.hipGraphGetEdges.numEdges == NULL) oss << ", numEdges=NULL"; + else { oss << ", numEdges="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.numEdges__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphGetNodes: + oss << "hipGraphGetNodes("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphGetNodes.graph); + if (data->args.hipGraphGetNodes.nodes == NULL) oss << ", nodes=NULL"; + else { oss << ", nodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetNodes.nodes__val); } + if (data->args.hipGraphGetNodes.numNodes == NULL) oss << ", numNodes=NULL"; + else { oss << ", numNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetNodes.numNodes__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphGetRootNodes: + oss << "hipGraphGetRootNodes("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetRootNodes.graph); + if (data->args.hipGraphGetRootNodes.pRootNodes == NULL) oss << ", pRootNodes=NULL"; + else { oss << ", pRootNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetRootNodes.pRootNodes__val); } + if (data->args.hipGraphGetRootNodes.pNumRootNodes == NULL) oss << ", pNumRootNodes=NULL"; + else { oss << ", pNumRootNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetRootNodes.pNumRootNodes__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphHostNodeGetParams: + oss << "hipGraphHostNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeGetParams.node); + if (data->args.hipGraphHostNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphHostNodeSetParams: + oss << "hipGraphHostNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeSetParams.node); + if (data->args.hipGraphHostNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphHostNodeSetParams.pNodeParams__val); } + oss << ")"; + 
break; + case HIP_API_ID_hipGraphInstantiate: + oss << "hipGraphInstantiate("; + if (data->args.hipGraphInstantiate.pGraphExec == NULL) oss << "pGraphExec=NULL"; + else { oss << "pGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.pGraphExec__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.graph); + if (data->args.hipGraphInstantiate.pErrorNode == NULL) oss << ", pErrorNode=NULL"; + else { oss << ", pErrorNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.pErrorNode__val); } + if (data->args.hipGraphInstantiate.pLogBuffer == NULL) oss << ", pLogBuffer=NULL"; + else { oss << ", pLogBuffer="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.pLogBuffer__val); } + oss << ", bufferSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiate.bufferSize); + oss << ")"; + break; + case HIP_API_ID_hipGraphInstantiateWithFlags: + oss << "hipGraphInstantiateWithFlags("; + if (data->args.hipGraphInstantiateWithFlags.pGraphExec == NULL) oss << "pGraphExec=NULL"; + else { oss << "pGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithFlags.pGraphExec__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithFlags.graph); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphInstantiateWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeCopyAttributes: + oss << "hipGraphKernelNodeCopyAttributes("; + oss << "hSrc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeCopyAttributes.hSrc); + oss << ", hDst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeCopyAttributes.hDst); + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeGetAttribute: + oss << 
"hipGraphKernelNodeGetAttribute("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetAttribute.hNode); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetAttribute.attr); + if (data->args.hipGraphKernelNodeGetAttribute.value == NULL) oss << ", value=NULL"; + else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetAttribute.value__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeGetParams: + oss << "hipGraphKernelNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetParams.node); + if (data->args.hipGraphKernelNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeSetAttribute: + oss << "hipGraphKernelNodeSetAttribute("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetAttribute.hNode); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetAttribute.attr); + if (data->args.hipGraphKernelNodeSetAttribute.value == NULL) oss << ", value=NULL"; + else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetAttribute.value__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphKernelNodeSetParams: + oss << "hipGraphKernelNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetParams.node); + if (data->args.hipGraphKernelNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphKernelNodeSetParams.pNodeParams__val); } + oss << ")"; + 
break; + case HIP_API_ID_hipGraphLaunch: + oss << "hipGraphLaunch("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphLaunch.graphExec); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphLaunch.stream); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemAllocNodeGetParams: + oss << "hipGraphMemAllocNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemAllocNodeGetParams.node); + if (data->args.hipGraphMemAllocNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemAllocNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemFreeNodeGetParams: + oss << "hipGraphMemFreeNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemFreeNodeGetParams.node); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemFreeNodeGetParams.dev_ptr); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeGetParams: + oss << "hipGraphMemcpyNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeGetParams.node); + if (data->args.hipGraphMemcpyNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParams: + oss << "hipGraphMemcpyNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams.node); + if (data->args.hipGraphMemcpyNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphMemcpyNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParams1D: + oss << "hipGraphMemcpyNodeSetParams1D("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.node); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParams1D.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsFromSymbol: + oss << "hipGraphMemcpyNodeSetParamsFromSymbol("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.node); + oss << ", dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.symbol); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemcpyNodeSetParamsToSymbol: + oss << "hipGraphMemcpyNodeSetParamsToSymbol("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.node); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphMemcpyNodeSetParamsToSymbol.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.count); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemcpyNodeSetParamsToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipGraphMemsetNodeGetParams: + oss << "hipGraphMemsetNodeGetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeGetParams.node); + if (data->args.hipGraphMemsetNodeGetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeGetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphMemsetNodeSetParams: + oss << "hipGraphMemsetNodeSetParams("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeSetParams.node); + if (data->args.hipGraphMemsetNodeSetParams.pNodeParams == NULL) oss << ", pNodeParams=NULL"; + else { oss << ", pNodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphMemsetNodeSetParams.pNodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeFindInClone: + oss << "hipGraphNodeFindInClone("; + if (data->args.hipGraphNodeFindInClone.pNode == NULL) oss << "pNode=NULL"; + else { oss << "pNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeFindInClone.pNode__val); } + oss << ", originalNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeFindInClone.originalNode); + oss << ", clonedGraph="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphNodeFindInClone.clonedGraph); + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetDependencies: + oss << "hipGraphNodeGetDependencies("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependencies.node); + if (data->args.hipGraphNodeGetDependencies.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependencies.pDependencies__val); } + if (data->args.hipGraphNodeGetDependencies.pNumDependencies == NULL) oss << ", pNumDependencies=NULL"; + else { oss << ", pNumDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependencies.pNumDependencies__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetDependentNodes: + oss << "hipGraphNodeGetDependentNodes("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependentNodes.node); + if (data->args.hipGraphNodeGetDependentNodes.pDependentNodes == NULL) oss << ", pDependentNodes=NULL"; + else { oss << ", pDependentNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependentNodes.pDependentNodes__val); } + if (data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes == NULL) oss << ", pNumDependentNodes=NULL"; + else { oss << ", pNumDependentNodes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetDependentNodes.pNumDependentNodes__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetEnabled: + oss << "hipGraphNodeGetEnabled("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetEnabled.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetEnabled.hNode); + if (data->args.hipGraphNodeGetEnabled.isEnabled == NULL) oss << ", isEnabled=NULL"; + else { oss << ", isEnabled="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetEnabled.isEnabled__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeGetType: + oss << "hipGraphNodeGetType("; + oss << "node="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetType.node); + if (data->args.hipGraphNodeGetType.pType == NULL) oss << ", pType=NULL"; + else { oss << ", pType="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeGetType.pType__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphNodeSetEnabled: + oss << "hipGraphNodeSetEnabled("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeSetEnabled.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeSetEnabled.hNode); + oss << ", isEnabled="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphNodeSetEnabled.isEnabled); + oss << ")"; + break; + case HIP_API_ID_hipGraphReleaseUserObject: + oss << "hipGraphReleaseUserObject("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphReleaseUserObject.graph); + oss << ", object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphReleaseUserObject.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphReleaseUserObject.count); + oss << ")"; + break; + case HIP_API_ID_hipGraphRemoveDependencies: + oss << "hipGraphRemoveDependencies("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRemoveDependencies.graph); + if (data->args.hipGraphRemoveDependencies.from == NULL) oss << ", from=NULL"; + else { oss << ", from="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRemoveDependencies.from__val); } + if (data->args.hipGraphRemoveDependencies.to == NULL) oss << ", to=NULL"; + else { oss << ", to="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphRemoveDependencies.to__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRemoveDependencies.numDependencies); + oss << ")"; + break; + case HIP_API_ID_hipGraphRetainUserObject: + oss << "hipGraphRetainUserObject("; + oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.graph); + oss << ", object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.count); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphRetainUserObject.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphUpload: + oss << "hipGraphUpload("; + oss << "graphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphUpload.graphExec); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphUpload.stream); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsGLRegisterBuffer: + oss << "hipGraphicsGLRegisterBuffer("; + if (data->args.hipGraphicsGLRegisterBuffer.resource == NULL) oss << "resource=NULL"; + else { oss << "resource="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipGraphicsGLRegisterBuffer.resource__val); } + oss << ", buffer="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterBuffer.buffer); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterBuffer.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsGLRegisterImage: + oss << "hipGraphicsGLRegisterImage("; + if (data->args.hipGraphicsGLRegisterImage.resource == NULL) oss << "resource=NULL"; + else { oss << "resource="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipGraphicsGLRegisterImage.resource__val); } + oss << ", image="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterImage.image); + oss << ", target="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterImage.target); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsGLRegisterImage.flags); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsMapResources: + oss << "hipGraphicsMapResources("; + oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsMapResources.count); + if (data->args.hipGraphicsMapResources.resources == NULL) oss << ", resources=NULL"; + else { oss << ", resources="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsMapResources.resources__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsMapResources.stream); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsResourceGetMappedPointer: + oss << "hipGraphicsResourceGetMappedPointer("; + if (data->args.hipGraphicsResourceGetMappedPointer.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsResourceGetMappedPointer.devPtr__val); } + if (data->args.hipGraphicsResourceGetMappedPointer.size == NULL) oss << ", size=NULL"; + else { oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsResourceGetMappedPointer.size__val); } + oss << ", resource="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsResourceGetMappedPointer.resource); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsSubResourceGetMappedArray: + oss << "hipGraphicsSubResourceGetMappedArray("; + if (data->args.hipGraphicsSubResourceGetMappedArray.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.array__val); } + oss << ", resource="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.resource); + oss << ", arrayIndex="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.arrayIndex); + oss << ", mipLevel="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsSubResourceGetMappedArray.mipLevel); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsUnmapResources: + oss << "hipGraphicsUnmapResources("; + oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnmapResources.count); + if (data->args.hipGraphicsUnmapResources.resources == NULL) oss << ", resources=NULL"; + else { oss << ", resources="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnmapResources.resources__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnmapResources.stream); + oss << ")"; + break; + case HIP_API_ID_hipGraphicsUnregisterResource: + oss << "hipGraphicsUnregisterResource("; + oss << "resource="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphicsUnregisterResource.resource); + oss << ")"; + break; + case HIP_API_ID_hipHccModuleLaunchKernel: + oss << "hipHccModuleLaunchKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.f); + oss << ", globalWorkSizeX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.globalWorkSizeX); + oss << ", globalWorkSizeY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.globalWorkSizeY); + oss << ", globalWorkSizeZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.globalWorkSizeZ); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.blockDimX); + oss << ", blockDimY="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipHccModuleLaunchKernel.blockDimY); + oss << ", blockDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.blockDimZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.sharedMemBytes); + oss << ", hStream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.hStream); + if (data->args.hipHccModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.kernelParams__val); } + if (data->args.hipHccModuleLaunchKernel.extra == NULL) oss << ", extra=NULL"; + else { oss << ", extra="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.extra__val); } + oss << ", startEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.startEvent); + oss << ", stopEvent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHccModuleLaunchKernel.stopEvent); + oss << ")"; + break; + case HIP_API_ID_hipHostAlloc: + oss << "hipHostAlloc("; + if (data->args.hipHostAlloc.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostAlloc.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostAlloc.size); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostAlloc.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostFree: + oss << "hipHostFree("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostFree.ptr); + oss << ")"; + break; + case HIP_API_ID_hipHostGetDevicePointer: + oss << "hipHostGetDevicePointer("; + if (data->args.hipHostGetDevicePointer.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipHostGetDevicePointer.devPtr__val); } + oss << ", hstPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetDevicePointer.hstPtr); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetDevicePointer.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostGetFlags: + oss << "hipHostGetFlags("; + if (data->args.hipHostGetFlags.flagsPtr == NULL) oss << "flagsPtr=NULL"; + else { oss << "flagsPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetFlags.flagsPtr__val); } + oss << ", hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostGetFlags.hostPtr); + oss << ")"; + break; + case HIP_API_ID_hipHostMalloc: + oss << "hipHostMalloc("; + if (data->args.hipHostMalloc.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.size); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostMalloc.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostRegister: + oss << "hipHostRegister("; + oss << "hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.hostPtr); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.sizeBytes); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostRegister.flags); + oss << ")"; + break; + case HIP_API_ID_hipHostUnregister: + oss << "hipHostUnregister("; + oss << "hostPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipHostUnregister.hostPtr); + oss << ")"; + break; + case HIP_API_ID_hipImportExternalMemory: + oss << "hipImportExternalMemory("; + if (data->args.hipImportExternalMemory.extMem_out == NULL) oss << "extMem_out=NULL"; + else { oss << "extMem_out="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalMemory.extMem_out__val); } + if (data->args.hipImportExternalMemory.memHandleDesc == NULL) oss << ", memHandleDesc=NULL"; + else { oss << ", memHandleDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalMemory.memHandleDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipImportExternalSemaphore: + oss << "hipImportExternalSemaphore("; + if (data->args.hipImportExternalSemaphore.extSem_out == NULL) oss << "extSem_out=NULL"; + else { oss << "extSem_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalSemaphore.extSem_out__val); } + if (data->args.hipImportExternalSemaphore.semHandleDesc == NULL) oss << ", semHandleDesc=NULL"; + else { oss << ", semHandleDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipImportExternalSemaphore.semHandleDesc__val); } + oss << ")"; + break; + case HIP_API_ID_hipInit: + oss << "hipInit("; + oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipInit.flags); + oss << ")"; + break; + case HIP_API_ID_hipIpcCloseMemHandle: + oss << "hipIpcCloseMemHandle("; + oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcCloseMemHandle.devPtr); + oss << ")"; + break; + case HIP_API_ID_hipIpcGetEventHandle: + oss << "hipIpcGetEventHandle("; + if (data->args.hipIpcGetEventHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetEventHandle.handle__val); } + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetEventHandle.event); + oss << ")"; + break; + case HIP_API_ID_hipIpcGetMemHandle: + oss << "hipIpcGetMemHandle("; + if (data->args.hipIpcGetMemHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetMemHandle.handle__val); } + oss << ", 
devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcGetMemHandle.devPtr); + oss << ")"; + break; + case HIP_API_ID_hipIpcOpenEventHandle: + oss << "hipIpcOpenEventHandle("; + if (data->args.hipIpcOpenEventHandle.event == NULL) oss << "event=NULL"; + else { oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenEventHandle.event__val); } + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenEventHandle.handle); + oss << ")"; + break; + case HIP_API_ID_hipIpcOpenMemHandle: + oss << "hipIpcOpenMemHandle("; + if (data->args.hipIpcOpenMemHandle.devPtr == NULL) oss << "devPtr=NULL"; + else { oss << "devPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenMemHandle.devPtr__val); } + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenMemHandle.handle); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipIpcOpenMemHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipLaunchByPtr: + oss << "hipLaunchByPtr("; + oss << "hostFunction="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchByPtr.hostFunction); + oss << ")"; + break; + case HIP_API_ID_hipLaunchCooperativeKernel: + oss << "hipLaunchCooperativeKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.f); + oss << ", gridDim="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.gridDim); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.blockDimX); + if (data->args.hipLaunchCooperativeKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.kernelParams__val); } + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipLaunchCooperativeKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernel.stream); + oss << ")"; + break; + case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: + oss << "hipLaunchCooperativeKernelMultiDevice("; + if (data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL"; + else { oss << "launchParamsList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val); } + oss << ", numDevices="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernelMultiDevice.numDevices); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchCooperativeKernelMultiDevice.flags); + oss << ")"; + break; + case HIP_API_ID_hipLaunchHostFunc: + oss << "hipLaunchHostFunc("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchHostFunc.stream); + oss << ", fn="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchHostFunc.fn); + oss << ", userData="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchHostFunc.userData); + oss << ")"; + break; + case HIP_API_ID_hipLaunchKernel: + oss << "hipLaunchKernel("; + oss << "function_address="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.function_address); + oss << ", numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.numBlocks); + oss << ", dimBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.dimBlocks); + if (data->args.hipLaunchKernel.args == NULL) oss << ", args=NULL"; + else { oss << ", args="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.args__val); } + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.sharedMemBytes); + oss << 
", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipLaunchKernel.stream); + oss << ")"; + break; + case HIP_API_ID_hipMalloc: + oss << "hipMalloc("; + if (data->args.hipMalloc.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc.size); + oss << ")"; + break; + case HIP_API_ID_hipMalloc3D: + oss << "hipMalloc3D("; + if (data->args.hipMalloc3D.pitchedDevPtr == NULL) oss << "pitchedDevPtr=NULL"; + else { oss << "pitchedDevPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3D.pitchedDevPtr__val); } + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3D.extent); + oss << ")"; + break; + case HIP_API_ID_hipMalloc3DArray: + oss << "hipMalloc3DArray("; + if (data->args.hipMalloc3DArray.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.array__val); } + if (data->args.hipMalloc3DArray.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.desc__val); } + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.extent); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMalloc3DArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocArray: + oss << "hipMallocArray("; + if (data->args.hipMallocArray.array == NULL) oss << "array=NULL"; + else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipMallocArray.array__val); } + if (data->args.hipMallocArray.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.desc__val); } + oss << ", width="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.height); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocAsync: + oss << "hipMallocAsync("; + if (data->args.hipMallocAsync.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocAsync.dev_ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocAsync.size); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMallocFromPoolAsync: + oss << "hipMallocFromPoolAsync("; + if (data->args.hipMallocFromPoolAsync.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.dev_ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.size); + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.mem_pool); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocFromPoolAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMallocHost: + oss << "hipMallocHost("; + if (data->args.hipMallocHost.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocHost.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocHost.size); + oss << ")"; + break; + case HIP_API_ID_hipMallocManaged: + oss << "hipMallocManaged("; + if (data->args.hipMallocManaged.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocManaged.dev_ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocManaged.size); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocManaged.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocMipmappedArray: + oss << "hipMallocMipmappedArray("; + if (data->args.hipMallocMipmappedArray.mipmappedArray == NULL) oss << "mipmappedArray=NULL"; + else { oss << "mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.mipmappedArray__val); } + if (data->args.hipMallocMipmappedArray.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.desc__val); } + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.extent); + oss << ", numLevels="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.numLevels); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocMipmappedArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipMallocPitch: + oss << "hipMallocPitch("; + if (data->args.hipMallocPitch.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.ptr__val); } + if (data->args.hipMallocPitch.pitch == NULL) oss << ", pitch=NULL"; + else { oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.pitch__val); } + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMallocPitch.height); + oss << ")"; + break; + case HIP_API_ID_hipMemAddressFree: + oss << "hipMemAddressFree("; + oss << "devPtr="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressFree.devPtr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressFree.size); + oss << ")"; + break; + case HIP_API_ID_hipMemAddressReserve: + oss << "hipMemAddressReserve("; + if (data->args.hipMemAddressReserve.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.size); + oss << ", alignment="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.alignment); + oss << ", addr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.addr); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAddressReserve.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemAdvise: + oss << "hipMemAdvise("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.count); + oss << ", advice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.advice); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAdvise.device); + oss << ")"; + break; + case HIP_API_ID_hipMemAllocHost: + oss << "hipMemAllocHost("; + if (data->args.hipMemAllocHost.ptr == NULL) oss << "ptr=NULL"; + else { oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocHost.ptr__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocHost.size); + oss << ")"; + break; + case HIP_API_ID_hipMemAllocPitch: + oss << "hipMemAllocPitch("; + if (data->args.hipMemAllocPitch.dptr == NULL) oss << "dptr=NULL"; + else { oss << "dptr="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.dptr__val); } + if (data->args.hipMemAllocPitch.pitch == NULL) oss << ", pitch=NULL"; + else { oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.pitch__val); } + oss << ", widthInBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.widthInBytes); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.height); + oss << ", elementSizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemAllocPitch.elementSizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemCreate: + oss << "hipMemCreate("; + if (data->args.hipMemCreate.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.handle__val); } + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.size); + if (data->args.hipMemCreate.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.prop__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemCreate.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemExportToShareableHandle: + oss << "hipMemExportToShareableHandle("; + oss << "shareableHandle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemExportToShareableHandle.shareableHandle); + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemExportToShareableHandle.handle); + oss << ", handleType="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemExportToShareableHandle.handleType); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemExportToShareableHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAccess: + oss << "hipMemGetAccess("; + if 
(data->args.hipMemGetAccess.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAccess.flags__val); } + if (data->args.hipMemGetAccess.location == NULL) oss << ", location=NULL"; + else { oss << ", location="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAccess.location__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAccess.ptr); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAddressRange: + oss << "hipMemGetAddressRange("; + if (data->args.hipMemGetAddressRange.pbase == NULL) oss << "pbase=NULL"; + else { oss << "pbase="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAddressRange.pbase__val); } + if (data->args.hipMemGetAddressRange.psize == NULL) oss << ", psize=NULL"; + else { oss << ", psize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAddressRange.psize__val); } + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAddressRange.dptr); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAllocationGranularity: + oss << "hipMemGetAllocationGranularity("; + if (data->args.hipMemGetAllocationGranularity.granularity == NULL) oss << "granularity=NULL"; + else { oss << "granularity="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationGranularity.granularity__val); } + if (data->args.hipMemGetAllocationGranularity.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationGranularity.prop__val); } + oss << ", option="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationGranularity.option); + oss << ")"; + break; + case HIP_API_ID_hipMemGetAllocationPropertiesFromHandle: + oss << "hipMemGetAllocationPropertiesFromHandle("; + if (data->args.hipMemGetAllocationPropertiesFromHandle.prop == NULL) oss << 
"prop=NULL"; + else { oss << "prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationPropertiesFromHandle.prop__val); } + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetAllocationPropertiesFromHandle.handle); + oss << ")"; + break; + case HIP_API_ID_hipMemGetInfo: + oss << "hipMemGetInfo("; + if (data->args.hipMemGetInfo.free == NULL) oss << "free=NULL"; + else { oss << "free="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetInfo.free__val); } + if (data->args.hipMemGetInfo.total == NULL) oss << ", total=NULL"; + else { oss << ", total="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemGetInfo.total__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemImportFromShareableHandle: + oss << "hipMemImportFromShareableHandle("; + if (data->args.hipMemImportFromShareableHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemImportFromShareableHandle.handle__val); } + oss << ", osHandle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemImportFromShareableHandle.osHandle); + oss << ", shHandleType="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemImportFromShareableHandle.shHandleType); + oss << ")"; + break; + case HIP_API_ID_hipMemMap: + oss << "hipMemMap("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.ptr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.size); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.offset); + oss << ", handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.handle); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMap.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemMapArrayAsync: + oss << "hipMemMapArrayAsync("; + if 
(data->args.hipMemMapArrayAsync.mapInfoList == NULL) oss << "mapInfoList=NULL"; + else { oss << "mapInfoList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMapArrayAsync.mapInfoList__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMapArrayAsync.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemMapArrayAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolCreate: + oss << "hipMemPoolCreate("; + if (data->args.hipMemPoolCreate.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolCreate.mem_pool__val); } + if (data->args.hipMemPoolCreate.pool_props == NULL) oss << ", pool_props=NULL"; + else { oss << ", pool_props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolCreate.pool_props__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemPoolDestroy: + oss << "hipMemPoolDestroy("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolDestroy.mem_pool); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolExportPointer: + oss << "hipMemPoolExportPointer("; + if (data->args.hipMemPoolExportPointer.export_data == NULL) oss << "export_data=NULL"; + else { oss << "export_data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportPointer.export_data__val); } + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportPointer.dev_ptr); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolExportToShareableHandle: + oss << "hipMemPoolExportToShareableHandle("; + oss << "shared_handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.shared_handle); + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.mem_pool); + oss << ", handle_type="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.handle_type); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolExportToShareableHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolGetAccess: + oss << "hipMemPoolGetAccess("; + if (data->args.hipMemPoolGetAccess.flags == NULL) oss << "flags=NULL"; + else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAccess.flags__val); } + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAccess.mem_pool); + if (data->args.hipMemPoolGetAccess.location == NULL) oss << ", location=NULL"; + else { oss << ", location="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAccess.location__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemPoolGetAttribute: + oss << "hipMemPoolGetAttribute("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAttribute.mem_pool); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolGetAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolImportFromShareableHandle: + oss << "hipMemPoolImportFromShareableHandle("; + if (data->args.hipMemPoolImportFromShareableHandle.mem_pool == NULL) oss << "mem_pool=NULL"; + else { oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportFromShareableHandle.mem_pool__val); } + oss << ", shared_handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportFromShareableHandle.shared_handle); + oss << ", handle_type="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportFromShareableHandle.handle_type); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemPoolImportFromShareableHandle.flags); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolImportPointer: + oss << "hipMemPoolImportPointer("; + if (data->args.hipMemPoolImportPointer.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportPointer.dev_ptr__val); } + oss << ", mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportPointer.mem_pool); + if (data->args.hipMemPoolImportPointer.export_data == NULL) oss << ", export_data=NULL"; + else { oss << ", export_data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolImportPointer.export_data__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemPoolSetAccess: + oss << "hipMemPoolSetAccess("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAccess.mem_pool); + if (data->args.hipMemPoolSetAccess.desc_list == NULL) oss << ", desc_list=NULL"; + else { oss << ", desc_list="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAccess.desc_list__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAccess.count); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolSetAttribute: + oss << "hipMemPoolSetAttribute("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAttribute.mem_pool); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAttribute.attr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolSetAttribute.value); + oss << ")"; + break; + case HIP_API_ID_hipMemPoolTrimTo: + oss << "hipMemPoolTrimTo("; + oss << "mem_pool="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPoolTrimTo.mem_pool); + oss << ", min_bytes_to_hold="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemPoolTrimTo.min_bytes_to_hold); + oss << ")"; + break; + case HIP_API_ID_hipMemPrefetchAsync: + oss << "hipMemPrefetchAsync("; + oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.count); + oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.device); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPrefetchAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemPtrGetInfo: + oss << "hipMemPtrGetInfo("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPtrGetInfo.ptr); + if (data->args.hipMemPtrGetInfo.size == NULL) oss << ", size=NULL"; + else { oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemPtrGetInfo.size__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemRangeGetAttribute: + oss << "hipMemRangeGetAttribute("; + oss << "data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.data); + oss << ", data_size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.data_size); + oss << ", attribute="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.attribute); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttribute.count); + oss << ")"; + break; + case HIP_API_ID_hipMemRangeGetAttributes: + oss << "hipMemRangeGetAttributes("; + if (data->args.hipMemRangeGetAttributes.data == NULL) oss << "data=NULL"; + else { oss << "data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.data__val); } + if (data->args.hipMemRangeGetAttributes.data_sizes == 
NULL) oss << ", data_sizes=NULL"; + else { oss << ", data_sizes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.data_sizes__val); } + if (data->args.hipMemRangeGetAttributes.attributes == NULL) oss << ", attributes=NULL"; + else { oss << ", attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.attributes__val); } + oss << ", num_attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.num_attributes); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.dev_ptr); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRangeGetAttributes.count); + oss << ")"; + break; + case HIP_API_ID_hipMemRelease: + oss << "hipMemRelease("; + oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRelease.handle); + oss << ")"; + break; + case HIP_API_ID_hipMemRetainAllocationHandle: + oss << "hipMemRetainAllocationHandle("; + if (data->args.hipMemRetainAllocationHandle.handle == NULL) oss << "handle=NULL"; + else { oss << "handle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRetainAllocationHandle.handle__val); } + oss << ", addr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemRetainAllocationHandle.addr); + oss << ")"; + break; + case HIP_API_ID_hipMemSetAccess: + oss << "hipMemSetAccess("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.ptr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.size); + if (data->args.hipMemSetAccess.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.desc__val); } + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemSetAccess.count); + oss << ")"; + break; + case 
HIP_API_ID_hipMemUnmap: + oss << "hipMemUnmap("; + oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemUnmap.ptr); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemUnmap.size); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy: + oss << "hipMemcpy("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.sizeBytes); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2D: + oss << "hipMemcpy2D("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.spitch); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2D.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DAsync: + oss << "hipMemcpy2DAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.spitch); + oss << ", 
width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DFromArray: + oss << "hipMemcpy2DFromArray("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.src); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.hOffset); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DFromArrayAsync: + oss << "hipMemcpy2DFromArrayAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.dst); + oss << ", dpitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.dpitch); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.src); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.wOffset); + oss << ", hOffset="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.hOffset); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DFromArrayAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DToArray: + oss << "hipMemcpy2DToArray("; + if (data->args.hipMemcpy2DToArray.dst == NULL) oss << "dst=NULL"; + else { oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.dst__val); } + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.hOffset); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.spitch); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy2DToArrayAsync: + oss << "hipMemcpy2DToArrayAsync("; + if (data->args.hipMemcpy2DToArrayAsync.dst == NULL) oss << "dst=NULL"; + else { oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.dst__val); } + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.wOffset); + 
oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.hOffset); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.src); + oss << ", spitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.spitch); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.height); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3D: + oss << "hipMemcpy3D("; + if (data->args.hipMemcpy3D.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3D.p__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemcpy3DAsync: + oss << "hipMemcpy3DAsync("; + if (data->args.hipMemcpy3DAsync.p == NULL) oss << "p=NULL"; + else { oss << "p="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DAsync.p__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy3DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyAsync: + oss << "hipMemcpyAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.sizeBytes); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAsync.stream); + oss << 
")"; + break; + case HIP_API_ID_hipMemcpyAtoH: + oss << "hipMemcpyAtoH("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.dst); + if (data->args.hipMemcpyAtoH.srcArray == NULL) oss << ", srcArray=NULL"; + else { oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.srcArray__val); } + oss << ", srcOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.srcOffset); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.count); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoD: + oss << "hipMemcpyDtoD("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoD.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoD.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoD.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoDAsync: + oss << "hipMemcpyDtoDAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoDAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoH: + oss << "hipMemcpyDtoH("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoH.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoH.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoH.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyDtoHAsync: + oss << "hipMemcpyDtoHAsync("; + oss << "dst="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyDtoHAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyFromArray: + oss << "hipMemcpyFromArray("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.dst); + oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.srcArray); + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.hOffset); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyFromSymbol: + oss << "hipMemcpyFromSymbol("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.symbol); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyFromSymbolAsync: + oss << "hipMemcpyFromSymbolAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemcpyFromSymbolAsync.dst); + oss << ", symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.symbol); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyFromSymbolAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyHtoA: + oss << "hipMemcpyHtoA("; + if (data->args.hipMemcpyHtoA.dstArray == NULL) oss << "dstArray=NULL"; + else { oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.dstArray__val); } + oss << ", dstOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.dstOffset); + oss << ", srcHost="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.srcHost); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.count); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyHtoD: + oss << "hipMemcpyHtoD("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoD.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoD.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoD.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyHtoDAsync: + oss << "hipMemcpyHtoDAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoDAsync.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoDAsync.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemcpyHtoDAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoDAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyParam2D: + oss << "hipMemcpyParam2D("; + if (data->args.hipMemcpyParam2D.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyParam2D.pCopy__val); } + oss << ")"; + break; + case HIP_API_ID_hipMemcpyParam2DAsync: + oss << "hipMemcpyParam2DAsync("; + if (data->args.hipMemcpyParam2DAsync.pCopy == NULL) oss << "pCopy=NULL"; + else { oss << "pCopy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyParam2DAsync.pCopy__val); } + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyParam2DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyPeer: + oss << "hipMemcpyPeer("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.dst); + oss << ", dstDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.dstDeviceId); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.src); + oss << ", srcDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.srcDeviceId); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeer.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyPeerAsync: + oss << "hipMemcpyPeerAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.dst); + oss << ", dstDeviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.dstDeviceId); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.src); + oss << ", srcDevice="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.srcDevice); + oss << ", 
sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyPeerAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyToArray: + oss << "hipMemcpyToArray("; + if (data->args.hipMemcpyToArray.dst == NULL) oss << "dst=NULL"; + else { oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.dst__val); } + oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.wOffset); + oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.hOffset); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.src); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.count); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyToSymbol: + oss << "hipMemcpyToSymbol("; + oss << "symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbol.kind); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyToSymbolAsync: + oss << "hipMemcpyToSymbolAsync("; + oss << "symbol="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.symbol); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.src); + oss << ", sizeBytes="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.sizeBytes); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.offset); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToSymbolAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemcpyWithStream: + oss << "hipMemcpyWithStream("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.dst); + oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.src); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.sizeBytes); + oss << ", kind="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.kind); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyWithStream.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemset: + oss << "hipMemset("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset.dst); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset.value); + oss << ", sizeBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset.sizeBytes); + oss << ")"; + break; + case HIP_API_ID_hipMemset2D: + oss << "hipMemset2D("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.dst); + oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.pitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2D.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipMemset2D.height); + oss << ")"; + break; + case HIP_API_ID_hipMemset2DAsync: + oss << "hipMemset2DAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.dst); + oss << ", pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.pitch); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.value); + oss << ", width="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.width); + oss << ", height="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.height); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset2DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemset3D: + oss << "hipMemset3D("; + oss << "pitchedDevPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3D.pitchedDevPtr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3D.value); + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3D.extent); + oss << ")"; + break; + case HIP_API_ID_hipMemset3DAsync: + oss << "hipMemset3DAsync("; + oss << "pitchedDevPtr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.pitchedDevPtr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.value); + oss << ", extent="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.extent); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemset3DAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetAsync: + oss << "hipMemsetAsync("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.dst); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.value); + oss << ", sizeBytes="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.sizeBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD16: + oss << "hipMemsetD16("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16.count); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD16Async: + oss << "hipMemsetD16Async("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD16Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD32: + oss << "hipMemsetD32("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32.count); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD32Async: + oss << "hipMemsetD32Async("; + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.dst); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD32Async.stream); + oss << ")"; + break; + 
case HIP_API_ID_hipMemsetD8: + oss << "hipMemsetD8("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8.count); + oss << ")"; + break; + case HIP_API_ID_hipMemsetD8Async: + oss << "hipMemsetD8Async("; + oss << "dest="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.dest); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.value); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.count); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemsetD8Async.stream); + oss << ")"; + break; + case HIP_API_ID_hipMipmappedArrayCreate: + oss << "hipMipmappedArrayCreate("; + if (data->args.hipMipmappedArrayCreate.pHandle == NULL) oss << "pHandle=NULL"; + else { oss << "pHandle="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayCreate.pHandle__val); } + if (data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc == NULL) oss << ", pMipmappedArrayDesc=NULL"; + else { oss << ", pMipmappedArrayDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayCreate.pMipmappedArrayDesc__val); } + oss << ", numMipmapLevels="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayCreate.numMipmapLevels); + oss << ")"; + break; + case HIP_API_ID_hipMipmappedArrayDestroy: + oss << "hipMipmappedArrayDestroy("; + oss << "hMipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayDestroy.hMipmappedArray); + oss << ")"; + break; + case HIP_API_ID_hipMipmappedArrayGetLevel: + oss << "hipMipmappedArrayGetLevel("; + if (data->args.hipMipmappedArrayGetLevel.pLevelArray == NULL) oss << "pLevelArray=NULL"; + 
else { oss << "pLevelArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayGetLevel.pLevelArray__val); } + oss << ", hMipMappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayGetLevel.hMipMappedArray); + oss << ", level="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMipmappedArrayGetLevel.level); + oss << ")"; + break; + case HIP_API_ID_hipModuleGetFunction: + oss << "hipModuleGetFunction("; + if (data->args.hipModuleGetFunction.function == NULL) oss << "function=NULL"; + else { oss << "function="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunction.function__val); } + oss << ", module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunction.module); + if (data->args.hipModuleGetFunction.kname == NULL) oss << ", kname=NULL"; + else { oss << ", kname="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunction.kname__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleGetGlobal: + oss << "hipModuleGetGlobal("; + if (data->args.hipModuleGetGlobal.dptr == NULL) oss << "dptr=NULL"; + else { oss << "dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.dptr__val); } + if (data->args.hipModuleGetGlobal.bytes == NULL) oss << ", bytes=NULL"; + else { oss << ", bytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.bytes__val); } + oss << ", hmod="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.hmod); + if (data->args.hipModuleGetGlobal.name == NULL) oss << ", name=NULL"; + else { oss << ", name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetGlobal.name__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleGetTexRef: + oss << "hipModuleGetTexRef("; + if (data->args.hipModuleGetTexRef.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; 
roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipModuleGetTexRef.texRef__val); } + oss << ", hmod="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetTexRef.hmod); + if (data->args.hipModuleGetTexRef.name == NULL) oss << ", name=NULL"; + else { oss << ", name="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetTexRef.name__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLaunchCooperativeKernel: + oss << "hipModuleLaunchCooperativeKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.f); + oss << ", gridDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.gridDimX); + oss << ", gridDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.gridDimY); + oss << ", gridDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.gridDimZ); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.blockDimX); + oss << ", blockDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.blockDimY); + oss << ", blockDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.blockDimZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.stream); + if (data->args.hipModuleLaunchCooperativeKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernel.kernelParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLaunchCooperativeKernelMultiDevice: + 
oss << "hipModuleLaunchCooperativeKernelMultiDevice("; + if (data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList == NULL) oss << "launchParamsList=NULL"; + else { oss << "launchParamsList="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernelMultiDevice.launchParamsList__val); } + oss << ", numDevices="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernelMultiDevice.numDevices); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchCooperativeKernelMultiDevice.flags); + oss << ")"; + break; + case HIP_API_ID_hipModuleLaunchKernel: + oss << "hipModuleLaunchKernel("; + oss << "f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.f); + oss << ", gridDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.gridDimX); + oss << ", gridDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.gridDimY); + oss << ", gridDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.gridDimZ); + oss << ", blockDimX="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.blockDimX); + oss << ", blockDimY="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.blockDimY); + oss << ", blockDimZ="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.blockDimZ); + oss << ", sharedMemBytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.sharedMemBytes); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.stream); + if (data->args.hipModuleLaunchKernel.kernelParams == NULL) oss << ", kernelParams=NULL"; + else { oss << ", kernelParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.kernelParams__val); } + if 
(data->args.hipModuleLaunchKernel.extra == NULL) oss << ", extra=NULL"; + else { oss << ", extra="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLaunchKernel.extra__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLoad: + oss << "hipModuleLoad("; + if (data->args.hipModuleLoad.module == NULL) oss << "module=NULL"; + else { oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoad.module__val); } + if (data->args.hipModuleLoad.fname == NULL) oss << ", fname=NULL"; + else { oss << ", fname="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoad.fname__val); } + oss << ")"; + break; + case HIP_API_ID_hipModuleLoadData: + oss << "hipModuleLoadData("; + if (data->args.hipModuleLoadData.module == NULL) oss << "module=NULL"; + else { oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadData.module__val); } + oss << ", image="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadData.image); + oss << ")"; + break; + case HIP_API_ID_hipModuleLoadDataEx: + oss << "hipModuleLoadDataEx("; + if (data->args.hipModuleLoadDataEx.module == NULL) oss << "module=NULL"; + else { oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.module__val); } + oss << ", image="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.image); + oss << ", numOptions="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.numOptions); + if (data->args.hipModuleLoadDataEx.options == NULL) oss << ", options=NULL"; + else { oss << ", options="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.options__val); } + if (data->args.hipModuleLoadDataEx.optionsValues == NULL) oss << ", optionsValues=NULL"; + else { oss << ", optionsValues="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleLoadDataEx.optionsValues__val); } + oss << 
")"; + break; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor: + oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessor("; + if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.f); + oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.blockSize); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessor.dynSharedMemPerBlk); + oss << ")"; + break; + case HIP_API_ID_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + oss << "hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags("; + if (data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f); + oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynSharedMemPerBlk); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags); + oss << ")"; + break; + case 
HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSize: + oss << "hipModuleOccupancyMaxPotentialBlockSize("; + if (data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL"; + else { oss << "gridSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.gridSize__val); } + if (data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL"; + else { oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSize__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.f); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk); + oss << ", blockSizeLimit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSize.blockSizeLimit); + oss << ")"; + break; + case HIP_API_ID_hipModuleOccupancyMaxPotentialBlockSizeWithFlags: + oss << "hipModuleOccupancyMaxPotentialBlockSizeWithFlags("; + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize == NULL) oss << "gridSize=NULL"; + else { oss << "gridSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.gridSize__val); } + if (data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize == NULL) oss << ", blockSize=NULL"; + else { oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSize__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.f); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.dynSharedMemPerBlk); + oss << ", blockSizeLimit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.blockSizeLimit); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleOccupancyMaxPotentialBlockSizeWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipModuleUnload: + oss << "hipModuleUnload("; + oss << "module="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleUnload.module); + oss << ")"; + break; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessor: + oss << "hipOccupancyMaxActiveBlocksPerMultiprocessor("; + if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.f); + oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.blockSize); + oss << ", dynamicSMemSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessor.dynamicSMemSize); + oss << ")"; + break; + case HIP_API_ID_hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags: + oss << "hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags("; + if (data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks == NULL) oss << "numBlocks=NULL"; + else { oss << "numBlocks="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.numBlocks__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.f); + oss << ", blockSize="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.blockSize); + oss << ", dynamicSMemSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.dynamicSMemSize); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipOccupancyMaxPotentialBlockSize: + oss << "hipOccupancyMaxPotentialBlockSize("; + if (data->args.hipOccupancyMaxPotentialBlockSize.gridSize == NULL) oss << "gridSize=NULL"; + else { oss << "gridSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.gridSize__val); } + if (data->args.hipOccupancyMaxPotentialBlockSize.blockSize == NULL) oss << ", blockSize=NULL"; + else { oss << ", blockSize="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.blockSize__val); } + oss << ", f="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.f); + oss << ", dynSharedMemPerBlk="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.dynSharedMemPerBlk); + oss << ", blockSizeLimit="; roctracer::hip_support::detail::operator<<(oss, data->args.hipOccupancyMaxPotentialBlockSize.blockSizeLimit); + oss << ")"; + break; + case HIP_API_ID_hipPeekAtLastError: + oss << "hipPeekAtLastError("; + oss << ")"; + break; + case HIP_API_ID_hipPointerGetAttribute: + oss << "hipPointerGetAttribute("; + oss << "data="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttribute.data); + oss << ", attribute="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttribute.attribute); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttribute.ptr); + oss << ")"; + break; + case 
HIP_API_ID_hipPointerGetAttributes: + oss << "hipPointerGetAttributes("; + if (data->args.hipPointerGetAttributes.attributes == NULL) oss << "attributes=NULL"; + else { oss << "attributes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttributes.attributes__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerGetAttributes.ptr); + oss << ")"; + break; + case HIP_API_ID_hipPointerSetAttribute: + oss << "hipPointerSetAttribute("; + oss << "value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerSetAttribute.value); + oss << ", attribute="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerSetAttribute.attribute); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipPointerSetAttribute.ptr); + oss << ")"; + break; + case HIP_API_ID_hipProfilerStart: + oss << "hipProfilerStart("; + oss << ")"; + break; + case HIP_API_ID_hipProfilerStop: + oss << "hipProfilerStop("; + oss << ")"; + break; + case HIP_API_ID_hipRuntimeGetVersion: + oss << "hipRuntimeGetVersion("; + if (data->args.hipRuntimeGetVersion.runtimeVersion == NULL) oss << "runtimeVersion=NULL"; + else { oss << "runtimeVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipRuntimeGetVersion.runtimeVersion__val); } + oss << ")"; + break; + case HIP_API_ID_hipSetDevice: + oss << "hipSetDevice("; + oss << "deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetDevice.deviceId); + oss << ")"; + break; + case HIP_API_ID_hipSetDeviceFlags: + oss << "hipSetDeviceFlags("; + oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetDeviceFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipSetupArgument: + oss << "hipSetupArgument("; + oss << "arg="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetupArgument.arg); + oss << ", size="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipSetupArgument.size); + oss << ", offset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSetupArgument.offset); + oss << ")"; + break; + case HIP_API_ID_hipSignalExternalSemaphoresAsync: + oss << "hipSignalExternalSemaphoresAsync("; + if (data->args.hipSignalExternalSemaphoresAsync.extSemArray == NULL) oss << "extSemArray=NULL"; + else { oss << "extSemArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.extSemArray__val); } + if (data->args.hipSignalExternalSemaphoresAsync.paramsArray == NULL) oss << ", paramsArray=NULL"; + else { oss << ", paramsArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.paramsArray__val); } + oss << ", numExtSems="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.numExtSems); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipSignalExternalSemaphoresAsync.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamAddCallback: + oss << "hipStreamAddCallback("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.stream); + oss << ", callback="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.callback); + oss << ", userData="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.userData); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAddCallback.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamAttachMemAsync: + oss << "hipStreamAttachMemAsync("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.stream); + oss << ", dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.dev_ptr); + oss << ", length="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.length); + 
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamAttachMemAsync.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamBeginCapture: + oss << "hipStreamBeginCapture("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCapture.stream); + oss << ", mode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBeginCapture.mode); + oss << ")"; + break; + case HIP_API_ID_hipStreamCreate: + oss << "hipStreamCreate("; + if (data->args.hipStreamCreate.stream == NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreate.stream__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamCreateWithFlags: + oss << "hipStreamCreateWithFlags("; + if (data->args.hipStreamCreateWithFlags.stream == NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithFlags.stream__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithFlags.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamCreateWithPriority: + oss << "hipStreamCreateWithPriority("; + if (data->args.hipStreamCreateWithPriority.stream == NULL) oss << "stream=NULL"; + else { oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithPriority.stream__val); } + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithPriority.flags); + oss << ", priority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamCreateWithPriority.priority); + oss << ")"; + break; + case HIP_API_ID_hipStreamDestroy: + oss << "hipStreamDestroy("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamDestroy.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamEndCapture: + oss << "hipStreamEndCapture("; + oss << "stream="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamEndCapture.stream); + if (data->args.hipStreamEndCapture.pGraph == NULL) oss << ", pGraph=NULL"; + else { oss << ", pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamEndCapture.pGraph__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetCaptureInfo: + oss << "hipStreamGetCaptureInfo("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.stream); + if (data->args.hipStreamGetCaptureInfo.pCaptureStatus == NULL) oss << ", pCaptureStatus=NULL"; + else { oss << ", pCaptureStatus="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.pCaptureStatus__val); } + if (data->args.hipStreamGetCaptureInfo.pId == NULL) oss << ", pId=NULL"; + else { oss << ", pId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.pId__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetCaptureInfo_v2: + oss << "hipStreamGetCaptureInfo_v2("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.stream); + if (data->args.hipStreamGetCaptureInfo_v2.captureStatus_out == NULL) oss << ", captureStatus_out=NULL"; + else { oss << ", captureStatus_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.captureStatus_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.id_out == NULL) oss << ", id_out=NULL"; + else { oss << ", id_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.id_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.graph_out == NULL) oss << ", graph_out=NULL"; + else { oss << ", graph_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.graph_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.dependencies_out == NULL) oss << ", dependencies_out=NULL"; + else { oss << ", 
dependencies_out="; roctracer::hip_support::detail::operator<<(oss, (void*)data->args.hipStreamGetCaptureInfo_v2.dependencies_out__val); } + if (data->args.hipStreamGetCaptureInfo_v2.numDependencies_out == NULL) oss << ", numDependencies_out=NULL"; + else { oss << ", numDependencies_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo_v2.numDependencies_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetDevice: + oss << "hipStreamGetDevice("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetDevice.stream); + if (data->args.hipStreamGetDevice.device == NULL) oss << ", device=NULL"; + else { oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetDevice.device__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetFlags: + oss << "hipStreamGetFlags("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetFlags.stream); + if (data->args.hipStreamGetFlags.flags == NULL) oss << ", flags=NULL"; + else { oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetFlags.flags__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamGetPriority: + oss << "hipStreamGetPriority("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetPriority.stream); + if (data->args.hipStreamGetPriority.priority == NULL) oss << ", priority=NULL"; + else { oss << ", priority="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetPriority.priority__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamIsCapturing: + oss << "hipStreamIsCapturing("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamIsCapturing.stream); + if (data->args.hipStreamIsCapturing.pCaptureStatus == NULL) oss << ", pCaptureStatus=NULL"; + else { oss << ", pCaptureStatus="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipStreamIsCapturing.pCaptureStatus__val); } + oss << ")"; + break; + case HIP_API_ID_hipStreamQuery: + oss << "hipStreamQuery("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamQuery.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamSynchronize: + oss << "hipStreamSynchronize("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSynchronize.stream); + oss << ")"; + break; + case HIP_API_ID_hipStreamUpdateCaptureDependencies: + oss << "hipStreamUpdateCaptureDependencies("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.stream); + if (data->args.hipStreamUpdateCaptureDependencies.dependencies == NULL) oss << ", dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.dependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.numDependencies); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamUpdateCaptureDependencies.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamWaitEvent: + oss << "hipStreamWaitEvent("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitEvent.stream); + oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitEvent.event); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitEvent.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamWaitValue32: + oss << "hipStreamWaitValue32("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.ptr); + oss << ", value="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.flags); + oss << ", mask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue32.mask); + oss << ")"; + break; + case HIP_API_ID_hipStreamWaitValue64: + oss << "hipStreamWaitValue64("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.ptr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.flags); + oss << ", mask="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWaitValue64.mask); + oss << ")"; + break; + case HIP_API_ID_hipStreamWriteValue32: + oss << "hipStreamWriteValue32("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.ptr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue32.flags); + oss << ")"; + break; + case HIP_API_ID_hipStreamWriteValue64: + oss << "hipStreamWriteValue64("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.stream); + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.ptr); + oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.value); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipStreamWriteValue64.flags); + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetAddress: + oss << "hipTexRefGetAddress("; + if (data->args.hipTexRefGetAddress.dev_ptr == NULL) oss << "dev_ptr=NULL"; + else { oss << "dev_ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetAddress.dev_ptr__val); } + if (data->args.hipTexRefGetAddress.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetAddress.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetFlags: + oss << "hipTexRefGetFlags("; + if (data->args.hipTexRefGetFlags.pFlags == NULL) oss << "pFlags=NULL"; + else { oss << "pFlags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFlags.pFlags__val); } + if (data->args.hipTexRefGetFlags.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFlags.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetFormat: + oss << "hipTexRefGetFormat("; + if (data->args.hipTexRefGetFormat.pFormat == NULL) oss << "pFormat=NULL"; + else { oss << "pFormat="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFormat.pFormat__val); } + if (data->args.hipTexRefGetFormat.pNumChannels == NULL) oss << ", pNumChannels=NULL"; + else { oss << ", pNumChannels="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFormat.pNumChannels__val); } + if (data->args.hipTexRefGetFormat.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetFormat.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMaxAnisotropy: + oss << "hipTexRefGetMaxAnisotropy("; + if (data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio == NULL) oss << "pmaxAnsio=NULL"; + else { oss << "pmaxAnsio="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMaxAnisotropy.pmaxAnsio__val); } + if (data->args.hipTexRefGetMaxAnisotropy.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMaxAnisotropy.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMipMappedArray: + oss << "hipTexRefGetMipMappedArray("; + if (data->args.hipTexRefGetMipMappedArray.pArray == NULL) oss << "pArray=NULL"; + else { oss << "pArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipMappedArray.pArray__val); } + if (data->args.hipTexRefGetMipMappedArray.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipMappedArray.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMipmapLevelBias: + oss << "hipTexRefGetMipmapLevelBias("; + if (data->args.hipTexRefGetMipmapLevelBias.pbias == NULL) oss << "pbias=NULL"; + else { oss << "pbias="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelBias.pbias__val); } + if (data->args.hipTexRefGetMipmapLevelBias.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelBias.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefGetMipmapLevelClamp: + oss << "hipTexRefGetMipmapLevelClamp("; + if (data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp == NULL) oss << "pminMipmapLevelClamp=NULL"; + else { oss << "pminMipmapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelClamp.pminMipmapLevelClamp__val); } + if (data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp == NULL) oss << ", pmaxMipmapLevelClamp=NULL"; + else { oss << ", pmaxMipmapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipTexRefGetMipmapLevelClamp.pmaxMipmapLevelClamp__val); } + if (data->args.hipTexRefGetMipmapLevelClamp.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefGetMipmapLevelClamp.texRef__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetAddress: + oss << "hipTexRefSetAddress("; + if (data->args.hipTexRefSetAddress.ByteOffset == NULL) oss << "ByteOffset=NULL"; + else { oss << "ByteOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.ByteOffset__val); } + if (data->args.hipTexRefSetAddress.texRef == NULL) oss << ", texRef=NULL"; + else { oss << ", texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.texRef__val); } + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.dptr); + oss << ", bytes="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress.bytes); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetAddress2D: + oss << "hipTexRefSetAddress2D("; + if (data->args.hipTexRefSetAddress2D.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.texRef__val); } + if (data->args.hipTexRefSetAddress2D.desc == NULL) oss << ", desc=NULL"; + else { oss << ", desc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.desc__val); } + oss << ", dptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.dptr); + oss << ", Pitch="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetAddress2D.Pitch); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetArray: + oss << "hipTexRefSetArray("; + if (data->args.hipTexRefSetArray.tex == NULL) oss << "tex=NULL"; + else { oss << "tex="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetArray.tex__val); } 
+ oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetArray.array); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetArray.flags); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetBorderColor: + oss << "hipTexRefSetBorderColor("; + if (data->args.hipTexRefSetBorderColor.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetBorderColor.texRef__val); } + if (data->args.hipTexRefSetBorderColor.pBorderColor == NULL) oss << ", pBorderColor=NULL"; + else { oss << ", pBorderColor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetBorderColor.pBorderColor__val); } + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetFlags: + oss << "hipTexRefSetFlags("; + if (data->args.hipTexRefSetFlags.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFlags.texRef__val); } + oss << ", Flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFlags.Flags); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetFormat: + oss << "hipTexRefSetFormat("; + if (data->args.hipTexRefSetFormat.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFormat.texRef__val); } + oss << ", fmt="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFormat.fmt); + oss << ", NumPackedComponents="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetFormat.NumPackedComponents); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMaxAnisotropy: + oss << "hipTexRefSetMaxAnisotropy("; + if (data->args.hipTexRefSetMaxAnisotropy.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMaxAnisotropy.texRef__val); } + oss << ", 
maxAniso="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMaxAnisotropy.maxAniso); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMipmapLevelBias: + oss << "hipTexRefSetMipmapLevelBias("; + if (data->args.hipTexRefSetMipmapLevelBias.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelBias.texRef__val); } + oss << ", bias="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelBias.bias); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMipmapLevelClamp: + oss << "hipTexRefSetMipmapLevelClamp("; + if (data->args.hipTexRefSetMipmapLevelClamp.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelClamp.texRef__val); } + oss << ", minMipMapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelClamp.minMipMapLevelClamp); + oss << ", maxMipMapLevelClamp="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmapLevelClamp.maxMipMapLevelClamp); + oss << ")"; + break; + case HIP_API_ID_hipTexRefSetMipmappedArray: + oss << "hipTexRefSetMipmappedArray("; + if (data->args.hipTexRefSetMipmappedArray.texRef == NULL) oss << "texRef=NULL"; + else { oss << "texRef="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmappedArray.texRef__val); } + if (data->args.hipTexRefSetMipmappedArray.mipmappedArray == NULL) oss << ", mipmappedArray=NULL"; + else { oss << ", mipmappedArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmappedArray.mipmappedArray__val); } + oss << ", Flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipTexRefSetMipmappedArray.Flags); + oss << ")"; + break; + case HIP_API_ID_hipThreadExchangeStreamCaptureMode: + oss << "hipThreadExchangeStreamCaptureMode("; + if 
(data->args.hipThreadExchangeStreamCaptureMode.mode == NULL) oss << "mode=NULL"; + else { oss << "mode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipThreadExchangeStreamCaptureMode.mode__val); } + oss << ")"; + break; + case HIP_API_ID_hipUserObjectCreate: + oss << "hipUserObjectCreate("; + if (data->args.hipUserObjectCreate.object_out == NULL) oss << "object_out=NULL"; + else { oss << "object_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.object_out__val); } + oss << ", ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.ptr); + oss << ", destroy="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.destroy); + oss << ", initialRefcount="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.initialRefcount); + oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectCreate.flags); + oss << ")"; + break; + case HIP_API_ID_hipUserObjectRelease: + oss << "hipUserObjectRelease("; + oss << "object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRelease.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRelease.count); + oss << ")"; + break; + case HIP_API_ID_hipUserObjectRetain: + oss << "hipUserObjectRetain("; + oss << "object="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRetain.object); + oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipUserObjectRetain.count); + oss << ")"; + break; + case HIP_API_ID_hipWaitExternalSemaphoresAsync: + oss << "hipWaitExternalSemaphoresAsync("; + if (data->args.hipWaitExternalSemaphoresAsync.extSemArray == NULL) oss << "extSemArray=NULL"; + else { oss << "extSemArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.extSemArray__val); } + if 
(data->args.hipWaitExternalSemaphoresAsync.paramsArray == NULL) oss << ", paramsArray=NULL"; + else { oss << ", paramsArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.paramsArray__val); } + oss << ", numExtSems="; roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.numExtSems); + oss << ", stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipWaitExternalSemaphoresAsync.stream); + oss << ")"; + break; + default: oss << "unknown"; + }; + return strdup(oss.str().c_str()); +} +#endif // HIP_PROF_HIP_API_STRING +#endif // _HIP_PROF_STR_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_runtime_prof.h b/projects/clr/hipamd/include/hip/amd_detail/hip_runtime_prof.h new file mode 100644 index 0000000000..d201ab517c --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_runtime_prof.h @@ -0,0 +1,77 @@ +/* +Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_PROF_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_PROF_H + +// HIP ROCclr operation IDs; hipEnableActivityCallback documents these as the values of its `op` argument +enum HipVdiOpId { + kHipVdiOpIdDispatch = 0, + kHipVdiOpIdCopy = 1, + kHipVdiOpIdBarrier = 2, + kHipVdiOpIdNumber = 3 // one past the last ID above, i.e. the number of operation IDs +}; + +// Types of ROCclr commands; hipGetCmdName documents these as the values of its `id` argument +enum HipVdiCommandKind { + kHipVdiCommandKernel = 0x11F0, + kHipVdiMemcpyDeviceToHost = 0x11F3, + kHipHipVdiMemcpyHostToDevice = 0x11F4, // NOTE(review): doubled "Hip" prefix looks like a typo; the name is public, so keep it unless a compatibility alias is added + kHipVdiMemcpyDeviceToDevice = 0x11F5, + kHipVidMemcpyDeviceToHostRect = 0x1201, // NOTE(review): "Vid" (vs. "Vdi" everywhere else) looks like a typo; same caveat as above + kHipVdiMemcpyHostToDeviceRect = 0x1202, + kHipVdiMemcpyDeviceToDeviceRect = 0x1203, + kHipVdiFillMemory = 0x1207, +}; + +/** + * @brief Initializes activity callback + * + * @param [input] id_callback Event ID callback function + * @param [input] op_callback Event operation callback function + * @param [input] arg Arguments passed into callback + * + * @note Both callbacks are passed as untyped void*; the function signatures they must have are not visible in this header. + * + * @returns None + */ +void hipInitActivityCallback(void* id_callback, void* op_callback, void* arg); + +/** + * @brief Enables activity callback + * + * @param [input] op Operation, which will trigger a callback (@see HipVdiOpId) + * @param [input] enable Enable state for the callback + * + * @note NOTE(review): uint32_t is used here although this header includes nothing; the includer must have declared it (e.g. via <stdint.h>) beforehand. + * + * @returns True if successful + */ +bool hipEnableActivityCallback(uint32_t op, bool enable); + +/** + * @brief Returns the description string for the operation kind + * + * @param [input] id Command kind id (@see HipVdiCommandKind) + * + * @returns A pointer to a const string with the command description + */ +const char* hipGetCmdName(uint32_t id); + +#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_PROF_H + +diff --git a/projects/clr/hipamd/include/hip/amd_detail/host_defines.h b/projects/clr/hipamd/include/hip/amd_detail/host_defines.h new file mode
100644 index 0000000000..8caed1d182 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/host_defines.h @@ -0,0 +1,184 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file amd_detail/host_defines.h + * @brief Defines the HIP function/variable qualifier macros (__host__, __device__, __global__, __shared__, __constant__, __noinline__, __forceinline__, __hip_img_chk__) and, when compiling HIP code with clang, the __hip_internal fixed-width integer typedefs and type traits. + */ + +#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H +#define HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H + +// The following macro should be removed once upstream has been updated. +// It is defined here as a workaround for a rocThrust build failure. 
+#define HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H + +// Add guard to Generic Grid Launch method: GENERIC_GRID_LAUNCH defaults to 1 unless the includer already defined it +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif + +#if defined(__clang__) && defined(__HIP__) + +// HIP code compiled with clang: declare fixed-width integer typedefs and a small set of type traits inside __hip_internal +// (presumably so this header does not need <cstdint>/<type_traits> -- TODO confirm the motivation). +// NOTE(review): in this copy the template parameter/argument lists (e.g. `template struct integral_constant`, +// `typedef integral_constant true_type`, the is_integral/is_arithmetic specializations) have lost their angle-bracket +// contents and will not compile as written; restore them from the upstream header before using this text. +namespace __hip_internal { +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef signed long long int64_t; + +template struct integral_constant { + static constexpr const _Tp value = __v; + typedef _Tp value_type; + typedef integral_constant type; + constexpr operator value_type() const { return value; } + constexpr value_type operator()() const { return value; } +}; +template constexpr const _Tp integral_constant<_Tp, __v>::value; + +typedef integral_constant true_type; +typedef integral_constant false_type; + +template using bool_constant = integral_constant; +typedef bool_constant true_type; +typedef bool_constant false_type; + +template struct enable_if {}; +template struct enable_if { typedef __T type; }; + +template struct true_or_false_type : public false_type {}; +template<> struct true_or_false_type : public true_type {}; + +template struct is_integral : public false_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public true_type {}; +template <> struct is_integral : public 
true_type {}; + +template struct is_arithmetic : public false_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; +template <> struct is_arithmetic : public true_type {}; + +template struct is_floating_point : public false_type {}; +template<> struct is_floating_point : public true_type {}; +template<> struct is_floating_point : public true_type {}; +template<> struct is_floating_point : public true_type {}; + +template struct is_same : public false_type {}; +template struct is_same<__T, __T> : public true_type {}; + +template::value> + struct is_signed : public false_type {}; +template + struct is_signed<_Tp, true> : public true_or_false_type<_Tp(-1) < _Tp(0)> {}; + +template struct char_traits; +template> class basic_istream; +template> class basic_ostream; +typedef basic_istream istream; +typedef basic_ostream ostream; + +template + struct is_standard_layout + : public integral_constant + { }; + +template + struct is_trivial + : public integral_constant + { }; +} +typedef __hip_internal::uint8_t __hip_uint8_t; +typedef __hip_internal::uint16_t __hip_uint16_t; +typedef __hip_internal::uint32_t __hip_uint32_t; +typedef __hip_internal::uint64_t __hip_uint64_t; +typedef __hip_internal::int8_t __hip_int8_t; +typedef 
__hip_internal::int16_t __hip_int16_t; +typedef __hip_internal::int32_t __hip_int32_t; +typedef __hip_internal::int64_t __hip_int64_t; + +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +#define __host__ __attribute__((host)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ + +// __noinline__ is only defined as a macro when the compiler does not report the cuda_noinline_keyword feature +#if !defined(__has_feature) || !__has_feature(cuda_noinline_keyword) +#define __noinline__ __attribute__((noinline)) +#endif + +#define __forceinline__ inline __attribute__((always_inline)) + +// With __HIP_NO_IMAGE_SUPPORT set, anything marked __hip_img_chk__ is tagged `unavailable` with the message below; otherwise the marker is empty +#if __HIP_NO_IMAGE_SUPPORT +#define __hip_img_chk__ __attribute__((unavailable("The image/texture API not supported on the device"))) +#else +#define __hip_img_chk__ +#endif + +#else + +// Any compiler other than clang in HIP mode, i.e. !(__clang__ && __HIP__) (historically labelled "Non-HCC compiler") +/** + * Function and kernel markers: here every qualifier expands to nothing, except __forceinline__, which is reduced to plain `inline` + */ +#define __host__ +#define __device__ + +#define __global__ + +#define __noinline__ +#define __forceinline__ inline + +#define __shared__ +#define __constant__ + +#define __hip_img_chk__ +#endif + +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/hsa_helpers.hpp b/projects/clr/hipamd/include/hip/amd_detail/hsa_helpers.hpp new file mode 100644 index 0000000000..0c17085022 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/hsa_helpers.hpp @@ -0,0 +1,102 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#include + +#include +#include +#include + +namespace hip_impl { +inline void* address(hsa_executable_symbol_t x) { + void* r = nullptr; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r); + + return r; +} + +inline hsa_agent_t agent(hsa_executable_symbol_t x) { + hsa_agent_t r = {}; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_AGENT, &r); + + return r; +} + +inline std::uint32_t group_size(hsa_executable_symbol_t x) { + std::uint32_t r = 0u; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &r); + + return r; +} + +inline hsa_isa_t isa(hsa_agent_t x) { + hsa_isa_t r = {}; + hsa_agent_iterate_isas(x, + [](hsa_isa_t i, void* o) { + *static_cast(o) = i; // Pick the first. 
+ + return HSA_STATUS_INFO_BREAK; + }, + &r); + + return r; +} + +inline std::uint64_t kernel_object(hsa_executable_symbol_t x) { + std::uint64_t r = 0u; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &r); + + return r; +} + +inline std::string name(hsa_executable_symbol_t x) { + std::uint32_t sz = 0u; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz); + + std::string r(sz, '\0'); + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front()); + + return r; +} + +inline std::uint32_t private_size(hsa_executable_symbol_t x) { + std::uint32_t r = 0u; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &r); + + return r; +} + +inline std::uint32_t size(hsa_executable_symbol_t x) { + std::uint32_t r = 0; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r); + + return r; +} + +inline hsa_symbol_kind_t type(hsa_executable_symbol_t x) { + hsa_symbol_kind_t r = {}; + hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &r); + + return r; +} +} // namespace hip_impl \ No newline at end of file diff --git a/projects/clr/hipamd/include/hip/amd_detail/macro_based_grid_launch.hpp b/projects/clr/hipamd/include/hip/amd_detail/macro_based_grid_launch.hpp new file mode 100644 index 0000000000..d631e4d5cf --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/macro_based_grid_launch.hpp @@ -0,0 +1,798 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "concepts.hpp" +#include "helpers.hpp" + +#include "hc.hpp" +#include "hip/hip_ext.h" +#include "hip_runtime.h" + +#include +#include +#include +#include +#include + +namespace hip_impl { +namespace { +struct New_grid_launch_tag {}; +struct Old_grid_launch_tag {}; + +template +class RAII_guard { + D dtor_; + + public: + RAII_guard() = default; + + RAII_guard(const C& ctor, D dtor) : dtor_{std::move(dtor)} { ctor(); } + + RAII_guard(const RAII_guard&) = default; + RAII_guard(RAII_guard&&) = default; + + RAII_guard& operator=(const RAII_guard&) = default; + RAII_guard& operator=(RAII_guard&&) = default; + + ~RAII_guard() { dtor_(); } +}; + +template +RAII_guard make_RAII_guard(const C& ctor, D dtor) { + return RAII_guard{ctor, std::move(dtor)}; +} + +template +using is_new_grid_launch_t = typename std::conditional{}, New_grid_launch_tag, + Old_grid_launch_tag>::type; +} // namespace +// NOTE(review): RAII_guard (above) calls ctor() in its constructor and dtor_() in its destructor, but its copy and move +// operations are defaulted, so every copy or moved-from object that is destroyed calls dtor_() again; confirm that +// callers only ever hold the single object returned by make_RAII_guard. +// NOTE(review): the template heads and angle-bracket arguments in this copy (e.g. `template +class RAII_guard`, +// `std::conditional{}`) have lost their contents and will not compile as written; restore them from upstream. + +// TODO: - dispatch rank should be derived from the domain dimensions passed +// in, and not always assumed to be 3; + +// Launches k on acc_v over a 3-D extent of num_blocks * dim_blocks per axis (passed in z, y, x order), tiled by +// dim_blocks with group_mem_bytes passed to tile_with_dynamic. A std::exception thrown by parallel_for_each is +// logged to std::cerr and then handed to hip_throw. +template +requires(Domain == + {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, dim3 num_blocks, + dim3 dim_blocks, int group_mem_bytes, + const hc::accelerator_view& acc_v, K k) { + const auto d = + hc::extent<3>{num_blocks.z * dim_blocks.z, num_blocks.y * dim_blocks.y, + num_blocks.x * dim_blocks.x} + .tile_with_dynamic(dim_blocks.z, dim_blocks.y, dim_blocks.x, group_mem_bytes); + + try { + hc::parallel_for_each(acc_v, d, k); + } catch (std::exception& ex) { + std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; + hip_throw(ex); + } +} + +// TODO: these are workarounds, they should be removed. 
+ +hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&); +void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t); +void unlock_stream_hip_(hipStream_t, void*, const char*, hc::accelerator_view*); + +template +requires(Domain == {Ts...}) inline void grid_launch_hip_impl_(New_grid_launch_tag, + dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + const char* kernel_name, K k) { + void* lck_stream = nullptr; + auto acc_v = lock_stream_hip_(stream, lck_stream); + auto stream_guard = + make_RAII_guard(std::bind(print_prelaunch_trace_, kernel_name, num_blocks, dim_blocks, + group_mem_bytes, stream), + std::bind(unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v)); + + try { + grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), + group_mem_bytes, acc_v, std::move(k)); + } catch (std::exception& ex) { + std::cerr << "Failed in " << __func__ << ", with exception: " << ex.what() << std::endl; + hip_throw(ex); + } +} + +template +requires(Domain == + {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_(Old_grid_launch_tag, + dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, K k) { + grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), + group_mem_bytes, std::move(stream), std::move(k)); +} + +template +requires(Domain == {hipLaunchParm, Ts...}) inline void grid_launch_hip_impl_( + Old_grid_launch_tag, dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, hipStream_t stream, + const char* kernel_name, K k) { + grid_launch_hip_impl_(New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), + group_mem_bytes, std::move(stream), kernel_name, std::move(k)); +} + +template +requires(Domain == {Ts...}) inline std::enable_if_t< + !std::is_function::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, hipStream_t stream, + const char* kernel_name, K k) { + 
grid_launch_hip_impl_(is_new_grid_launch_t{}, std::move(num_blocks), + std::move(dim_blocks), group_mem_bytes, std::move(stream), kernel_name, + std::move(k)); +} + +template +requires(Domain == {Ts...}) inline std::enable_if_t< + !std::is_function::value> grid_launch_hip_(dim3 num_blocks, dim3 dim_blocks, + int group_mem_bytes, hipStream_t stream, K k) { + grid_launch_hip_impl_(is_new_grid_launch_t{}, std::move(num_blocks), + std::move(dim_blocks), group_mem_bytes, std::move(stream), std::move(k)); +} + +// TODO: these are temporary and purposefully noisy and disruptive. +#define make_kernel_name_hip(k, n) \ + HIP_kernel_functor_name_begin##_##k##_##HIP_kernel_functor_name_end##_##n + +#define make_kernel_functor_hip_30(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24, p25, p26, p27) \ + struct make_kernel_name_hip(function_name, 28) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + std::decay_t _p25_; \ + std::decay_t _p26_; \ + std::decay_t _p27_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_, _p25_, _p26_, _p27_); \ + } \ + } +#define make_kernel_functor_hip_29(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ 
+ p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24, p25, p26) \ + struct make_kernel_name_hip(function_name, 27) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + std::decay_t _p25_; \ + std::decay_t _p26_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_, _p25_, _p26_); \ + } \ + } +#define make_kernel_functor_hip_28(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24, p25) \ + struct make_kernel_name_hip(function_name, 26) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + std::decay_t _p25_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + 
kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_, _p25_); \ + } \ + } +#define make_kernel_functor_hip_27(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23, p24) \ + struct make_kernel_name_hip(function_name, 25) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + std::decay_t _p24_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_, _p24_); \ + } \ + } +#define make_kernel_functor_hip_26(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22, p23) \ + struct make_kernel_name_hip(function_name, 24) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t 
_p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + std::decay_t _p23_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_, _p23_); \ + } \ + } +#define make_kernel_functor_hip_25(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, \ + p22) \ + struct make_kernel_name_hip(function_name, 23) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + std::decay_t _p22_; \ + __attribute__((used, flatten)) void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_, \ + _p22_); \ + } \ + } +#define make_kernel_functor_hip_24(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21) \ + struct make_kernel_name_hip(function_name, 22) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + 
std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + std::decay_t _p21_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_, _p21_); \ + } \ + } +#define make_kernel_functor_hip_23(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20) \ + struct make_kernel_name_hip(function_name, 21) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + std::decay_t _p19_; \ + std::decay_t _p20_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_, _p20_); \ + } \ + } +#define make_kernel_functor_hip_22(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18, p19) \ + struct make_kernel_name_hip(function_name, 20) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t 
_p18_; \ + std::decay_t _p19_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_, _p19_); \ + } \ + } +#define make_kernel_functor_hip_21(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17, p18) \ + struct make_kernel_name_hip(function_name, 19) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + std::decay_t _p18_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, _p18_); \ + } \ + } +#define make_kernel_functor_hip_20(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16, p17) \ + struct make_kernel_name_hip(function_name, 18) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + std::decay_t _p17_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_, _p17_); \ + } \ + } +#define 
make_kernel_functor_hip_19(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15, p16) \ + struct make_kernel_name_hip(function_name, 17) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + std::decay_t _p16_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_, _p16_); \ + } \ + } +#define make_kernel_functor_hip_18(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14, p15) \ + struct make_kernel_name_hip(function_name, 16) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + std::decay_t _p15_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_, _p15_); \ + } \ + } +#define make_kernel_functor_hip_17(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13, p14) \ + struct make_kernel_name_hip(function_name, 15) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + 
std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + std::decay_t _p14_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_, _p14_); \ + } \ + } +#define make_kernel_functor_hip_16(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12, p13) \ + struct make_kernel_name_hip(function_name, 14) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + std::decay_t _p13_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_, _p13_); \ + } \ + } +#define make_kernel_functor_hip_15(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11, p12) \ + struct make_kernel_name_hip(function_name, 13) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + std::decay_t _p12_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_, \ + _p12_); \ + } \ + } +#define make_kernel_functor_hip_14(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10, p11) \ + struct make_kernel_name_hip(function_name, 12) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t 
_p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + std::decay_t _p11_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_, _p11_); \ + } \ + } +#define make_kernel_functor_hip_13(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9, p10) \ + struct make_kernel_name_hip(function_name, 11) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + std::decay_t _p10_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, _p10_); \ + } \ + } +#define make_kernel_functor_hip_12(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, \ + p9) \ + struct make_kernel_name_hip(function_name, 10) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + std::decay_t _p9_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_); } \ + } +#define make_kernel_functor_hip_11(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8) \ + struct make_kernel_name_hip(function_name, 9) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + std::decay_t _p8_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_); } \ + } +#define make_kernel_functor_hip_10(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7) \ + struct 
make_kernel_name_hip(function_name, 8) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + std::decay_t _p7_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_); } \ + } +#define make_kernel_functor_hip_9(function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6) \ + struct make_kernel_name_hip(function_name, 7) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + std::decay_t _p6_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_); } \ + } +#define make_kernel_functor_hip_8(function_name, kernel_name, p0, p1, p2, p3, p4, p5) \ + struct make_kernel_name_hip(function_name, 6) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + std::decay_t _p5_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_); } \ + } +#define make_kernel_functor_hip_7(function_name, kernel_name, p0, p1, p2, p3, p4) \ + struct make_kernel_name_hip(function_name, 5) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + std::decay_t _p4_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_); } \ + } +#define make_kernel_functor_hip_6(function_name, kernel_name, p0, p1, p2, p3) \ + struct make_kernel_name_hip(function_name, 4) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + std::decay_t _p3_; \ + void operator()(const hc::tiled_index<3>&) const \ + [[hc]] { kernel_name(_p0_, _p1_, _p2_, _p3_); } \ + } +#define make_kernel_functor_hip_5(function_name, kernel_name, p0, p1, p2) \ + struct 
make_kernel_name_hip(function_name, 3) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + std::decay_t _p2_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_, _p2_); } \ + } +#define make_kernel_functor_hip_4(function_name, kernel_name, p0, p1) \ + struct make_kernel_name_hip(function_name, 2) { \ + std::decay_t _p0_; \ + std::decay_t _p1_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_, _p1_); } \ + } +#define fofo(f, n) kernel_prefix_hip##f##kernel_suffix_hip##n +#define make_kernel_functor_hip_3(function_name, kernel_name, p0) \ + struct make_kernel_name_hip(function_name, 1) { \ + std::decay_t _p0_; \ + void operator()(const hc::tiled_index<3>&) const [[hc]] { kernel_name(_p0_); } \ + } +#define make_kernel_functor_hip_2(function_name, kernel_name) \ + struct make_kernel_name_hip(function_name, 0) { \ + void operator()(const hc::tiled_index<3>&)[[hc]] { return kernel_name(hipLaunchParm{}); } \ + } +#define make_kernel_functor_hip_1(...) +#define make_kernel_functor_hip_0(...) +#define make_kernel_functor_hip_(...) overload_macro_hip_(make_kernel_functor_hip_, __VA_ARGS__) + + +#define hipLaunchNamedKernelGGL(function_name, kernel_name, num_blocks, dim_blocks, \ + group_mem_bytes, stream, ...) \ + do { \ + make_kernel_functor_hip_(function_name, kernel_name, __VA_ARGS__) \ + hip_kernel_functor_impl_{__VA_ARGS__}; \ + hip_impl::grid_launch_hip_(num_blocks, dim_blocks, group_mem_bytes, stream, #kernel_name, \ + hip_kernel_functor_impl_); \ + } while (0) + +#define hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) \ + do { \ + hipLaunchNamedKernelGGL(unnamed, kernel_name, num_blocks, dim_blocks, group_mem_bytes, \ + stream, ##__VA_ARGS__); \ + } while (0) + +#define hipLaunchKernel(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...) 
\ + do { \ + hipLaunchKernelGGL(kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, \ + hipLaunchParm{}, ##__VA_ARGS__); \ + } while (0) +} // namespace hip_impl diff --git a/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h b/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h new file mode 100644 index 0000000000..9e999268ea --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h @@ -0,0 +1,694 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "host_defines.h" +#if defined(__cplusplus) + extern "C" { +#endif + +// DOT FUNCTIONS +#if __HIP_CLANG_ONLY__ +__device__ +__attribute__((const)) +int __ockl_sdot2( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot2( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + unsigned int, bool); + +__device__ +__attribute__((const)) +int __ockl_sdot4( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot4( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + unsigned int, bool); + +__device__ +__attribute__((const)) +int __ockl_sdot8(int, int, int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool); +#endif + +#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +// BEGIN FLOAT +__device__ +__attribute__((const)) +float __ocml_acos_f32(float); +__device__ +__attribute__((pure)) +float __ocml_acosh_f32(float); +__device__ +__attribute__((const)) +float __ocml_asin_f32(float); +__device__ +__attribute__((pure)) +float __ocml_asinh_f32(float); +__device__ +__attribute__((const)) +float __ocml_atan2_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_atan_f32(float); +__device__ +__attribute__((pure)) +float __ocml_atanh_f32(float); +__device__ +__attribute__((pure)) +float __ocml_cbrt_f32(float); +__device__ +__attribute__((const)) +float __ocml_ceil_f32(float); +__device__ +__attribute__((const)) +__device__ +float __ocml_copysign_f32(float, float); +__device__ +float __ocml_cos_f32(float); +__device__ +float __ocml_native_cos_f32(float); +__device__ +__attribute__((pure)) +__device__ +float __ocml_cosh_f32(float); +__device__ +float __ocml_cospi_f32(float); +__device__ +float __ocml_i0_f32(float); +__device__ +float __ocml_i1_f32(float); +__device__ 
+__attribute__((pure)) +float __ocml_erfc_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erfcinv_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erfcx_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erf_f32(float); +__device__ +__attribute__((pure)) +float __ocml_erfinv_f32(float); +__device__ +__attribute__((pure)) +float __ocml_exp10_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_exp10_f32(float); +__device__ +__attribute__((pure)) +float __ocml_exp2_f32(float); +__device__ +__attribute__((pure)) +float __ocml_exp_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_exp_f32(float); +__device__ +__attribute__((pure)) +float __ocml_expm1_f32(float); +__device__ +__attribute__((const)) +float __ocml_fabs_f32(float); +__device__ +__attribute__((const)) +float __ocml_fdim_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_floor_f32(float); +__device__ +__attribute__((const)) +float __ocml_fma_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fmax_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_fmin_f32(float, float); +__device__ +__attribute__((const)) +__device__ +float __ocml_fmod_f32(float, float); +__device__ +float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +float __ocml_hypot_f32(float, float); +__device__ +__attribute__((const)) +int __ocml_ilogb_f32(float); +__device__ +__attribute__((const)) +int __ocml_isfinite_f32(float); +__device__ +__attribute__((const)) +int __ocml_isinf_f32(float); +__device__ +__attribute__((const)) +int __ocml_isnan_f32(float); +__device__ +float __ocml_j0_f32(float); +__device__ +float __ocml_j1_f32(float); +__device__ +__attribute__((const)) +float __ocml_ldexp_f32(float, int); +__device__ +float __ocml_lgamma_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log10_f32(float); +__device__ +__attribute__((pure)) +float 
__ocml_native_log10_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log1p_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log2_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_log2_f32(float); +__device__ +__attribute__((const)) +float __ocml_logb_f32(float); +__device__ +__attribute__((pure)) +float __ocml_log_f32(float); +__device__ +__attribute__((pure)) +float __ocml_native_log_f32(float); +__device__ +float __ocml_modf_f32(float, __attribute__((address_space(5))) float*); +__device__ +__attribute__((const)) +float __ocml_nearbyint_f32(float); +__device__ +__attribute__((const)) +float __ocml_nextafter_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_len3_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_len4_f32(float, float, float, float); +__device__ +__attribute__((pure)) +float __ocml_ncdf_f32(float); +__device__ +__attribute__((pure)) +float __ocml_ncdfinv_f32(float); +__device__ +__attribute__((pure)) +float __ocml_pow_f32(float, float); +__device__ +__attribute__((pure)) +float __ocml_pown_f32(float, int); +__device__ +__attribute__((pure)) +float __ocml_rcbrt_f32(float); +__device__ +__attribute__((const)) +float __ocml_remainder_f32(float, float); +__device__ +float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +float __ocml_rhypot_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_rint_f32(float); +__device__ +__attribute__((const)) +float __ocml_rlen3_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_rlen4_f32(float, float, float, float); +__device__ +__attribute__((const)) +float __ocml_round_f32(float); +__device__ +__attribute__((pure)) +float __ocml_rsqrt_f32(float); +__device__ +__attribute__((const)) +float __ocml_scalb_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_scalbn_f32(float, int); +__device__ +__attribute__((const)) 
+int __ocml_signbit_f32(float); +__device__ +float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*); +__device__ +float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*); +__device__ +float __ocml_sin_f32(float); +__device__ +float __ocml_native_sin_f32(float); +__device__ +__attribute__((pure)) +float __ocml_sinh_f32(float); +__device__ +float __ocml_sinpi_f32(float); +__device__ +__attribute__((const)) +float __ocml_sqrt_f32(float); +__device__ +__attribute__((const)) +float __ocml_native_sqrt_f32(float); +__device__ +float __ocml_tan_f32(float); +__device__ +__attribute__((pure)) +float __ocml_tanh_f32(float); +__device__ +float __ocml_tgamma_f32(float); +__device__ +__attribute__((const)) +float __ocml_trunc_f32(float); +__device__ +float __ocml_y0_f32(float); +__device__ +float __ocml_y1_f32(float); + +// BEGIN INTRINSICS +__device__ +__attribute__((const)) +float __ocml_add_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_add_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_add_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_add_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sub_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_mul_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_mul_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_mul_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_mul_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtn_f32(float, float); +__device__ +__attribute__((const)) +float 
__ocml_div_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rte_f32(float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtn_f32(float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtp_f32(float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtz_f32(float); +__device__ +__attribute__((const)) +float __ocml_fma_rte_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fma_rtn_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fma_rtp_f32(float, float, float); +__device__ +__attribute__((const)) +float __ocml_fma_rtz_f32(float, float, float); +// END INTRINSICS +// END FLOAT + +// BEGIN DOUBLE +__device__ +__attribute__((const)) +double __ocml_acos_f64(double); +__device__ +__attribute__((pure)) +double __ocml_acosh_f64(double); +__device__ +__attribute__((const)) +double __ocml_asin_f64(double); +__device__ +__attribute__((pure)) +double __ocml_asinh_f64(double); +__device__ +__attribute__((const)) +double __ocml_atan2_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_atan_f64(double); +__device__ +__attribute__((pure)) +double __ocml_atanh_f64(double); +__device__ +__attribute__((pure)) +double __ocml_cbrt_f64(double); +__device__ +__attribute__((const)) +double __ocml_ceil_f64(double); +__device__ +__attribute__((const)) +double __ocml_copysign_f64(double, double); +__device__ +double __ocml_cos_f64(double); +__device__ +__attribute__((pure)) +double __ocml_cosh_f64(double); +__device__ +double __ocml_cospi_f64(double); +__device__ +double __ocml_i0_f64(double); +__device__ +double __ocml_i1_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfc_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfcinv_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfcx_f64(double); +__device__ +__attribute__((pure)) +double 
__ocml_erf_f64(double); +__device__ +__attribute__((pure)) +double __ocml_erfinv_f64(double); +__device__ +__attribute__((pure)) +double __ocml_exp10_f64(double); +__device__ +__attribute__((pure)) +double __ocml_exp2_f64(double); +__device__ +__attribute__((pure)) +double __ocml_exp_f64(double); +__device__ +__attribute__((pure)) +double __ocml_expm1_f64(double); +__device__ +__attribute__((const)) +double __ocml_fabs_f64(double); +__device__ +__attribute__((const)) +double __ocml_fdim_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_floor_f64(double); +__device__ +__attribute__((const)) +double __ocml_fma_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_fmax_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_fmin_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_fmod_f64(double, double); +__device__ +double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +double __ocml_hypot_f64(double, double); +__device__ +__attribute__((const)) +int __ocml_ilogb_f64(double); +__device__ +__attribute__((const)) +int __ocml_isfinite_f64(double); +__device__ +__attribute__((const)) +int __ocml_isinf_f64(double); +__device__ +__attribute__((const)) +int __ocml_isnan_f64(double); +__device__ +double __ocml_j0_f64(double); +__device__ +double __ocml_j1_f64(double); +__device__ +__attribute__((const)) +double __ocml_ldexp_f64(double, int); +__device__ +double __ocml_lgamma_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log10_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log1p_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log2_f64(double); +__device__ +__attribute__((const)) +double __ocml_logb_f64(double); +__device__ +__attribute__((pure)) +double __ocml_log_f64(double); +__device__ +double __ocml_modf_f64(double, __attribute__((address_space(5))) double*); +__device__ 
+__attribute__((const)) +double __ocml_nearbyint_f64(double); +__device__ +__attribute__((const)) +double __ocml_nextafter_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_len3_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_len4_f64(double, double, double, double); +__device__ +__attribute__((pure)) +double __ocml_ncdf_f64(double); +__device__ +__attribute__((pure)) +double __ocml_ncdfinv_f64(double); +__device__ +__attribute__((pure)) +double __ocml_pow_f64(double, double); +__device__ +__attribute__((pure)) +double __ocml_pown_f64(double, int); +__device__ +__attribute__((pure)) +double __ocml_rcbrt_f64(double); +__device__ +__attribute__((const)) +double __ocml_remainder_f64(double, double); +__device__ +double __ocml_remquo_f64( + double, double, __attribute__((address_space(5))) int*); +__device__ +__attribute__((const)) +double __ocml_rhypot_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_rint_f64(double); +__device__ +__attribute__((const)) +double __ocml_rlen3_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_rlen4_f64(double, double, double, double); +__device__ +__attribute__((const)) +double __ocml_round_f64(double); +__device__ +__attribute__((pure)) +double __ocml_rsqrt_f64(double); +__device__ +__attribute__((const)) +double __ocml_scalb_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_scalbn_f64(double, int); +__device__ +__attribute__((const)) +int __ocml_signbit_f64(double); +__device__ +double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*); +__device__ +double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*); +__device__ +double __ocml_sin_f64(double); +__device__ +__attribute__((pure)) +double __ocml_sinh_f64(double); +__device__ +double __ocml_sinpi_f64(double); +__device__ +__attribute__((const)) +double __ocml_sqrt_f64(double); +__device__ +double 
__ocml_tan_f64(double); +__device__ +__attribute__((pure)) +double __ocml_tanh_f64(double); +__device__ +double __ocml_tgamma_f64(double); +__device__ +__attribute__((const)) +double __ocml_trunc_f64(double); +__device__ +double __ocml_y0_f64(double); +__device__ +double __ocml_y1_f64(double); + +// BEGIN INTRINSICS +__device__ +__attribute__((const)) +double __ocml_add_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_add_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_add_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_add_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sub_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_mul_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_mul_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_mul_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_mul_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rte_f64(double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtn_f64(double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtp_f64(double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtz_f64(double); +__device__ +__attribute__((const)) +double __ocml_fma_rte_f64(double, double, double); +__device__ 
+__attribute__((const)) +double __ocml_fma_rtn_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_fma_rtp_f64(double, double, double); +__device__ +__attribute__((const)) +double __ocml_fma_rtz_f64(double, double, double); +// END INTRINSICS +// END DOUBLE + +#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ + +#if defined(__cplusplus) + } // extern "C" +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h b/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h new file mode 100644 index 0000000000..a3fa616cc5 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h @@ -0,0 +1,175 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +/* C-linkage prototypes for the __ockl_image_* device functions. This header only declares them; no function bodies appear here. */ +#include + +extern "C" { + +/* Qualifier written on every image pointer (i) and sampler pointer (s) below; it expands to address_space(4). NOTE(review): presumably the target's constant address space, as the macro name suggests - confirm. */ +#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) + +/* Loads: image pointer i plus an integer coordinate c (int, int2 or int4 native vector depending on the variant). The CM and CMa variants add an int f, the _lod_ variants add an int l, and every load returns a float4 native vector. NOTE(review): f and l look like a cube-face index and a mip level - confirm against the device-library documentation. */ +__device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); + +__device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l); + +__device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l); + +/* Stores: the same addressing parameters as the matching load (i, c, then f and/or l where the variant has them), followed by the float4 native vector p to write. They return nothing. */ +__device__ void __ockl_image_store_1D(unsigned int
ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, float4::Native_vec_ p); + +__device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l, float4::Native_vec_ p); + +__device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l, float4::Native_vec_ p); + +/* Sampled reads: take an image pointer i and a sampler pointer s, with a floating-point coordinate c (float for 1D, float2 for 1Da and 2D, float4 for 2Da, 3D, CM and CMa). All return a float4 native vector. */ +__device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c); + +__device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int
ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c); + +/* Sampled reads with two extra arguments dx and dy: plain float for 1D and 1Da, float2 native vectors for 2D and 2Da, float4 native vectors for 3D. No CM or CMa grad variant is declared in this header. */ +__device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy); + +__device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy); + +/* Sampled reads with one extra float argument l after the coordinate. NOTE(review): l is presumably the level of detail, going by the _lod_ infix - confirm. */ +__device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int
ADDRESS_SPACE_CONSTANT*s, float c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +__device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l); + +/* 2D gathers: four entry points that differ only in the r, g, b or a letter in the name; each takes a float2 coordinate and returns a float4 native vector. NOTE(review): presumably the letter names the channel that is gathered - confirm. */ +__device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +__device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); + +/* Queries: the channel_data_type and channel_order families each take only the image pointer and return an int, with one entry point per name suffix. NOTE(review): the meaning of the returned integers is not defined in this header - look it up in the device-library documentation. */ +__device__ int __ockl_image_channel_data_type_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_1Db(unsigned int
ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_3D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_CM(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_data_type_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_3D(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_CM(unsigned int ADDRESS_SPACE_CONSTANT* i); + +__device__ int __ockl_image_channel_order_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); + +}; \ No newline at end of file diff --git a/projects/clr/hipamd/include/hip/amd_detail/program_state.hpp b/projects/clr/hipamd/include/hip/amd_detail/program_state.hpp new file mode 100644 index 0000000000..e06504f711 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/program_state.hpp @@ -0,0 +1,107 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include + +struct ihipModuleSymbol_t; +using hipFunction_t = ihipModuleSymbol_t*; + +namespace hip_impl { + +// This section contains internal APIs that +// need to be exported +#ifdef __GNUC__ +#pragma GCC visibility push (default) +#endif + +struct kernarg_impl; +/* Buffer-style interface (data, size, reserve, resize) whose state sits behind a pointer to the forward-declared kernarg_impl. Declaring the move constructor leaves the class without implicit copy operations, so it is move-constructible but not copyable. */ +class kernarg { +public: + kernarg(); + kernarg(kernarg&&); + ~kernarg(); + std::uint8_t* data(); + std::size_t size(); + void reserve(std::size_t); + void resize(std::size_t); +private: + kernarg_impl* impl; +}; + +class kernargs_size_align; +class program_state_impl; +/* Front end for program_state_impl, which it reaches through the raw pointer impl. The class is non-copyable: copy assignment is deleted together with copy construction, so two program_state objects can never end up holding the same impl pointer through an implicit member-wise assignment. */ +class program_state { +public: + program_state(); + ~program_state(); + program_state(const program_state&) = delete; + program_state& operator=(const program_state&) = delete; + + hipFunction_t kernel_descriptor(std::uintptr_t, + hsa_agent_t); + + kernargs_size_align get_kernargs_size_align(std::uintptr_t); + hsa_executable_t load_executable(const char*, const size_t, + hsa_executable_t, + hsa_agent_t); + hsa_executable_t load_executable_no_copy(const char*, const size_t, + hsa_executable_t, + hsa_agent_t); + + void* global_addr_by_name(const char* name); + +private: + friend class agent_globals_impl; + program_state_impl* impl; +}; + +/* Value returned by program_state::get_kernargs_size_align. It wraps an opaque const void* handle; size(n) and alignment(n) are indexed queries defined outside this header. The program_state member function is a friend, which gives it access to the private handle. */ +class kernargs_size_align { +public: + std::size_t size(std::size_t n) const; + std::size_t alignment(std::size_t n) const; + const void* getHandle() const {return handle;} +private: + const void* handle; + friend kernargs_size_align program_state::get_kernargs_size_align(std::uintptr_t); +}; + +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif + +/* Returns a reference to a function-local static program_state, constructed on first call. NOTE(review): the function is inline with hidden visibility, so each linked image presumably gets its own instance - confirm that this is intended. */ +inline +__attribute__((visibility("hidden"))) +program_state& get_program_state() { + static program_state ps; + return ps; +} +} // Namespace hip_impl.
diff --git a/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h b/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h new file mode 100644 index 0000000000..dcf5f2fdf8 --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h @@ -0,0 +1,388 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +/* Device-side fetch functions that take a texture t. Each one obtains image and sampler pointers from t.textureObject, calls an __ockl_image_* routine and converts the result with mapFrom. */ +#include +#include +#include + +#if !defined(__HIPCC_RTC__) +#include +#endif // !defined(__HIPCC_RTC__) + +/* Declares two locals in the function that uses it: i, the image pointer taken from t.textureObject, and s, the sampler pointer found HIP_SAMPLER_OBJECT_OFFSET_DWORD elements after i. A variable named t must be in scope at the point of use. */ +#define TEXTURE_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +/* Trait whose value is true when the type matches any one of the std::is_same alternatives listed in its definition. */ +template +struct __hip_is_tex_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +/* Vector form of the trait above: the element check must hold and rank must be 1, 2 or 4. */ +template< + typename T, + unsigned int rank> +struct __hip_is_tex_channel_type> +{ + static constexpr bool value = + __hip_is_tex_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +template +struct __hip_is_tex_normalized_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +template< + typename T, + unsigned int rank> +struct __hip_is_tex_normalized_channel_type> +{ + static constexpr bool value = + __hip_is_tex_normalized_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +/* Return-type selector for the fetch functions. The primary template holds only a static_assert carrying the message "Invalid channel type!"; the member alias type comes from the specializations that follow (T itself for hipReadModeElementType, float for hipReadModeNormalizedFloat, and a HIP_vector_type for the vector cases). */ +template < + typename T, + hipTextureReadMode readMode, + typename Enable = void> +struct __hip_tex_ret +{ + static_assert(std::is_same::value, "Invalid channel type!"); +}; + +template < + typename T, + hipTextureReadMode readMode> +using __hip_tex_ret_t = typename __hip_tex_ret::type; + +template +struct __hip_tex_ret< + T, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type::value, bool>::type> +{ + using type = T; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex_ret< + HIP_vector_type, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + +template +struct
__hip_tex_ret< + T, + hipReadModeNormalizedFloat, + typename std::enable_if<__hip_is_tex_normalized_channel_type::value, bool>::type> +{ + using type = float; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex_ret< + HIP_vector_type, + hipReadModeNormalizedFloat, + typename std::enable_if<__hip_is_tex_normalized_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type<__hip_tex_ret_t, rank>; +}; + +/* Fetch wrappers. Apart from the two cubemap Grad functions further down, each one runs TEXTURE_PARAMETERS_INIT, makes a single __ockl_image_* call and passes the result to mapFrom. An integer layer argument travels inside the float coordinate vector: second component for 1D layered, third for 2D layered, fourth for layered cubemaps. */ +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1Dfetch(texture t, int x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_load_1Db(i, x); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1D(texture t, float x) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1D(i, s, x); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2D(texture t, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayered(texture t, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayered(texture t, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3D(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__
__hip_tex_ret_t texCubemap(texture t, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLod(texture t, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLod(texture t, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayeredLod(texture t, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayeredLod(texture t, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3DLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLod(texture t, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__
__hip_tex_ret_t texCubemapLayered(texture t, float x, float y, float z, int layer) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLayeredLod(texture t, float x, float y, float z, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +/* NOTE: texCubemapGrad and texCubemapLayeredGrad are stubs. Their sampling call is commented out (see the TODO inside each body), so they ignore their coordinate and derivative arguments and return a value-initialized result. */ +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs. + // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return mapFrom<__hip_tex_ret_t>(tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t texCubemapLayeredGrad(texture t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + // TODO missing in device libs.
+ // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return mapFrom<__hip_tex_ret_t>(tmp); + return {}; +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DGrad(texture t, float x, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DGrad(texture t, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex1DLayeredGrad(texture t, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex2DLayeredGrad(texture t, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t tex3DGrad(texture t, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_PARAMETERS_INIT; + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return mapFrom<__hip_tex_ret_t>(tmp); +} + +template < + typename T, + hipTextureReadMode readMode, + typename Enable = void> +struct
__hip_tex2dgather_ret +{ + static_assert(std::is_same::value, "Invalid channel type!"); +}; + +template < + typename T, + hipTextureReadMode readMode> +using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret::type; + +template +struct __hip_tex2dgather_ret< + T, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type::value, bool>::type> +{ + using type = HIP_vector_type; +}; + +template< + typename T, + unsigned int rank> +struct __hip_tex2dgather_ret< + HIP_vector_type, + hipReadModeElementType, + typename std::enable_if<__hip_is_tex_channel_type>::value, bool>::type> +{ + using type = HIP_vector_type; +}; + +template +struct __hip_tex2dgather_ret< + T, + hipReadModeNormalizedFloat, + typename std::enable_if<__hip_is_tex_normalized_channel_type::value, bool>::type> +{ + using type = float4; +}; + +/* Gathers at (x, y) through one of four routines chosen by comp: 1 selects gather4g, 2 selects gather4b, 3 selects gather4a, and every other value, including the default 0, selects gather4r. Every switch branch returns, so the return statement after the switch is never reached. */ +template +static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t tex2Dgather(texture t, float x, float y, int comp=0) +{ + TEXTURE_PARAMETERS_INIT; + switch (comp) { + case 1: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return mapFrom<__hip_tex2dgather_ret_t>(tmp); + } + case 2: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return mapFrom<__hip_tex2dgather_ret_t>(tmp); + } + case 3: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return mapFrom<__hip_tex2dgather_ret_t>(tmp); + } + default: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return mapFrom<__hip_tex2dgather_ret_t>(tmp); + } + } + return {}; +} + +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/texture_indirect_functions.h b/projects/clr/hipamd/include/hip/amd_detail/texture_indirect_functions.h new file mode 100644 index 0000000000..0dd04d74dd --- /dev/null +++ b/projects/clr/hipamd/include/hip/amd_detail/texture_indirect_functions.h @@ -0,0 +1,503 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#if defined(__cplusplus) + +#include +#include +#include + +#if !defined(__HIPCC_RTC__) +#include +#endif // !defined(__HIPCC_RTC__) + +/* Declares the two locals the texture-object functions below rely on: `i` is textureObject cast to a pointer to unsigned int in ADDRESS_SPACE_CONSTANT, and `s` is `i` advanced by HIP_SAMPLER_OBJECT_OFFSET_DWORD elements. Requires a variable named textureObject in the enclosing scope. */ +#define TEXTURE_OBJECT_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ + unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; + +/* Compile-time predicate exposing a boolean `value`; the std::enable_if on each overload below tests it to constrain T. NOTE(review): the template parameter lists and the std::is_same operands are missing in this rendering of the patch - confirm the accepted channel types against the real header. */ +template +struct __hip_is_itex_channel_type +{ + static constexpr bool value = + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +}; + +/* Specialization parameterized on an element type and a rank: `value` additionally requires rank to be 1, 2 or 4. */ +template< + typename T, + unsigned int rank> +struct __hip_is_itex_channel_type> +{ + static constexpr bool value = + __hip_is_itex_channel_type::value && + ((rank == 1) || + (rank == 2) || + (rank == 4)); +}; + +/* Loads through __ockl_image_load_1Db at integer coordinate x (no sampler is passed) and converts the result with mapFrom. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1Dfetch(hipTextureObject_t textureObject, int x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_load_1Db(i, x); + return mapFrom(tmp); +} + +/* Out-parameter form: stores the result of the value-returning tex1Dfetch in *ptr. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x) +{ + *ptr = tex1Dfetch(textureObject, x); +} + +/* Samples through __ockl_image_sample_1D at float coordinate x using sampler `s`, then converts with mapFrom. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1D(hipTextureObject_t textureObject, float x) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1D(i, s, x); + return mapFrom(tmp); +} + +/* Out-parameter form: stores the result of the value-returning tex1D in *ptr. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x) +{ + *ptr = tex1D(textureObject, x); +} + +template < + typename T, + typename 
std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2D(hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2D(i, s, float2(x, y).data); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y) +{ + *ptr = tex2D(textureObject, x, y); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = tex3D(textureObject, x, y, z); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_1Da(i, s, float2(x, layer).data); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer) +{ + *ptr = tex1DLayered(textureObject, x, layer); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer) +{ + 
TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data); + return mapFrom(tmp); +} + +/* Out-parameter form of tex2DLayered: forwards (x, y, layer) to the value-returning tex2DLayered and stores the result in *ptr. T is named explicitly because the value-returning overload uses it only as its return type. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer) +{ + *ptr = tex2DLayered<T>(textureObject, x, y, layer); +} + +/* Samples through __ockl_image_sample_CM with (x, y, z) packed into a float4 whose fourth lane is 0. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data); + return mapFrom(tmp); +} + +/* Out-parameter form of texCubemap. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z) +{ + *ptr = texCubemap(textureObject, x, y, z); +} + +/* Samples through __ockl_image_sample_CMa; layer travels in the fourth lane of the coordinate vector. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_CMa(i, s, float4(x, y, z, layer).data); + return mapFrom(tmp); +} + +/* Out-parameter form of texCubemapLayered. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer) +{ + *ptr = texCubemapLayered(textureObject, x, y, z, layer); +} + +/* tex2Dgather: the switch on comp picks which __ockl_image_gather4*_2D variant is called at (x, y). */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + switch 
(comp) { + /* comp selects the gathered channel: 1 -> g, 2 -> b, 3 -> a, anything else (including the default 0) -> r. This is the same mapping the texture-reference tex2Dgather in texture_fetch_functions.h uses. */ + case 1: { + auto tmp = __ockl_image_gather4g_2D(i, s, float2(x, y).data); + return mapFrom(tmp); + } + case 2: { + auto tmp = __ockl_image_gather4b_2D(i, s, float2(x, y).data); + return mapFrom(tmp); + } + case 3: { + auto tmp = __ockl_image_gather4a_2D(i, s, float2(x, y).data); + return mapFrom(tmp); + } + default: { + auto tmp = __ockl_image_gather4r_2D(i, s, float2(x, y).data); + return mapFrom(tmp); + } + } + /* Not reached: every case above returns. */ + return {}; +} + +/* Out-parameter form of tex2Dgather: forwards (x, y, comp) to the value-returning tex2Dgather and stores the result in *ptr. T is named explicitly because the value-returning overload uses it only as its return type. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0) +{ + *ptr = tex2Dgather<T>(textureObject, x, y, comp); +} + +/* Samples through __ockl_image_sample_lod_1D at x with an explicit level of detail. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_1D(i, s, x, level); + return mapFrom(tmp); +} + +/* Out-parameter form of tex1DLod. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level) +{ + *ptr = tex1DLod(textureObject, x, level); +} + +/* Samples through __ockl_image_sample_lod_2D at (x, y) with an explicit level of detail. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, 
level); +} + +/* Samples through __ockl_image_sample_lod_3D at (x, y, z) with an explicit level of detail. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level); + return mapFrom(tmp); +} + +/* Out-parameter form of tex3DLod. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + *ptr = tex3DLod(textureObject, x, y, z, level); +} + +/* Samples a 1D layered texture at (x, layer) with an explicit level of detail. The LOD sampler variant is used so that `level` actually reaches the hardware sample instead of being dropped. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level); + return mapFrom(tmp); +} + +/* Out-parameter form of tex1DLayeredLod. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level) +{ + *ptr = tex1DLayeredLod(textureObject, x, layer, level); +} + +/* Samples a 2D layered texture at (x, y, layer) with an explicit level of detail; layer travels in the third lane of the coordinate vector and `level` is passed to the LOD sampler variant. */ +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + *ptr = tex2DLayeredLod(textureObject, 
x, y, layer, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + *ptr = texCubemapLod(textureObject, x, y, z, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return mapFrom(tmp); + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_lod_CMa(i, s, float4(x, y, z, layer).data, level); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level) +{ + *ptr = texCubemapLayeredLod(textureObject, x, y, z, layer, level); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy) +{ + *ptr = tex1DGrad(textureObject, x, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = 
nullptr> +static __device__ __hip_img_chk__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2D(i, s, float2(x, y).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DGrad(textureObject, x, y, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_3D(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy) +{ + *ptr = tex3DGrad(textureObject, x, y, z, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_1Da(i, s, float2(x, layer).data, dPdx, dPdy); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float 
x, int layer, float dPdx, float dPdy) +{ + *ptr = tex1DLayeredGrad(textureObject, x, layer, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + auto tmp = __ockl_image_sample_grad_2Da(i, s, float4(x, y, layer, 0.0f).data, float2(dPdx.x, dPdx.y).data, float2(dPdy.x, dPdy.y).data); + return mapFrom(tmp); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy) +{ + *ptr = tex2DLayeredGrad(textureObject, x, y, layer, dPdx, dPdy); +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + TEXTURE_OBJECT_PARAMETERS_INIT + // TODO missing in device libs. 
+ // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data); + // return mapFrom(tmp); + return {}; +} + +template < + typename T, + typename std::enable_if<__hip_is_itex_channel_type::value>::type* = nullptr> +static __device__ __hip_img_chk__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy) +{ + *ptr = texCubemapLayeredGrad(textureObject, x, y, z, layer, dPdx, dPdy); +} + +#endif diff --git a/projects/clr/hipamd/include/hip/hcc_detail b/projects/clr/hipamd/include/hip/hcc_detail new file mode 120000 index 0000000000..4931d48978 --- /dev/null +++ b/projects/clr/hipamd/include/hip/hcc_detail @@ -0,0 +1 @@ +amd_detail \ No newline at end of file diff --git a/projects/clr/hipamd/include/hip/nvcc_detail b/projects/clr/hipamd/include/hip/nvcc_detail new file mode 120000 index 0000000000..e02ee85e4c --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvcc_detail @@ -0,0 +1 @@ +nvidia_detail \ No newline at end of file diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h new file mode 100644 index 0000000000..b5873be174 --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h @@ -0,0 +1,28 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_CHANNEL_DESCRIPTOR_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_CHANNEL_DESCRIPTOR_H + +#include "channel_descriptor.h" + +#endif diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_atomics.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_atomics.h new file mode 100644 index 0000000000..f9a92d582a --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_atomics.h @@ -0,0 +1,75 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_ATOMICS_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_ATOMICS_H + + +/* Floating-point atomicMax/atomicMin built on an integer atomicCAS retry loop over the value's bit pattern. Each function returns the value that was stored at addr immediately before its own update, or the value it last observed when no update was needed. If either operand of the comparison is NaN the comparison is false and nothing is written. */ +__device__ inline float atomicMax(float* addr, float val) { + unsigned int *uaddr = (unsigned int *)addr; + unsigned int observed = *uaddr; + + while (__uint_as_float(observed) < val) { + const unsigned int expected = observed; + observed = atomicCAS(uaddr, expected, __float_as_uint(val)); + /* atomicCAS returns the prior contents: equality means our swap landed, so stop here rather than loop again and read back our own store. */ + if (observed == expected) break; + } + return __uint_as_float(observed); +} + +/* Double-precision atomicMax; same retry loop on the 64-bit pattern. */ +__device__ inline double atomicMax(double* addr, double val) { + unsigned long long* uaddr = (unsigned long long *)addr; + unsigned long long observed = *uaddr; + + while (__longlong_as_double(observed) < val) { + const unsigned long long expected = observed; + observed = atomicCAS(uaddr, expected, (unsigned long long)__double_as_longlong(val)); + if (observed == expected) break; + } + + return __longlong_as_double(observed); +} + +/* Single-precision atomicMin; mirrors atomicMax with the comparison reversed. */ +__device__ inline float atomicMin(float* addr, float val) { + unsigned int *uaddr = (unsigned int *)addr; + unsigned int observed = *uaddr; + + while (__uint_as_float(observed) > val) { + const unsigned int expected = observed; + observed = atomicCAS(uaddr, expected, __float_as_uint(val)); + if (observed == expected) break; + } + return __uint_as_float(observed); +} + +/* Double-precision atomicMin. */ +__device__ inline double atomicMin(double* addr, double val) { + unsigned long long* uaddr = (unsigned long long *)addr; + unsigned long long observed = *uaddr; + + while (__longlong_as_double(observed) > val) { + const unsigned long long expected = observed; + observed = atomicCAS(uaddr, expected, (unsigned long long)__double_as_longlong(val)); + if (observed == expected) break; + } + + return __longlong_as_double(observed); +} + +#endif diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h new file mode 100644 index 0000000000..2e14e893a3 --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h @@ -0,0 +1,119 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COMPLEX_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COMPLEX_H + +#include "cuComplex.h" + +/* Thin HIP-named wrappers over the cuComplex.h API: each hipC* function forwards to the cuC* function of the same suffix unless noted. */ +typedef cuFloatComplex hipFloatComplex; + +__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return cuCrealf(z); } + +__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return cuCimagf(z); } + +__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { + return make_cuFloatComplex(a, b); +} + +__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { return cuConjf(z); } + +/* Squared magnitude, computed directly as re*re + im*im rather than by squaring cuCabsf(z), which would evaluate the magnitude twice and round a square root that is immediately undone. */ +__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { + return cuCrealf(z) * cuCrealf(z) + cuCimagf(z) * cuCimagf(z); +} + +__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { + return cuCaddf(p, q); +} + +__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { + return cuCsubf(p, q); +} + +__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { + return cuCmulf(p, q); +} + +__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { + return cuCdivf(p, q); +} + +__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return cuCabsf(z); } + +typedef cuDoubleComplex hipDoubleComplex; + +__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return cuCreal(z); } + +__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return cuCimag(z); } + +__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { + return make_cuDoubleComplex(a, b); +} + +__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { return cuConj(z); } + +/* Double-precision squared magnitude, computed directly as re*re + im*im. */ +__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { + return cuCreal(z) * cuCreal(z) + cuCimag(z) * cuCimag(z); +} + +__device__ __host__ static inline hipDoubleComplex 
hipCadd(hipDoubleComplex p, hipDoubleComplex q) { + return cuCadd(p, q); +} + +__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { + return cuCsub(p, q); +} + +__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { + return cuCmul(p, q); +} + +__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { + return cuCdiv(p, q); +} + +__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return cuCabs(z); } + +typedef cuFloatComplex hipComplex; + +__device__ __host__ static inline hipComplex make_Complex(float x, float y) { + return make_cuComplex(x, y); +} + +__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { + return cuComplexDoubleToFloat(z); +} + +__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { + return cuComplexFloatToDouble(z); +} + +__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { + return cuCfmaf(p, q, r); +} + +__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, + hipDoubleComplex r) { + return cuCfma(p, q, r); +} + +#endif diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h new file mode 100644 index 0000000000..fc98ae2281 --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h @@ -0,0 +1,12 @@ +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COOPERATIVE_GROUPS_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COOPERATIVE_GROUPS_H + +// Include CUDA headers +#include +#include + +// Include HIP wrapper headers around CUDA +#include +#include + +#endif // HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COOPERATIVE_GROUPS_H diff --git 
a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_math_constants.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_math_constants.h new file mode 100644 index 0000000000..7650bb0dec --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_math_constants.h @@ -0,0 +1,62 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#ifndef NVIDIA_HIP_MATH_CONSTANTS_H +#define NVIDIA_HIP_MATH_CONSTANTS_H +#include +#define HIP_INF_F CUDART_INF_F +#define HIP_NAN_F CUDART_NAN_F +#define HIP_MIN_DENORM_F CUDART_MIN_DENORM_F +#define HIP_MAX_NORMAL_F CUDART_MAX_NORMAL_F +#define HIP_NEG_ZERO_F CUDART_NEG_ZERO_F +#define HIP_ZERO_F CUDART_ZERO_F +#define HIP_ONE_F CUDART_ONE_F +#define HIP_SQRT_HALF_F CUDART_SQRT_HALF_F +#define HIP_SQRT_HALF_HI_F CUDART_SQRT_HALF_HI_F +#define HIP_SQRT_HALF_LO_F CUDART_SQRT_HALF_LO_F +#define HIP_SQRT_TWO_F CUDART_SQRT_TWO_F +#define HIP_THIRD_F CUDART_THIRD_F +#define HIP_PIO4_F CUDART_PIO4_F +#define HIP_PIO2_F CUDART_PIO2_F +#define HIP_3PIO4_F CUDART_3PIO4_F +#define HIP_2_OVER_PI_F CUDART_2_OVER_PI_F +#define HIP_SQRT_2_OVER_PI_F CUDART_SQRT_2_OVER_PI_F +#define HIP_PI_F CUDART_PI_F +#define HIP_L2E_F CUDART_L2E_F +#define HIP_L2T_F CUDART_L2T_F +#define HIP_LG2_F CUDART_LG2_F +#define HIP_LGE_F CUDART_LGE_F +#define HIP_LN2_F CUDART_LN2_F +#define HIP_LNT_F CUDART_LNT_F +#define HIP_LNPI_F CUDART_LNPI_F +#define HIP_TWO_TO_M126_F CUDART_TWO_TO_M126_F +#define HIP_TWO_TO_126_F CUDART_TWO_TO_126_F +#define HIP_NORM_HUGE_F CUDART_NORM_HUGE_F +#define HIP_TWO_TO_23_F CUDART_TWO_TO_23_F +#define HIP_TWO_TO_24_F CUDART_TWO_TO_24_F +#define HIP_TWO_TO_31_F CUDART_TWO_TO_31_F +#define HIP_TWO_TO_32_F CUDART_TWO_TO_32_F +#define HIP_REMQUO_BITS_F CUDART_REMQUO_BITS_F +#define HIP_REMQUO_MASK_F CUDART_REMQUO_MASK_F +#define HIP_TRIG_PLOSS_F CUDART_TRIG_PLOSS_F +#endif + + diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h new file mode 100644 index 0000000000..c63e35700b --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h @@ -0,0 +1,124 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_H + +#include + +#include + +#define HIP_KERNEL_NAME(...) __VA_ARGS__ + +typedef int hipLaunchParm; +// Launches kernelName with the CUDA execution configuration <<<numBlocks, numThreads, memPerBlock, streamId>>> and forwards the variadic arguments to the kernel. +#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \ + do { \ + kernelName<<<numBlocks, numThreads, memPerBlock, streamId>>>(__VA_ARGS__); \ + } while (0) + +#define hipLaunchKernelGGL(kernelName, ...)
hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__) + +#define hipReadModeElementType cudaReadModeElementType + +#ifdef __CUDA_ARCH__ + + +// 32-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (__CUDA_ARCH__ >= 110) +#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110) +#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120) +#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120) +#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200) + +// 64-bit Atomics: +#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200) +#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (__CUDA_ARCH__ >= 120) + +// Doubles +#define __HIP_ARCH_HAS_DOUBLES__ (__CUDA_ARCH__ >= 120) + +// warp cross-lane operations: +#define __HIP_ARCH_HAS_WARP_VOTE__ (__CUDA_ARCH__ >= 120) +#define __HIP_ARCH_HAS_WARP_BALLOT__ (__CUDA_ARCH__ >= 200) +#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (__CUDA_ARCH__ >= 300) +#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (__CUDA_ARCH__ >= 350) + +// sync +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (__CUDA_ARCH__ >= 200) +#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (__CUDA_ARCH__ >= 200) + +// misc +#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (__CUDA_ARCH__ >= 200) +#define __HIP_ARCH_HAS_3DGRID__ (__CUDA_ARCH__ >= 200) +#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (__CUDA_ARCH__ >= 350) + +#endif + +#ifdef __CUDACC__ + +#include "nvidia_hip_atomics.h" +#include "nvidia_hip_unsafe_atomics.h" + +#define hipThreadIdx_x threadIdx.x +#define hipThreadIdx_y threadIdx.y +#define hipThreadIdx_z threadIdx.z + +#define hipBlockIdx_x blockIdx.x +#define hipBlockIdx_y blockIdx.y +#define hipBlockIdx_z blockIdx.z + +#define hipBlockDim_x blockDim.x +#define hipBlockDim_y blockDim.y +#define hipBlockDim_z blockDim.z + +#define hipGridDim_x gridDim.x +#define hipGridDim_y gridDim.y +#define hipGridDim_z gridDim.z + +#define HIP_SYMBOL(X) &X + +/** + * Map HIP_DYNAMIC_SHARED to "extern __shared__" for compatibility 
with old HIP applications + * To be removed in a future release. + */ +#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[]; +#define HIP_DYNAMIC_SHARED_ATTRIBUTE + +#ifdef __HIP_DEVICE_COMPILE__ +#define abort_() \ + { asm("trap;"); } +#undef assert +#define assert(COND) \ + { \ + if (!COND) { \ + abort_(); \ + } \ + } +#endif + +#define __clock() clock() +#define __clock64() clock64() + +#endif + +#endif diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h new file mode 100644 index 0000000000..0522cc8ef7 --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h @@ -0,0 +1,3754 @@ +/* +Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H + +#include +#include +#include +#include +#include + +#include + +#define CUDA_9000 9000 +#define CUDA_10010 10010 +#define CUDA_10020 10020 +#define CUDA_11010 11010 +#define CUDA_11020 11020 +#define CUDA_11030 11030 +#define CUDA_11040 11040 +#define CUDA_11060 11060 + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +#define __dparm(x) = x +#else +#define __dparm(x) +#endif + +// Add Deprecated Support for CUDA Mapped HIP APIs +#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED) +#define __HIP_DEPRECATED +#elif defined(_MSC_VER) +#define __HIP_DEPRECATED __declspec(deprecated) +#elif defined(__GNUC__) +#define __HIP_DEPRECATED __attribute__((deprecated)) +#else +#define __HIP_DEPRECATED +#endif + +// Add Deprecated Support for CUDA Mapped HIP APIs +#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED) +#define __HIP_DEPRECATED_MSG(msg) +#elif defined(_MSC_VER) +#define __HIP_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) +#elif defined(__GNUC__) +#define __HIP_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#else +#define __HIP_DEPRECATED_MSG(msg) +#endif + + +// TODO -move to include/hip_runtime_api.h as a common implementation. 
+/** + * Memory copy types + * + */ +typedef enum cudaMemcpyKind hipMemcpyKind; +#define hipMemcpyHostToHost cudaMemcpyHostToHost +#define hipMemcpyHostToDevice cudaMemcpyHostToDevice +#define hipMemcpyDeviceToHost cudaMemcpyDeviceToHost +#define hipMemcpyDeviceToDevice cudaMemcpyDeviceToDevice +#define hipMemcpyDefault cudaMemcpyDefault + +typedef enum hipMemoryAdvise { + hipMemAdviseSetReadMostly, + hipMemAdviseUnsetReadMostly, + hipMemAdviseSetPreferredLocation, + hipMemAdviseUnsetPreferredLocation, + hipMemAdviseSetAccessedBy, + hipMemAdviseUnsetAccessedBy +} hipMemoryAdvise; + +// hipDataType +#define hipDataType cudaDataType +#define HIP_R_16F CUDA_R_16F +#define HIP_C_16F CUDA_C_16F +#define HIP_R_16BF CUDA_R_16BF +#define HIP_C_16BF CUDA_C_16BF +#define HIP_R_32F CUDA_R_32F +#define HIP_C_32F CUDA_C_32F +#define HIP_R_64F CUDA_R_64F +#define HIP_C_64F CUDA_C_64F +#define HIP_R_4I CUDA_R_4I +#define HIP_C_4I CUDA_C_4I +#define HIP_R_4U CUDA_R_4U +#define HIP_C_4U CUDA_C_4U +#define HIP_R_8I CUDA_R_8I +#define HIP_C_8I CUDA_C_8I +#define HIP_R_8U CUDA_R_8U +#define HIP_C_8U CUDA_C_8U +#define HIP_R_16I CUDA_R_16I +#define HIP_C_16I CUDA_C_16I +#define HIP_R_16U CUDA_R_16U +#define HIP_C_16U CUDA_C_16U +#define HIP_R_32I CUDA_R_32I +#define HIP_C_32I CUDA_C_32I +#define HIP_R_32U CUDA_R_32U +#define HIP_C_32U CUDA_C_32U +#define HIP_R_64I CUDA_R_64I +#define HIP_C_64I CUDA_C_64I +#define HIP_R_64U CUDA_R_64U +#define HIP_C_64U CUDA_C_64U + +// hip stream operation masks +#define STREAM_OPS_WAIT_MASK_32 0xFFFFFFFF +#define STREAM_OPS_WAIT_MASK_64 0xFFFFFFFFFFFFFFFF + +// stream operation flags +#define hipStreamWaitValueGte CU_STREAM_WAIT_VALUE_GEQ +#define hipStreamWaitValueEq CU_STREAM_WAIT_VALUE_EQ +#define hipStreamWaitValueAnd CU_STREAM_WAIT_VALUE_AND +#define hipStreamWaitValueNor CU_STREAM_WAIT_VALUE_NOR + +// hipLibraryPropertyType +#define hipLibraryPropertyType libraryPropertyType +#define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION +#define 
HIP_LIBRARY_MINOR_VERSION MINOR_VERSION +#define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL + +#define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR +#define HIP_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR + +//hipArray_Format +#define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8 +#define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16 +#define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32 +#define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8 +#define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16 +#define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32 +#define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF +#define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT + +// hipArray_Format +#define hipArray_Format CUarray_format +// Converts a hipArray_Format to CUarray_format. The HIP_AD_FORMAT_* labels are macros for the CU_AD_FORMAT_* enumerators, so listed values pass through unchanged; any other value yields CU_AD_FORMAT_UNSIGNED_INT8. +inline static CUarray_format hipArray_FormatToCUarray_format( + hipArray_Format format) { + switch (format) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + return CU_AD_FORMAT_UNSIGNED_INT8; + case HIP_AD_FORMAT_UNSIGNED_INT16: + return CU_AD_FORMAT_UNSIGNED_INT16; + case HIP_AD_FORMAT_UNSIGNED_INT32: + return CU_AD_FORMAT_UNSIGNED_INT32; + case HIP_AD_FORMAT_SIGNED_INT8: + return CU_AD_FORMAT_SIGNED_INT8; + case HIP_AD_FORMAT_SIGNED_INT16: + return CU_AD_FORMAT_SIGNED_INT16; + case HIP_AD_FORMAT_SIGNED_INT32: + return CU_AD_FORMAT_SIGNED_INT32; + case HIP_AD_FORMAT_HALF: + return CU_AD_FORMAT_HALF; + case HIP_AD_FORMAT_FLOAT: + return CU_AD_FORMAT_FLOAT; + default: + return CU_AD_FORMAT_UNSIGNED_INT8; + } +} + +#define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP +#define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP +#define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR +#define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER + +// hipAddress_mode +#define hipAddress_mode CUaddress_mode +// Converts a hipAddress_mode to CUaddress_mode; values without an explicit case yield CU_TR_ADDRESS_MODE_WRAP. +inline static CUaddress_mode hipAddress_modeToCUaddress_mode( + hipAddress_mode mode) { + switch (mode) { + case HIP_TR_ADDRESS_MODE_WRAP: + return CU_TR_ADDRESS_MODE_WRAP; + case HIP_TR_ADDRESS_MODE_CLAMP: + return
CU_TR_ADDRESS_MODE_CLAMP; + case HIP_TR_ADDRESS_MODE_MIRROR: + return CU_TR_ADDRESS_MODE_MIRROR; + case HIP_TR_ADDRESS_MODE_BORDER: + return CU_TR_ADDRESS_MODE_BORDER; + default: + return CU_TR_ADDRESS_MODE_WRAP; + } +} + +#define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT +#define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR + +// hipFilter_mode +#define hipFilter_mode CUfilter_mode +// Converts a hipFilter_mode to CUfilter_mode; values without an explicit case yield CU_TR_FILTER_MODE_POINT. +inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode( + hipFilter_mode mode) { + switch (mode) { + case HIP_TR_FILTER_MODE_POINT: + return CU_TR_FILTER_MODE_POINT; + case HIP_TR_FILTER_MODE_LINEAR: + return CU_TR_FILTER_MODE_LINEAR; + default: + return CU_TR_FILTER_MODE_POINT; + } +} + +//hipResourcetype +#define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY +#define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY +#define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR +#define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D + +// hipResourcetype +#define hipResourcetype CUresourcetype +// Converts a hipResourcetype to CUresourcetype; values without an explicit case yield CU_RESOURCE_TYPE_ARRAY. +inline static CUresourcetype hipResourcetype_enumToCUresourcetype( + hipResourcetype resType) { + switch (resType) { + case HIP_RESOURCE_TYPE_ARRAY: + return CU_RESOURCE_TYPE_ARRAY; + case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY: + return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; + case HIP_RESOURCE_TYPE_LINEAR: + return CU_RESOURCE_TYPE_LINEAR; + case HIP_RESOURCE_TYPE_PITCH2D: + return CU_RESOURCE_TYPE_PITCH2D; + default: + return CU_RESOURCE_TYPE_ARRAY; + } +} + +// hipStreamPerThread +#define hipStreamPerThread ((cudaStream_t)2) + +#define hipTexRef CUtexref +#define hiparray CUarray +typedef CUmipmappedArray hipMipmappedArray_t; + +#define HIP_TRSA_OVERRIDE_FORMAT CU_TRSA_OVERRIDE_FORMAT +#define HIP_TRSF_READ_AS_INTEGER CU_TRSF_READ_AS_INTEGER +#define HIP_TRSF_NORMALIZED_COORDINATES CU_TRSF_NORMALIZED_COORDINATES +#define HIP_TRSF_SRGB CU_TRSF_SRGB + +// hipTextureAddressMode +typedef enum cudaTextureAddressMode hipTextureAddressMode; +#define
hipAddressModeWrap cudaAddressModeWrap +#define hipAddressModeClamp cudaAddressModeClamp +#define hipAddressModeMirror cudaAddressModeMirror +#define hipAddressModeBorder cudaAddressModeBorder + +// hipTextureFilterMode +typedef enum cudaTextureFilterMode hipTextureFilterMode; +#define hipFilterModePoint cudaFilterModePoint +#define hipFilterModeLinear cudaFilterModeLinear + +// hipTextureReadMode +typedef enum cudaTextureReadMode hipTextureReadMode; +#define hipReadModeElementType cudaReadModeElementType +#define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat + +// hipChannelFormatKind +typedef enum cudaChannelFormatKind hipChannelFormatKind; +#define hipChannelFormatKindSigned cudaChannelFormatKindSigned +#define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned +#define hipChannelFormatKindFloat cudaChannelFormatKindFloat +#define hipChannelFormatKindNone cudaChannelFormatKindNone + +// hipMemRangeAttribute +typedef enum cudaMemRangeAttribute hipMemRangeAttribute; +#define hipMemRangeAttributeReadMostly cudaMemRangeAttributeReadMostly +#define hipMemRangeAttributePreferredLocation cudaMemRangeAttributePreferredLocation +#define hipMemRangeAttributeAccessedBy cudaMemRangeAttributeAccessedBy +#define hipMemRangeAttributeLastPrefetchLocation cudaMemRangeAttributeLastPrefetchLocation + +#define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode +#define hipBoundaryModeZero cudaBoundaryModeZero +#define hipBoundaryModeTrap cudaBoundaryModeTrap +#define hipBoundaryModeClamp cudaBoundaryModeClamp + +// hipFuncCache +#define hipFuncCachePreferNone cudaFuncCachePreferNone +#define hipFuncCachePreferShared cudaFuncCachePreferShared +#define hipFuncCachePreferL1 cudaFuncCachePreferL1 +#define hipFuncCachePreferEqual cudaFuncCachePreferEqual + +// hipResourceType +#define hipResourceType cudaResourceType +#define hipResourceTypeArray cudaResourceTypeArray +#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray +#define hipResourceTypeLinear 
cudaResourceTypeLinear +#define hipResourceTypePitch2D cudaResourceTypePitch2D +// +// hipErrorNoDevice. + +// hipResourceViewFormat +typedef enum cudaResourceViewFormat hipResourceViewFormat; +#define hipResViewFormatNone cudaResViewFormatNone +#define hipResViewFormatUnsignedChar1 cudaResViewFormatUnsignedChar1 +#define hipResViewFormatUnsignedChar2 cudaResViewFormatUnsignedChar2 +#define hipResViewFormatUnsignedChar4 cudaResViewFormatUnsignedChar4 +#define hipResViewFormatSignedChar1 cudaResViewFormatSignedChar1 +#define hipResViewFormatSignedChar2 cudaResViewFormatSignedChar2 +#define hipResViewFormatSignedChar4 cudaResViewFormatSignedChar4 +#define hipResViewFormatUnsignedShort1 cudaResViewFormatUnsignedShort1 +#define hipResViewFormatUnsignedShort2 cudaResViewFormatUnsignedShort2 +#define hipResViewFormatUnsignedShort4 cudaResViewFormatUnsignedShort4 +#define hipResViewFormatSignedShort1 cudaResViewFormatSignedShort1 +#define hipResViewFormatSignedShort2 cudaResViewFormatSignedShort2 +#define hipResViewFormatSignedShort4 cudaResViewFormatSignedShort4 +#define hipResViewFormatUnsignedInt1 cudaResViewFormatUnsignedInt1 +#define hipResViewFormatUnsignedInt2 cudaResViewFormatUnsignedInt2 +#define hipResViewFormatUnsignedInt4 cudaResViewFormatUnsignedInt4 +#define hipResViewFormatSignedInt1 cudaResViewFormatSignedInt1 +#define hipResViewFormatSignedInt2 cudaResViewFormatSignedInt2 +#define hipResViewFormatSignedInt4 cudaResViewFormatSignedInt4 +#define hipResViewFormatHalf1 cudaResViewFormatHalf1 +#define hipResViewFormatHalf2 cudaResViewFormatHalf2 +#define hipResViewFormatHalf4 cudaResViewFormatHalf4 +#define hipResViewFormatFloat1 cudaResViewFormatFloat1 +#define hipResViewFormatFloat2 cudaResViewFormatFloat2 +#define hipResViewFormatFloat4 cudaResViewFormatFloat4 +#define hipResViewFormatUnsignedBlockCompressed1 cudaResViewFormatUnsignedBlockCompressed1 +#define hipResViewFormatUnsignedBlockCompressed2 cudaResViewFormatUnsignedBlockCompressed2 +#define 
hipResViewFormatUnsignedBlockCompressed3 cudaResViewFormatUnsignedBlockCompressed3 +#define hipResViewFormatUnsignedBlockCompressed4 cudaResViewFormatUnsignedBlockCompressed4 +#define hipResViewFormatSignedBlockCompressed4 cudaResViewFormatSignedBlockCompressed4 +#define hipResViewFormatUnsignedBlockCompressed5 cudaResViewFormatUnsignedBlockCompressed5 +#define hipResViewFormatSignedBlockCompressed5 cudaResViewFormatSignedBlockCompressed5 +#define hipResViewFormatUnsignedBlockCompressed6H cudaResViewFormatUnsignedBlockCompressed6H +#define hipResViewFormatSignedBlockCompressed6H cudaResViewFormatSignedBlockCompressed6H +#define hipResViewFormatUnsignedBlockCompressed7 cudaResViewFormatUnsignedBlockCompressed7 + +//! Flags that can be used with hipEventCreateWithFlags: +#define hipEventDefault cudaEventDefault +#define hipEventBlockingSync cudaEventBlockingSync +#define hipEventDisableTiming cudaEventDisableTiming +#define hipEventInterprocess cudaEventInterprocess +#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */ +#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */ + + +#define hipHostMallocDefault cudaHostAllocDefault +#define hipHostMallocPortable cudaHostAllocPortable +#define hipHostMallocMapped cudaHostAllocMapped +#define hipHostMallocWriteCombined cudaHostAllocWriteCombined +#define hipHostMallocCoherent 0x0 +#define hipHostMallocNonCoherent 0x0 + +#define hipMemAttachGlobal cudaMemAttachGlobal +#define hipMemAttachHost cudaMemAttachHost +#define hipMemAttachSingle cudaMemAttachSingle + +#define hipHostRegisterDefault cudaHostRegisterDefault +#define hipHostRegisterPortable cudaHostRegisterPortable +#define hipHostRegisterMapped cudaHostRegisterMapped +#define hipHostRegisterIoMemory cudaHostRegisterIoMemory +#define hipHostRegisterReadOnly cudaHostRegisterReadOnly + +#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER +#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE +#define HIP_LAUNCH_PARAM_END 
CU_LAUNCH_PARAM_END +#define hipLimitPrintfFifoSize cudaLimitPrintfFifoSize +#define hipLimitMallocHeapSize cudaLimitMallocHeapSize +#define hipLimitStackSize cudaLimitStackSize +#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess + +#define hipOccupancyDefault cudaOccupancyDefault +#define hipOccupancyDisableCachingOverride cudaOccupancyDisableCachingOverride + +#define hipCooperativeLaunchMultiDeviceNoPreSync \ + cudaCooperativeLaunchMultiDeviceNoPreSync +#define hipCooperativeLaunchMultiDeviceNoPostSync \ + cudaCooperativeLaunchMultiDeviceNoPostSync + + +// enum CUjit_option redefines +#define HIPRTC_JIT_MAX_REGISTERS CU_JIT_MAX_REGISTERS +#define HIPRTC_JIT_THREADS_PER_BLOCK CU_JIT_THREADS_PER_BLOCK +#define HIPRTC_JIT_WALL_TIME CU_JIT_WALL_TIME +#define HIPRTC_JIT_INFO_LOG_BUFFER CU_JIT_INFO_LOG_BUFFER +#define HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES +#define HIPRTC_JIT_ERROR_LOG_BUFFER CU_JIT_ERROR_LOG_BUFFER +#define HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES +#define HIPRTC_JIT_OPTIMIZATION_LEVEL CU_JIT_OPTIMIZATION_LEVEL +#define HIPRTC_JIT_TARGET_FROM_HIPCONTEXT CU_JIT_TARGET_FROM_CUCONTEXT +#define HIPRTC_JIT_TARGET CU_JIT_TARGET +#define HIPRTC_JIT_FALLBACK_STRATEGY CU_JIT_FALLBACK_STRATEGY +#define HIPRTC_JIT_GENERATE_DEBUG_INFO CU_JIT_GENERATE_DEBUG_INFO +#define HIPRTC_JIT_LOG_VERBOSE CU_JIT_LOG_VERBOSE +#define HIPRTC_JIT_GENERATE_LINE_INFO CU_JIT_GENERATE_LINE_INFO +#define HIPRTC_JIT_CACHE_MODE CU_JIT_CACHE_MODE +#define HIPRTC_JIT_NEW_SM3X_OPT CU_JIT_NEW_SM3X_OPT +#define HIPRTC_JIT_FAST_COMPILE CU_JIT_FAST_COMPILE +#define HIPRTC_JIT_NUM_OPTIONS CU_JIT_NUM_OPTIONS + +typedef cudaEvent_t hipEvent_t; +typedef cudaStream_t hipStream_t; +typedef cudaIpcEventHandle_t hipIpcEventHandle_t; +typedef cudaIpcMemHandle_t hipIpcMemHandle_t; +typedef enum cudaLimit hipLimit_t; +typedef enum cudaFuncAttribute hipFuncAttribute; +typedef enum cudaFuncCache hipFuncCache_t; +typedef 
CUcontext hipCtx_t; +typedef enum cudaSharedMemConfig hipSharedMemConfig; +typedef CUfunc_cache hipFuncCache; +typedef CUjit_option hipJitOption; +typedef CUdevice hipDevice_t; +typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr; +#define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank +#define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported +#define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported +#define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported +#define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize +#define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout + +typedef CUmodule hipModule_t; +typedef CUfunction hipFunction_t; +typedef CUdeviceptr hipDeviceptr_t; +typedef struct cudaArray hipArray; +typedef struct cudaArray* hipArray_t; +typedef struct cudaArray* hipArray_const_t; +typedef struct cudaFuncAttributes hipFuncAttributes; +typedef struct cudaLaunchParams hipLaunchParams; +typedef CUDA_LAUNCH_PARAMS hipFunctionLaunchParams; +#define hipFunction_attribute CUfunction_attribute +#define hipPointer_attribute CUpointer_attribute +#define hip_Memcpy2D CUDA_MEMCPY2D +#define HIP_MEMCPY3D CUDA_MEMCPY3D +#define hipMemcpy3DParms cudaMemcpy3DParms +#define hipArrayDefault cudaArrayDefault +#define hipArrayLayered cudaArrayLayered +#define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore +#define hipArrayCubemap cudaArrayCubemap +#define hipArrayTextureGather cudaArrayTextureGather + +typedef cudaTextureObject_t hipTextureObject_t; +typedef cudaSurfaceObject_t hipSurfaceObject_t; +#define hipTextureType1D cudaTextureType1D +#define hipTextureType1DLayered cudaTextureType1DLayered +#define hipTextureType2D cudaTextureType2D +#define hipTextureType2DLayered cudaTextureType2DLayered +#define hipTextureType3D cudaTextureType3D + +#define hipDeviceScheduleAuto cudaDeviceScheduleAuto +#define 
hipDeviceScheduleSpin cudaDeviceScheduleSpin +#define hipDeviceScheduleYield cudaDeviceScheduleYield +#define hipDeviceScheduleBlockingSync cudaDeviceScheduleBlockingSync +#define hipDeviceScheduleMask cudaDeviceScheduleMask +#define hipDeviceMapHost cudaDeviceMapHost +#define hipDeviceLmemResizeToMax cudaDeviceLmemResizeToMax + +#define hipCpuDeviceId cudaCpuDeviceId +#define hipInvalidDeviceId cudaInvalidDeviceId +typedef struct cudaExtent hipExtent; +typedef struct cudaPitchedPtr hipPitchedPtr; +typedef struct cudaPos hipPos; +#define make_hipExtent make_cudaExtent +#define make_hipPos make_cudaPos +#define make_hipPitchedPtr make_cudaPitchedPtr +// Flags that can be used with hipStreamCreateWithFlags +#define hipStreamDefault cudaStreamDefault +#define hipStreamNonBlocking cudaStreamNonBlocking + +typedef cudaMemPool_t hipMemPool_t; +typedef enum cudaMemPoolAttr hipMemPoolAttr; +#define hipMemPoolReuseFollowEventDependencies cudaMemPoolReuseFollowEventDependencies +#define hipMemPoolReuseAllowOpportunistic cudaMemPoolReuseAllowOpportunistic +#define hipMemPoolReuseAllowInternalDependencies cudaMemPoolReuseAllowInternalDependencies +#define hipMemPoolAttrReleaseThreshold cudaMemPoolAttrReleaseThreshold +#define hipMemPoolAttrReservedMemCurrent cudaMemPoolAttrReservedMemCurrent +#define hipMemPoolAttrReservedMemHigh cudaMemPoolAttrReservedMemHigh +#define hipMemPoolAttrUsedMemCurrent cudaMemPoolAttrUsedMemCurrent +#define hipMemPoolAttrUsedMemHigh cudaMemPoolAttrUsedMemHigh +typedef struct cudaMemLocation hipMemLocation; +typedef struct cudaMemPoolProps hipMemPoolProps; +typedef struct cudaMemAccessDesc hipMemAccessDesc; +typedef enum cudaMemAccessFlags hipMemAccessFlags; +#define hipMemAccessFlagsProtNone cudaMemAccessFlagsProtNone +#define hipMemAccessFlagsProtRead cudaMemAccessFlagsProtRead +#define hipMemAccessFlagsProtReadWrite cudaMemAccessFlagsProtReadWrite +typedef enum cudaMemAllocationHandleType hipMemAllocationHandleType; +typedef struct 
cudaMemPoolPtrExportData hipMemPoolPtrExportData; + +typedef struct cudaChannelFormatDesc hipChannelFormatDesc; +typedef struct cudaResourceDesc hipResourceDesc; +typedef struct cudaTextureDesc hipTextureDesc; +typedef struct cudaResourceViewDesc hipResourceViewDesc; +typedef CUDA_RESOURCE_DESC HIP_RESOURCE_DESC; +typedef CUDA_TEXTURE_DESC HIP_TEXTURE_DESC; +typedef CUDA_RESOURCE_VIEW_DESC HIP_RESOURCE_VIEW_DESC; +// adding code for hipmemSharedConfig +#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault +#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte +#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte + +//Function Attributes +#define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK +#define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES +#define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES +#define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES +#define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS +#define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION +#define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION +#define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA +#define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES +#define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT +#define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX + +//Pointer Attributes +#define HIP_POINTER_ATTRIBUTE_CONTEXT CU_POINTER_ATTRIBUTE_CONTEXT +#define HIP_POINTER_ATTRIBUTE_MEMORY_TYPE CU_POINTER_ATTRIBUTE_MEMORY_TYPE +#define HIP_POINTER_ATTRIBUTE_DEVICE_POINTER CU_POINTER_ATTRIBUTE_DEVICE_POINTER +#define HIP_POINTER_ATTRIBUTE_HOST_POINTER CU_POINTER_ATTRIBUTE_HOST_POINTER +#define HIP_POINTER_ATTRIBUTE_P2P_TOKENS CU_POINTER_ATTRIBUTE_P2P_TOKENS +#define 
HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS CU_POINTER_ATTRIBUTE_SYNC_MEMOPS +#define HIP_POINTER_ATTRIBUTE_BUFFER_ID CU_POINTER_ATTRIBUTE_BUFFER_ID +#define HIP_POINTER_ATTRIBUTE_IS_MANAGED CU_POINTER_ATTRIBUTE_IS_MANAGED +#define HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL +#define HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE +#define HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR CU_POINTER_ATTRIBUTE_RANGE_START_ADDR +#define HIP_POINTER_ATTRIBUTE_RANGE_SIZE CU_POINTER_ATTRIBUTE_RANGE_SIZE +#define HIP_POINTER_ATTRIBUTE_MAPPED CU_POINTER_ATTRIBUTE_MAPPED +#define HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES +#define HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE +#define HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS CU_POINTER_ATTRIBUTE_ACCESS_FLAGS +#define HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE + +typedef enum cudaGraphInstantiateFlags hipGraphInstantiateFlags; +#define hipGraphInstantiateFlagAutoFreeOnLaunch cudaGraphInstantiateFlagAutoFreeOnLaunch +#define hipGraphInstantiateFlagUpload cudaGraphInstantiateFlagUpload +#define hipGraphInstantiateFlagDeviceLaunch cudaGraphInstantiateFlagDeviceLaunch +#define hipGraphInstantiateFlagUseNodePriority cudaGraphInstantiateFlagUseNodePriority + +#if CUDA_VERSION >= CUDA_9000 +#define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__) +#define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__) +#define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__) +#define __shfl_xor(...) 
__shfl_xor_sync(0xffffffff, __VA_ARGS__) +#endif // CUDA_VERSION >= CUDA_9000 +// Translates a CUDA runtime error code (cudaError_t) into the corresponding hipError_t. cudaErrorUnknown and every code without an explicit case return hipErrorUnknown. +inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) { + switch (cuError) { + case cudaSuccess: + return hipSuccess; + case cudaErrorProfilerDisabled: + return hipErrorProfilerDisabled; + case cudaErrorProfilerNotInitialized: + return hipErrorProfilerNotInitialized; + case cudaErrorProfilerAlreadyStarted: + return hipErrorProfilerAlreadyStarted; + case cudaErrorProfilerAlreadyStopped: + return hipErrorProfilerAlreadyStopped; + case cudaErrorInsufficientDriver: + return hipErrorInsufficientDriver; + case cudaErrorUnsupportedLimit: + return hipErrorUnsupportedLimit; + case cudaErrorPeerAccessUnsupported: + return hipErrorPeerAccessUnsupported; + case cudaErrorInvalidGraphicsContext: + return hipErrorInvalidGraphicsContext; + case cudaErrorSharedObjectSymbolNotFound: + return hipErrorSharedObjectSymbolNotFound; + case cudaErrorSharedObjectInitFailed: + return hipErrorSharedObjectInitFailed; + case cudaErrorOperatingSystem: + return hipErrorOperatingSystem; + case cudaErrorIllegalState: + return hipErrorIllegalState; + case cudaErrorSetOnActiveProcess: + return hipErrorSetOnActiveProcess; + case cudaErrorIllegalAddress: + return hipErrorIllegalAddress; + case cudaErrorInvalidSymbol: + return hipErrorInvalidSymbol; + case cudaErrorMissingConfiguration: + return hipErrorMissingConfiguration; + case cudaErrorMemoryAllocation: + return hipErrorOutOfMemory; + case cudaErrorInitializationError: + return hipErrorNotInitialized; + case cudaErrorLaunchFailure: + return hipErrorLaunchFailure; + case cudaErrorCooperativeLaunchTooLarge: + return hipErrorCooperativeLaunchTooLarge; + case cudaErrorPriorLaunchFailure: + return hipErrorPriorLaunchFailure; + case cudaErrorLaunchOutOfResources: + return hipErrorLaunchOutOfResources; + case cudaErrorInvalidDeviceFunction: + return hipErrorInvalidDeviceFunction; + case cudaErrorInvalidConfiguration: + return hipErrorInvalidConfiguration; +
case cudaErrorInvalidDevice: + return hipErrorInvalidDevice; + case cudaErrorInvalidValue: + return hipErrorInvalidValue; + case cudaErrorInvalidPitchValue: + return hipErrorInvalidPitchValue; + case cudaErrorInvalidDevicePointer: + return hipErrorInvalidDevicePointer; + case cudaErrorInvalidMemcpyDirection: + return hipErrorInvalidMemcpyDirection; + case cudaErrorInvalidResourceHandle: + return hipErrorInvalidHandle; + case cudaErrorNotReady: + return hipErrorNotReady; + case cudaErrorNoDevice: + return hipErrorNoDevice; + case cudaErrorPeerAccessAlreadyEnabled: + return hipErrorPeerAccessAlreadyEnabled; + case cudaErrorPeerAccessNotEnabled: + return hipErrorPeerAccessNotEnabled; + case cudaErrorContextIsDestroyed: + return hipErrorContextIsDestroyed; + case cudaErrorHostMemoryAlreadyRegistered: + return hipErrorHostMemoryAlreadyRegistered; + case cudaErrorHostMemoryNotRegistered: + return hipErrorHostMemoryNotRegistered; + case cudaErrorMapBufferObjectFailed: + return hipErrorMapFailed; + case cudaErrorAssert: + return hipErrorAssert; + case cudaErrorNotSupported: + return hipErrorNotSupported; + case cudaErrorCudartUnloading: + return hipErrorDeinitialized; + case cudaErrorInvalidKernelImage: + return hipErrorInvalidImage; + case cudaErrorUnmapBufferObjectFailed: + return hipErrorUnmapFailed; + case cudaErrorNoKernelImageForDevice: + return hipErrorNoBinaryForGpu; + case cudaErrorECCUncorrectable: + return hipErrorECCNotCorrectable; + case cudaErrorDeviceAlreadyInUse: + return hipErrorContextAlreadyInUse; + case cudaErrorInvalidPtx: + return hipErrorInvalidKernelFile; + case cudaErrorLaunchTimeout: + return hipErrorLaunchTimeOut; +#if CUDA_VERSION >= CUDA_10010 + case cudaErrorInvalidSource: + return hipErrorInvalidSource; + case cudaErrorFileNotFound: + return hipErrorFileNotFound; + case cudaErrorSymbolNotFound: + return hipErrorNotFound; + case cudaErrorArrayIsMapped: + return hipErrorArrayIsMapped; + case cudaErrorNotMappedAsPointer: + return
hipErrorNotMappedAsPointer; + case cudaErrorNotMappedAsArray: + return hipErrorNotMappedAsArray; + case cudaErrorNotMapped: + return hipErrorNotMapped; + case cudaErrorAlreadyAcquired: + return hipErrorAlreadyAcquired; + case cudaErrorAlreadyMapped: + return hipErrorAlreadyMapped; +#endif // CUDA_VERSION >= CUDA_10010 +#if CUDA_VERSION >= CUDA_10020 + case cudaErrorDeviceUninitialized: + return hipErrorInvalidContext; +#endif // CUDA_VERSION >= CUDA_10020 + case cudaErrorStreamCaptureUnsupported: + return hipErrorStreamCaptureUnsupported; + case cudaErrorStreamCaptureInvalidated: + return hipErrorStreamCaptureInvalidated; + case cudaErrorStreamCaptureMerge: + return hipErrorStreamCaptureMerge; + case cudaErrorStreamCaptureUnmatched: + return hipErrorStreamCaptureUnmatched; + case cudaErrorStreamCaptureUnjoined: + return hipErrorStreamCaptureUnjoined; + case cudaErrorStreamCaptureIsolation: + return hipErrorStreamCaptureIsolation; + case cudaErrorStreamCaptureImplicit: + return hipErrorStreamCaptureImplicit; + case cudaErrorCapturedEvent: + return hipErrorCapturedEvent; + case cudaErrorStreamCaptureWrongThread: + return hipErrorStreamCaptureWrongThread; + case cudaErrorGraphExecUpdateFailure: + return hipErrorGraphExecUpdateFailure; + case cudaErrorUnknown: + default: + return hipErrorUnknown; // Note - translated error.
+ } +} + +inline static hipError_t hipCUResultTohipError(CUresult cuError) { + switch (cuError) { + case CUDA_SUCCESS: + return hipSuccess; + case CUDA_ERROR_OUT_OF_MEMORY: + return hipErrorOutOfMemory; + case CUDA_ERROR_INVALID_VALUE: + return hipErrorInvalidValue; + case CUDA_ERROR_INVALID_DEVICE: + return hipErrorInvalidDevice; + case CUDA_ERROR_DEINITIALIZED: + return hipErrorDeinitialized; + case CUDA_ERROR_NO_DEVICE: + return hipErrorNoDevice; + case CUDA_ERROR_INVALID_CONTEXT: + return hipErrorInvalidContext; + case CUDA_ERROR_NOT_INITIALIZED: + return hipErrorNotInitialized; + case CUDA_ERROR_INVALID_HANDLE: + return hipErrorInvalidHandle; + case CUDA_ERROR_MAP_FAILED: + return hipErrorMapFailed; + case CUDA_ERROR_PROFILER_DISABLED: + return hipErrorProfilerDisabled; + case CUDA_ERROR_PROFILER_NOT_INITIALIZED: + return hipErrorProfilerNotInitialized; + case CUDA_ERROR_PROFILER_ALREADY_STARTED: + return hipErrorProfilerAlreadyStarted; + case CUDA_ERROR_PROFILER_ALREADY_STOPPED: + return hipErrorProfilerAlreadyStopped; + case CUDA_ERROR_INVALID_IMAGE: + return hipErrorInvalidImage; + case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: + return hipErrorContextAlreadyCurrent; + case CUDA_ERROR_UNMAP_FAILED: + return hipErrorUnmapFailed; + case CUDA_ERROR_ARRAY_IS_MAPPED: + return hipErrorArrayIsMapped; + case CUDA_ERROR_ALREADY_MAPPED: + return hipErrorAlreadyMapped; + case CUDA_ERROR_NO_BINARY_FOR_GPU: + return hipErrorNoBinaryForGpu; + case CUDA_ERROR_ALREADY_ACQUIRED: + return hipErrorAlreadyAcquired; + case CUDA_ERROR_NOT_MAPPED: + return hipErrorNotMapped; + case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: + return hipErrorNotMappedAsArray; + case CUDA_ERROR_NOT_MAPPED_AS_POINTER: + return hipErrorNotMappedAsPointer; + case CUDA_ERROR_ECC_UNCORRECTABLE: + return hipErrorECCNotCorrectable; + case CUDA_ERROR_UNSUPPORTED_LIMIT: + return hipErrorUnsupportedLimit; + case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: + return hipErrorContextAlreadyInUse; + case
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: + return hipErrorPeerAccessUnsupported; + case CUDA_ERROR_INVALID_PTX: + return hipErrorInvalidKernelFile; + case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: + return hipErrorInvalidGraphicsContext; + case CUDA_ERROR_INVALID_SOURCE: + return hipErrorInvalidSource; + case CUDA_ERROR_FILE_NOT_FOUND: + return hipErrorFileNotFound; + case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: + return hipErrorSharedObjectSymbolNotFound; + case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: + return hipErrorSharedObjectInitFailed; + case CUDA_ERROR_OPERATING_SYSTEM: + return hipErrorOperatingSystem; + case CUDA_ERROR_ILLEGAL_STATE: + return hipErrorIllegalState; + case CUDA_ERROR_NOT_FOUND: + return hipErrorNotFound; + case CUDA_ERROR_NOT_READY: + return hipErrorNotReady; + case CUDA_ERROR_ILLEGAL_ADDRESS: + return hipErrorIllegalAddress; + case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: + return hipErrorLaunchOutOfResources; + case CUDA_ERROR_LAUNCH_TIMEOUT: + return hipErrorLaunchTimeOut; + case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: + return hipErrorPeerAccessAlreadyEnabled; + case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: + return hipErrorPeerAccessNotEnabled; + case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: + return hipErrorSetOnActiveProcess; + case CUDA_ERROR_CONTEXT_IS_DESTROYED: + return hipErrorContextIsDestroyed; + case CUDA_ERROR_ASSERT: + return hipErrorAssert; + case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: + return hipErrorHostMemoryAlreadyRegistered; + case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: + return hipErrorHostMemoryNotRegistered; + case CUDA_ERROR_LAUNCH_FAILED: + return hipErrorLaunchFailure; + case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: + return hipErrorCooperativeLaunchTooLarge; + case CUDA_ERROR_NOT_SUPPORTED: + return hipErrorNotSupported; + case CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED: + return hipErrorStreamCaptureUnsupported; + case CUDA_ERROR_STREAM_CAPTURE_INVALIDATED: + return hipErrorStreamCaptureInvalidated; + case 
CUDA_ERROR_STREAM_CAPTURE_MERGE: + return hipErrorStreamCaptureMerge; + case CUDA_ERROR_STREAM_CAPTURE_UNMATCHED: + return hipErrorStreamCaptureUnmatched; + case CUDA_ERROR_STREAM_CAPTURE_UNJOINED: + return hipErrorStreamCaptureUnjoined; + case CUDA_ERROR_STREAM_CAPTURE_ISOLATION: + return hipErrorStreamCaptureIsolation; + case CUDA_ERROR_STREAM_CAPTURE_IMPLICIT: + return hipErrorStreamCaptureImplicit; + case CUDA_ERROR_CAPTURED_EVENT: + return hipErrorCapturedEvent; + case CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD: + return hipErrorStreamCaptureWrongThread; + case CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE: + return hipErrorGraphExecUpdateFailure; + case CUDA_ERROR_UNKNOWN: + default: + return hipErrorUnknown; // Note - translated error. + } +} + +inline static CUresult hipErrorToCUResult(hipError_t hError) { + switch (hError) { + case hipSuccess: + return CUDA_SUCCESS; + case hipErrorOutOfMemory: + return CUDA_ERROR_OUT_OF_MEMORY; + case hipErrorInvalidValue: + return CUDA_ERROR_INVALID_VALUE; + case hipErrorInvalidDevice: + return CUDA_ERROR_INVALID_DEVICE; + case hipErrorDeinitialized: + return CUDA_ERROR_DEINITIALIZED; + case hipErrorNoDevice: + return CUDA_ERROR_NO_DEVICE; + case hipErrorInvalidContext: + return CUDA_ERROR_INVALID_CONTEXT; + case hipErrorNotInitialized: + return CUDA_ERROR_NOT_INITIALIZED; + case hipErrorInvalidHandle: + return CUDA_ERROR_INVALID_HANDLE; + case hipErrorMapFailed: + return CUDA_ERROR_MAP_FAILED; + case hipErrorProfilerDisabled: + return CUDA_ERROR_PROFILER_DISABLED; + case hipErrorProfilerNotInitialized: + return CUDA_ERROR_PROFILER_NOT_INITIALIZED; + case hipErrorProfilerAlreadyStarted: + return CUDA_ERROR_PROFILER_ALREADY_STARTED; + case hipErrorProfilerAlreadyStopped: + return CUDA_ERROR_PROFILER_ALREADY_STOPPED; + case hipErrorInvalidImage: + return CUDA_ERROR_INVALID_IMAGE; + case hipErrorContextAlreadyCurrent: + return CUDA_ERROR_CONTEXT_ALREADY_CURRENT; + case hipErrorUnmapFailed: + return CUDA_ERROR_UNMAP_FAILED; + case 
hipErrorArrayIsMapped: + return CUDA_ERROR_ARRAY_IS_MAPPED; + case hipErrorAlreadyMapped: + return CUDA_ERROR_ALREADY_MAPPED; + case hipErrorNoBinaryForGpu: + return CUDA_ERROR_NO_BINARY_FOR_GPU; + case hipErrorAlreadyAcquired: + return CUDA_ERROR_ALREADY_ACQUIRED; + case hipErrorNotMapped: + return CUDA_ERROR_NOT_MAPPED; + case hipErrorNotMappedAsArray: + return CUDA_ERROR_NOT_MAPPED_AS_ARRAY; + case hipErrorNotMappedAsPointer: + return CUDA_ERROR_NOT_MAPPED_AS_POINTER; + case hipErrorECCNotCorrectable: + return CUDA_ERROR_ECC_UNCORRECTABLE; + case hipErrorUnsupportedLimit: + return CUDA_ERROR_UNSUPPORTED_LIMIT; + case hipErrorContextAlreadyInUse: + return CUDA_ERROR_CONTEXT_ALREADY_IN_USE; + case hipErrorPeerAccessUnsupported: + return CUDA_ERROR_PEER_ACCESS_UNSUPPORTED; + case hipErrorInvalidKernelFile: + return CUDA_ERROR_INVALID_PTX; + case hipErrorInvalidGraphicsContext: + return CUDA_ERROR_INVALID_GRAPHICS_CONTEXT; + case hipErrorInvalidSource: + return CUDA_ERROR_INVALID_SOURCE; + case hipErrorFileNotFound: + return CUDA_ERROR_FILE_NOT_FOUND; + case hipErrorSharedObjectSymbolNotFound: + return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND; + case hipErrorSharedObjectInitFailed: + return CUDA_ERROR_SHARED_OBJECT_INIT_FAILED; + case hipErrorOperatingSystem: + return CUDA_ERROR_OPERATING_SYSTEM; + case hipErrorIllegalState: + return CUDA_ERROR_ILLEGAL_STATE; + case hipErrorNotFound: + return CUDA_ERROR_NOT_FOUND; + case hipErrorNotReady: + return CUDA_ERROR_NOT_READY; + case hipErrorIllegalAddress: + return CUDA_ERROR_ILLEGAL_ADDRESS; + case hipErrorLaunchOutOfResources: + return CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES; + case hipErrorLaunchTimeOut: + return CUDA_ERROR_LAUNCH_TIMEOUT; + case hipErrorPeerAccessAlreadyEnabled: + return CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED; + case hipErrorPeerAccessNotEnabled: + return CUDA_ERROR_PEER_ACCESS_NOT_ENABLED; + case hipErrorSetOnActiveProcess: + return CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE; + case hipErrorContextIsDestroyed: + 
return CUDA_ERROR_CONTEXT_IS_DESTROYED; + case hipErrorAssert: + return CUDA_ERROR_ASSERT; + case hipErrorHostMemoryAlreadyRegistered: + return CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED; + case hipErrorHostMemoryNotRegistered: + return CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED; + case hipErrorLaunchFailure: + return CUDA_ERROR_LAUNCH_FAILED; + case hipErrorCooperativeLaunchTooLarge: + return CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE; + case hipErrorNotSupported: + return CUDA_ERROR_NOT_SUPPORTED; + case hipErrorStreamCaptureUnsupported: + return CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED; + case hipErrorStreamCaptureInvalidated: + return CUDA_ERROR_STREAM_CAPTURE_INVALIDATED; + case hipErrorStreamCaptureMerge: + return CUDA_ERROR_STREAM_CAPTURE_MERGE; + case hipErrorStreamCaptureUnmatched: + return CUDA_ERROR_STREAM_CAPTURE_UNMATCHED; + case hipErrorStreamCaptureUnjoined: + return CUDA_ERROR_STREAM_CAPTURE_UNJOINED; + case hipErrorStreamCaptureIsolation: + return CUDA_ERROR_STREAM_CAPTURE_ISOLATION; + case hipErrorStreamCaptureImplicit: + return CUDA_ERROR_STREAM_CAPTURE_IMPLICIT; + case hipErrorCapturedEvent: + return CUDA_ERROR_CAPTURED_EVENT; + case hipErrorStreamCaptureWrongThread: + return CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD; + case hipErrorGraphExecUpdateFailure: + return CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE; + case hipErrorUnknown: + default: + return CUDA_ERROR_UNKNOWN; // Note - translated error. 
+ } +} + +inline static cudaError_t hipErrorToCudaError(hipError_t hError) { + switch (hError) { + case hipSuccess: + return cudaSuccess; + case hipErrorOutOfMemory: + return cudaErrorMemoryAllocation; + case hipErrorProfilerDisabled: + return cudaErrorProfilerDisabled; + case hipErrorProfilerNotInitialized: + return cudaErrorProfilerNotInitialized; + case hipErrorProfilerAlreadyStarted: + return cudaErrorProfilerAlreadyStarted; + case hipErrorProfilerAlreadyStopped: + return cudaErrorProfilerAlreadyStopped; + case hipErrorInvalidConfiguration: + return cudaErrorInvalidConfiguration; + case hipErrorLaunchOutOfResources: + return cudaErrorLaunchOutOfResources; + case hipErrorInvalidValue: + return cudaErrorInvalidValue; + case hipErrorInvalidPitchValue: + return cudaErrorInvalidPitchValue; + case hipErrorInvalidHandle: + return cudaErrorInvalidResourceHandle; + case hipErrorInvalidDevice: + return cudaErrorInvalidDevice; + case hipErrorInvalidMemcpyDirection: + return cudaErrorInvalidMemcpyDirection; + case hipErrorInvalidDevicePointer: + return cudaErrorInvalidDevicePointer; + case hipErrorNotInitialized: + return cudaErrorInitializationError; + case hipErrorNoDevice: + return cudaErrorNoDevice; + case hipErrorNotReady: + return cudaErrorNotReady; + case hipErrorPeerAccessNotEnabled: + return cudaErrorPeerAccessNotEnabled; + case hipErrorPeerAccessAlreadyEnabled: + return cudaErrorPeerAccessAlreadyEnabled; + case hipErrorHostMemoryAlreadyRegistered: + return cudaErrorHostMemoryAlreadyRegistered; + case hipErrorHostMemoryNotRegistered: + return cudaErrorHostMemoryNotRegistered; + case hipErrorDeinitialized: + return cudaErrorCudartUnloading; + case hipErrorInvalidSymbol: + return cudaErrorInvalidSymbol; + case hipErrorInsufficientDriver: + return cudaErrorInsufficientDriver; + case hipErrorMissingConfiguration: + return cudaErrorMissingConfiguration; + case hipErrorPriorLaunchFailure: + return cudaErrorPriorLaunchFailure; + case hipErrorInvalidDeviceFunction: + 
return cudaErrorInvalidDeviceFunction; + case hipErrorInvalidImage: + return cudaErrorInvalidKernelImage; + case hipErrorInvalidContext: +#if CUDA_VERSION >= CUDA_10020 + return cudaErrorDeviceUninitialized; +#else + return cudaErrorUnknown; +#endif + case hipErrorMapFailed: + return cudaErrorMapBufferObjectFailed; + case hipErrorUnmapFailed: + return cudaErrorUnmapBufferObjectFailed; + case hipErrorArrayIsMapped: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorArrayIsMapped; +#else + return cudaErrorUnknown; +#endif + case hipErrorAlreadyMapped: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorAlreadyMapped; +#else + return cudaErrorUnknown; +#endif + case hipErrorNoBinaryForGpu: + return cudaErrorNoKernelImageForDevice; + case hipErrorAlreadyAcquired: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorAlreadyAcquired; +#else + return cudaErrorUnknown; +#endif + case hipErrorNotMapped: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorNotMapped; +#else + return cudaErrorUnknown; +#endif + case hipErrorNotMappedAsArray: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorNotMappedAsArray; +#else + return cudaErrorUnknown; +#endif + case hipErrorNotMappedAsPointer: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorNotMappedAsPointer; +#else + return cudaErrorUnknown; +#endif + case hipErrorECCNotCorrectable: + return cudaErrorECCUncorrectable; + case hipErrorUnsupportedLimit: + return cudaErrorUnsupportedLimit; + case hipErrorContextAlreadyInUse: + return cudaErrorDeviceAlreadyInUse; + case hipErrorPeerAccessUnsupported: + return cudaErrorPeerAccessUnsupported; + case hipErrorInvalidKernelFile: + return cudaErrorInvalidPtx; + case hipErrorInvalidGraphicsContext: + return cudaErrorInvalidGraphicsContext; + case hipErrorInvalidSource: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorInvalidSource; +#else + return cudaErrorUnknown; +#endif + case hipErrorFileNotFound: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorFileNotFound; +#else + return cudaErrorUnknown; 
+#endif + case hipErrorSharedObjectSymbolNotFound: + return cudaErrorSharedObjectSymbolNotFound; + case hipErrorSharedObjectInitFailed: + return cudaErrorSharedObjectInitFailed; + case hipErrorOperatingSystem: + return cudaErrorOperatingSystem; + case hipErrorIllegalState: + return cudaErrorIllegalState; + case hipErrorNotFound: +#if CUDA_VERSION >= CUDA_10010 + return cudaErrorSymbolNotFound; +#else + return cudaErrorUnknown; +#endif + case hipErrorIllegalAddress: + return cudaErrorIllegalAddress; + case hipErrorLaunchTimeOut: + return cudaErrorLaunchTimeout; + case hipErrorSetOnActiveProcess: + return cudaErrorSetOnActiveProcess; + case hipErrorContextIsDestroyed: + return cudaErrorContextIsDestroyed; + case hipErrorAssert: + return cudaErrorAssert; + case hipErrorLaunchFailure: + return cudaErrorLaunchFailure; + case hipErrorCooperativeLaunchTooLarge: + return cudaErrorCooperativeLaunchTooLarge; + case hipErrorStreamCaptureUnsupported: + return cudaErrorStreamCaptureUnsupported; + case hipErrorStreamCaptureInvalidated: + return cudaErrorStreamCaptureInvalidated; + case hipErrorStreamCaptureMerge: + return cudaErrorStreamCaptureMerge; + case hipErrorStreamCaptureUnmatched: + return cudaErrorStreamCaptureUnmatched; + case hipErrorStreamCaptureUnjoined: + return cudaErrorStreamCaptureUnjoined; + case hipErrorStreamCaptureIsolation: + return cudaErrorStreamCaptureIsolation; + case hipErrorStreamCaptureImplicit: + return cudaErrorStreamCaptureImplicit; + case hipErrorCapturedEvent: + return cudaErrorCapturedEvent; + case hipErrorStreamCaptureWrongThread: + return cudaErrorStreamCaptureWrongThread; + case hipErrorGraphExecUpdateFailure: + return cudaErrorGraphExecUpdateFailure; + case hipErrorNotSupported: + return cudaErrorNotSupported; + // HSA: does not exist in CUDA + case hipErrorRuntimeMemory: + // HSA: does not exist in CUDA + case hipErrorRuntimeOther: + case hipErrorUnknown: + case hipErrorTbd: + default: + return cudaErrorUnknown; // Note - translated error. 
+    }
+}
+
+/**
+ * Map a HIP memcpy direction onto the CUDA runtime enumerator of the same name.
+ *
+ * @param kind HIP copy direction to translate.
+ * @return The corresponding cudaMemcpy* enumerator; any value not listed in the switch
+ *         yields -1 cast to enum cudaMemcpyKind.
+ */
+inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) {
+    switch (kind) {
+        case hipMemcpyHostToHost:
+            return cudaMemcpyHostToHost;
+        case hipMemcpyHostToDevice:
+            return cudaMemcpyHostToDevice;
+        case hipMemcpyDeviceToHost:
+            return cudaMemcpyDeviceToHost;
+        case hipMemcpyDeviceToDevice:
+            return cudaMemcpyDeviceToDevice;
+        case hipMemcpyDefault:
+            return cudaMemcpyDefault;
+        default:
+            // Unlisted value: return the -1 sentinel, cast to the declared return type
+            // rather than to the HIP parameter type.
+            // NOTE(review): presumably the receiving CUDA call rejects -1 - confirm.
+            return (enum cudaMemcpyKind)-1;
+    }
+}
+
+/**
+ * Map a HIP texture address mode onto the CUDA runtime enumerator of the same name.
+ *
+ * @param kind HIP address mode to translate.
+ * @return The corresponding cudaAddressMode* enumerator; any value not listed in the
+ *         switch yields -1 cast to enum cudaTextureAddressMode.
+ */
+inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode(
+    hipTextureAddressMode kind) {
+    switch (kind) {
+        case hipAddressModeWrap:
+            return cudaAddressModeWrap;
+        case hipAddressModeClamp:
+            return cudaAddressModeClamp;
+        case hipAddressModeMirror:
+            return cudaAddressModeMirror;
+        case hipAddressModeBorder:
+            return cudaAddressModeBorder;
+        default:
+            // Sentinel cast to the declared return type.
+            return (enum cudaTextureAddressMode)-1;
+    }
+}
+
+/**
+ * Map a HIP memory-range attribute onto the CUDA runtime enumerator of the same name.
+ *
+ * @param kind HIP memory-range attribute to translate.
+ * @return The corresponding cudaMemRangeAttribute* enumerator; any value not listed in
+ *         the switch yields -1 cast to enum cudaMemRangeAttribute.
+ */
+inline static enum cudaMemRangeAttribute hipMemRangeAttributeToCudaMemRangeAttribute(
+    hipMemRangeAttribute kind) {
+    switch (kind) {
+        case hipMemRangeAttributeReadMostly:
+            return cudaMemRangeAttributeReadMostly;
+        case hipMemRangeAttributePreferredLocation:
+            return cudaMemRangeAttributePreferredLocation;
+        case hipMemRangeAttributeAccessedBy:
+            return cudaMemRangeAttributeAccessedBy;
+        case hipMemRangeAttributeLastPrefetchLocation:
+            return cudaMemRangeAttributeLastPrefetchLocation;
+        default:
+            // Sentinel cast to the declared return type.
+            return (enum cudaMemRangeAttribute)-1;
+    }
+}
+
+/**
+ * Map a HIP memory-advise value onto the CUDA runtime enumerator of the same name.
+ *
+ * @param kind HIP advice value to translate.
+ * @return The corresponding cudaMemAdvise* enumerator; any value not listed in the
+ *         switch yields -1 cast to enum cudaMemoryAdvise.
+ */
+inline static enum cudaMemoryAdvise hipMemoryAdviseTocudaMemoryAdvise(
+    hipMemoryAdvise kind) {
+    switch (kind) {
+        case hipMemAdviseSetReadMostly:
+            return cudaMemAdviseSetReadMostly;
+        case hipMemAdviseUnsetReadMostly:
+            return cudaMemAdviseUnsetReadMostly;
+        case hipMemAdviseSetPreferredLocation:
+            return cudaMemAdviseSetPreferredLocation;
+        case hipMemAdviseUnsetPreferredLocation:
+            return cudaMemAdviseUnsetPreferredLocation;
+        case hipMemAdviseSetAccessedBy:
+            return cudaMemAdviseSetAccessedBy;
+        case hipMemAdviseUnsetAccessedBy:
+            return cudaMemAdviseUnsetAccessedBy;
+        default:
+            // Sentinel cast to the declared return type.
+            return (enum cudaMemoryAdvise)-1;
+    }
+}
+
+/**
+ * Map a HIP texture filter mode onto the CUDA runtime enumerator of the same name.
+ *
+ * @param kind HIP filter mode to translate.
+ * @return The corresponding cudaFilterMode* enumerator; any value not listed in the
+ *         switch yields -1 cast to enum cudaTextureFilterMode.
+ */
+inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode(
+    hipTextureFilterMode kind) {
+    switch (kind) {
+        case hipFilterModePoint:
+            return cudaFilterModePoint;
+        case hipFilterModeLinear:
+            return cudaFilterModeLinear;
+        default:
+            // Sentinel cast to the declared return type.
+            return (enum cudaTextureFilterMode)-1;
+    }
+}
+
+/**
+ * Map a HIP texture read mode onto the CUDA runtime enumerator of the same name.
+ *
+ * @param kind HIP read mode to translate.
+ * @return The corresponding cudaReadMode* enumerator; any value not listed in the
+ *         switch yields -1 cast to enum cudaTextureReadMode.
+ */
+inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) {
+    switch (kind) {
+        case hipReadModeElementType:
+            return cudaReadModeElementType;
+        case hipReadModeNormalizedFloat:
+            return cudaReadModeNormalizedFloat;
+        default:
+            // Sentinel cast to the declared return type.
+            return (enum cudaTextureReadMode)-1;
+    }
+}
+
+/**
+ * Map a HIP channel format kind onto the CUDA runtime enumerator of the same name.
+ *
+ * @param kind HIP channel format kind to translate.
+ * @return The corresponding cudaChannelFormatKind* enumerator; any value not listed in
+ *         the switch yields -1 cast to enum cudaChannelFormatKind.
+ */
+inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind(
+    hipChannelFormatKind kind) {
+    switch (kind) {
+        case hipChannelFormatKindSigned:
+            return cudaChannelFormatKindSigned;
+        case hipChannelFormatKindUnsigned:
+            return cudaChannelFormatKindUnsigned;
+        case hipChannelFormatKindFloat:
+            return cudaChannelFormatKindFloat;
+        case hipChannelFormatKindNone:
+            return cudaChannelFormatKindNone;
+        default:
+            // Sentinel cast to the declared return type.
+            return (enum cudaChannelFormatKind)-1;
+    }
+}
+
+/** External memory handle types: HIP names aliased onto the CUDA runtime ones. */
+typedef enum cudaExternalMemoryHandleType hipExternalMemoryHandleType;
+#define hipExternalMemoryHandleTypeOpaqueFd cudaExternalMemoryHandleTypeOpaqueFd
+#define hipExternalMemoryHandleTypeOpaqueWin32 cudaExternalMemoryHandleTypeOpaqueWin32
+#define hipExternalMemoryHandleTypeOpaqueWin32Kmt cudaExternalMemoryHandleTypeOpaqueWin32Kmt
+#define hipExternalMemoryHandleTypeD3D12Heap cudaExternalMemoryHandleTypeD3D12Heap
+#define hipExternalMemoryHandleTypeD3D12Resource cudaExternalMemoryHandleTypeD3D12Resource
+#if CUDA_VERSION >= CUDA_10020
+#define hipExternalMemoryHandleTypeD3D11Resource cudaExternalMemoryHandleTypeD3D11Resource
+#define hipExternalMemoryHandleTypeD3D11ResourceKmt cudaExternalMemoryHandleTypeD3D11ResourceKmt
+#define hipExternalMemoryHandleTypeNvSciBuf
cudaExternalMemoryHandleTypeNvSciBuf +#endif + +typedef struct cudaExternalMemoryHandleDesc hipExternalMemoryHandleDesc; +typedef struct cudaExternalMemoryBufferDesc hipExternalMemoryBufferDesc; +typedef cudaExternalMemory_t hipExternalMemory_t; + +typedef enum cudaExternalSemaphoreHandleType hipExternalSemaphoreHandleType; +#define hipExternalSemaphoreHandleTypeOpaqueFd cudaExternalSemaphoreHandleTypeOpaqueFd +#define hipExternalSemaphoreHandleTypeOpaqueWin32 cudaExternalSemaphoreHandleTypeOpaqueWin32 +#define hipExternalSemaphoreHandleTypeOpaqueWin32Kmt cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt +#define hipExternalSemaphoreHandleTypeD3D12Fence cudaExternalSemaphoreHandleTypeD3D12Fence +#if CUDA_VERSION >= CUDA_10020 +#define hipExternalSemaphoreHandleTypeD3D11Fence cudaExternalSemaphoreHandleTypeD3D11Fence +#define hipExternalSemaphoreHandleTypeNvSciSync cudaExternalSemaphoreHandleTypeNvSciSync +#define hipExternalSemaphoreHandleTypeKeyedMutex cudaExternalSemaphoreHandleTypeKeyedMutex +#define hipExternalSemaphoreHandleTypeKeyedMutexKmt cudaExternalSemaphoreHandleTypeKeyedMutexKmt +#endif +#if CUDA_VERSION >= CUDA_11020 +#define hipExternalSemaphoreHandleTypeTimelineSemaphoreFd cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd +#define hipExternalSemaphoreHandleTypeTimelineSemaphoreWin32 cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 +#endif + +typedef struct cudaExternalSemaphoreHandleDesc hipExternalSemaphoreHandleDesc; +typedef cudaExternalSemaphore_t hipExternalSemaphore_t; +typedef struct cudaExternalSemaphoreSignalParams hipExternalSemaphoreSignalParams; +typedef struct cudaExternalSemaphoreWaitParams hipExternalSemaphoreWaitParams; + +typedef enum cudaGLDeviceList hipGLDeviceList; +#define hipGLDeviceListAll cudaGLDeviceListAll +#define hipGLDeviceListCurrentFrame cudaGLDeviceListCurrentFrame +#define hipGLDeviceListNextFrame cudaGLDeviceListNextFrame + +typedef struct cudaGraphicsResource hipGraphicsResource; +typedef cudaGraphicsResource_t 
hipGraphicsResource_t; + +typedef enum cudaGraphicsRegisterFlags hipGraphicsRegisterFlags; +#define hipGraphicsRegisterFlagsNone cudaGraphicsRegisterFlagsNone +#define hipGraphicsRegisterFlagsReadOnly cudaGraphicsRegisterFlagsReadOnly +#define hipGraphicsRegisterFlagsWriteDiscard cudaGraphicsRegisterFlagsWriteDiscard +#define hipGraphicsRegisterFlagsSurfaceLoadStore cudaGraphicsRegisterFlagsSurfaceLoadStore +#define hipGraphicsRegisterFlagsTextureGather cudaGraphicsRegisterFlagsTextureGather + +/** + * graph types + * + */ +typedef cudaGraph_t hipGraph_t; +typedef cudaGraphNode_t hipGraphNode_t; +typedef cudaGraphExec_t hipGraphExec_t; +typedef cudaUserObject_t hipUserObject_t; + +typedef enum cudaGraphNodeType hipGraphNodeType; +#define hipGraphNodeTypeKernel cudaGraphNodeTypeKernel +#define hipGraphNodeTypeMemcpy cudaGraphNodeTypeMemcpy +#define hipGraphNodeTypeMemset cudaGraphNodeTypeMemset +#define hipGraphNodeTypeHost cudaGraphNodeTypeHost +#define hipGraphNodeTypeGraph cudaGraphNodeTypeGraph +#define hipGraphNodeTypeEmpty cudaGraphNodeTypeEmpty +#define hipGraphNodeTypeWaitEvent cudaGraphNodeTypeWaitEvent +#define hipGraphNodeTypeEventRecord cudaGraphNodeTypeEventRecord +#define hipGraphNodeTypeExtSemaphoreSignal cudaGraphNodeTypeExtSemaphoreSignal +#define hipGraphNodeTypeExtSemaphoreWait cudaGraphNodeTypeExtSemaphoreWait +#define hipGraphNodeTypeMemcpyFromSymbol cudaGraphNodeTypeMemcpyFromSymbol +#define hipGraphNodeTypeMemcpyToSymbol cudaGraphNodeTypeMemcpyToSymbol +#define hipGraphNodeTypeCount cudaGraphNodeTypeCount + +typedef cudaHostFn_t hipHostFn_t; +typedef struct cudaHostNodeParams hipHostNodeParams; +typedef struct cudaKernelNodeParams hipKernelNodeParams; +typedef struct cudaMemsetParams hipMemsetParams; + +#if CUDA_VERSION >= CUDA_11040 +typedef struct cudaMemAllocNodeParams hipMemAllocNodeParams; +#endif + +typedef enum cudaGraphExecUpdateResult hipGraphExecUpdateResult; +#define hipGraphExecUpdateSuccess cudaGraphExecUpdateSuccess +#define 
hipGraphExecUpdateError cudaGraphExecUpdateError +#define hipGraphExecUpdateErrorTopologyChanged cudaGraphExecUpdateErrorTopologyChanged +#define hipGraphExecUpdateErrorNodeTypeChanged cudaGraphExecUpdateErrorNodeTypeChanged +#define hipGraphExecUpdateErrorFunctionChanged cudaGraphExecUpdateErrorFunctionChanged +#define hipGraphExecUpdateErrorParametersChanged cudaGraphExecUpdateErrorParametersChanged +#define hipGraphExecUpdateErrorNotSupported cudaGraphExecUpdateErrorNotSupported +#define hipGraphExecUpdateErrorUnsupportedFunctionChange \ + cudaGraphExecUpdateErrorUnsupportedFunctionChange + +typedef enum cudaStreamCaptureMode hipStreamCaptureMode; +#define hipStreamCaptureModeGlobal cudaStreamCaptureModeGlobal +#define hipStreamCaptureModeThreadLocal cudaStreamCaptureModeThreadLocal +#define hipStreamCaptureModeRelaxed cudaStreamCaptureModeRelaxed + +typedef enum cudaStreamCaptureStatus hipStreamCaptureStatus; +#define hipStreamCaptureStatusNone cudaStreamCaptureStatusNone +#define hipStreamCaptureStatusActive cudaStreamCaptureStatusActive +#define hipStreamCaptureStatusInvalidated cudaStreamCaptureStatusInvalidated + +typedef union cudaKernelNodeAttrValue hipKernelNodeAttrValue; +typedef enum cudaKernelNodeAttrID hipKernelNodeAttrID; +#define hipKernelNodeAttributeAccessPolicyWindow cudaKernelNodeAttributeAccessPolicyWindow +#define hipKernelNodeAttributeCooperative cudaKernelNodeAttributeCooperative +typedef enum cudaAccessProperty hipAccessProperty; +#define hipAccessPropertyNormal cudaAccessPropertyNormal +#define hipAccessPropertyStreaming cudaAccessPropertyStreaming +#define hipAccessPropertyPersisting cudaAccessPropertyPersisting +typedef struct cudaAccessPolicyWindow hipAccessPolicyWindow; + +typedef enum cudaGraphMemAttributeType hipGraphMemAttributeType; +#define hipGraphMemAttrUsedMemCurrent cudaGraphMemAttrUsedMemCurrent +#define hipGraphMemAttrUsedMemHigh cudaGraphMemAttrUsedMemHigh +#define hipGraphMemAttrReservedMemCurrent 
cudaGraphMemAttrReservedMemCurrent +#define hipGraphMemAttrReservedMemHigh cudaGraphMemAttrReservedMemHigh + +typedef enum cudaUserObjectFlags hipUserObjectFlags; +#define hipUserObjectNoDestructorSync cudaUserObjectNoDestructorSync + +typedef enum cudaUserObjectRetainFlags hipUserObjectRetainFlags; +#define hipGraphUserObjectMove cudaGraphUserObjectMove + +#if CUDA_VERSION >= CUDA_11030 +typedef enum cudaStreamUpdateCaptureDependenciesFlags hipStreamUpdateCaptureDependenciesFlags; +#define hipStreamAddCaptureDependencies cudaStreamAddCaptureDependencies +#define hipStreamSetCaptureDependencies cudaStreamSetCaptureDependencies +#endif + +#if CUDA_VERSION >= CUDA_11030 +typedef enum cudaGraphDebugDotFlags hipGraphDebugDotFlags; +#define hipGraphDebugDotFlagsVerbose cudaGraphDebugDotFlagsVerbose +#define hipGraphDebugDotFlagsKernelNodeParams cudaGraphDebugDotFlagsKernelNodeParams +#define hipGraphDebugDotFlagsMemcpyNodeParams cudaGraphDebugDotFlagsMemcpyNodeParams +#define hipGraphDebugDotFlagsMemsetNodeParams cudaGraphDebugDotFlagsMemsetNodeParams +#define hipGraphDebugDotFlagsHostNodeParams cudaGraphDebugDotFlagsHostNodeParams +#define hipGraphDebugDotFlagsEventNodeParams cudaGraphDebugDotFlagsEventNodeParams +#define hipGraphDebugDotFlagsExtSemasSignalNodeParams cudaGraphDebugDotFlagsExtSemasSignalNodeParams +#define hipGraphDebugDotFlagsExtSemasWaitNodeParams cudaGraphDebugDotFlagsExtSemasWaitNodeParams +#define hipGraphDebugDotFlagsKernelNodeAttributes cudaGraphDebugDotFlagsKernelNodeAttributes +#define hipGraphDebugDotFlagsHandles cudaGraphDebugDotFlagsHandles +#endif + +#if CUDA_VERSION >= CUDA_10020 +#define hipMemAllocationGranularityMinimum CU_MEM_ALLOC_GRANULARITY_MINIMUM +#define hipMemAllocationGranularityRecommended CU_MEM_ALLOC_GRANULARITY_RECOMMENDED +typedef enum CUmemAllocationGranularity_flags_enum hipMemAllocationGranularity_flags; +typedef enum cudaMemLocationType hipMemLocationType; +#define hipMemLocationTypeInvalid cudaMemLocationTypeInvalid 
+#define hipMemLocationTypeDevice cudaMemLocationTypeDevice
+#define hipMemHandleTypeNone cudaMemHandleTypeNone
+#define hipMemHandleTypePosixFileDescriptor cudaMemHandleTypePosixFileDescriptor
+#define hipMemHandleTypeWin32 cudaMemHandleTypeWin32
+#define hipMemHandleTypeWin32Kmt cudaMemHandleTypeWin32Kmt
+typedef enum cudaMemAllocationType hipMemAllocationType;
+#define hipMemAllocationTypeInvalid cudaMemAllocationTypeInvalid
+#define hipMemAllocationTypePinned cudaMemAllocationTypePinned
+#define hipMemAllocationTypeMax cudaMemAllocationTypeMax
+#define hipMemGenericAllocationHandle_t CUmemGenericAllocationHandle
+// CUarrayMapInfo mappings (HIP names aliased onto CUDA driver API types and constants)
+typedef CUarrayMapInfo hipArrayMapInfo;
+typedef CUarraySparseSubresourceType hipArraySparseSubresourceType;
+#define hipArraySparseSubresourceTypeSparseLevel CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL
+#define hipArraySparseSubresourceTypeMiptail CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL
+typedef CUmemOperationType hipMemOperationType;
+#define hipMemOperationTypeMap CU_MEM_OPERATION_TYPE_MAP
+#define hipMemOperationTypeUnmap CU_MEM_OPERATION_TYPE_UNMAP
+typedef CUmemHandleType hipMemHandleType;
+#define hipMemHandleTypeGeneric CU_MEM_HANDLE_TYPE_GENERIC
+// Explicitly declaring hipMemAllocationProp based on CUmemAllocationProp, but using CUDA
+// runtime members instead, because hipMemAllocationType, hipMemAllocationHandleType and
+// hipMemLocation are defined using CUDA runtime data types and are also used by
+// hipMemPoolProps.
+// Currently there is no built-in CUDA runtime structure corresponding to CUmemAllocationProp.
+// This structure needs to be updated accordingly if CUDA updates CUmemAllocationProp.
+typedef struct hipMemAllocationProp {
+    /** Memory allocation type */
+    hipMemAllocationType type;
+    /** Requested handle type */
+    hipMemAllocationHandleType requestedHandleTypes;
+    /** Location of allocation */
+    hipMemLocation location;
+    /**
+     * Windows-specific POBJECT_ATTRIBUTES required when
+     * ::CU_MEM_HANDLE_TYPE_WIN32 is specified. This object attributes structure
+     * includes security attributes that define
+     * the scope of which exported allocations may be transferred to other
+     * processes. In all other cases, this field is required to be zero.
+     */
+    void *win32HandleMetaData;
+    struct {
+        /**
+         * Allocation hint for requesting compressible memory.
+         * On devices that support Compute Data Compression, compressible
+         * memory can be used to accelerate accesses to data with unstructured
+         * sparsity and other compressible data patterns. Applications are
+         * expected to query allocation property of the handle obtained with
+         * ::cuMemCreate using ::cuMemGetAllocationPropertiesFromHandle to
+         * validate if the obtained allocation is compressible or not. Note that
+         * compressed memory may not be mappable on all devices.
+         */
+        unsigned char compressionType;
+        /** RDMA capable */
+        unsigned char gpuDirectRDMACapable;
+        /** Bitmask indicating intended usage for this allocation */
+        unsigned short usage;
+        /** Reserved bytes. NOTE(review): presumably must stay zero, as in CUmemAllocationProp - confirm. */
+        unsigned char reserved[4];
+    } allocFlags;
+} hipMemAllocationProp;
+#endif
+/**
+ * Stream callback function pointer type.
+ * HIPRT_CB is defined as CUDART_CB and decorates the callback signature.
+ */
+#define HIPRT_CB CUDART_CB
+typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
+/** Forwards to cuInit(flags) and converts the CUresult with hipCUResultTohipError. */
+inline static hipError_t hipInit(unsigned int flags) {
+    return hipCUResultTohipError(cuInit(flags));
+}
+
+/** Forwards to cudaDeviceReset() and converts the result with hipCUDAErrorTohipError. */
+inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); }
+
+/** Forwards to cudaGetLastError() and converts the result with hipCUDAErrorTohipError. */
+inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); }
+
+/** Forwards to cudaPeekAtLastError() and converts the result with hipCUDAErrorTohipError. */
+inline static hipError_t hipPeekAtLastError() {
+    return hipCUDAErrorTohipError(cudaPeekAtLastError());
+}
+
+/** Forwards to cudaMalloc(ptr, size) and converts the result with hipCUDAErrorTohipError. */
+inline static hipError_t hipMalloc(void** ptr, size_t size) {
+    return hipCUDAErrorTohipError(cudaMalloc(ptr, size));
+}
+
+/**
+ * Forwards to cudaMallocPitch(ptr, pitch, width, height) and converts the result with
+ * hipCUDAErrorTohipError.
+ */
+inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) {
+    return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
+}
+
+inline static hipError_t
hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){ + return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes)); +} + +inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { + return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent)); +} + +inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); } + +__HIP_DEPRECATED_MSG("use hipHostMalloc instead") +inline static hipError_t hipMallocHost(void** ptr, size_t size) { + return hipCUDAErrorTohipError(cudaMallocHost(ptr, size)); +} + +__HIP_DEPRECATED_MSG("use hipHostMalloc instead") +inline static hipError_t hipMemAllocHost(void** ptr, size_t size) { + return hipCUResultTohipError(cuMemAllocHost(ptr, size)); +} + +__HIP_DEPRECATED_MSG("use hipHostMalloc instead") +inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) { + return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); +} + +inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) { + return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); +} + +inline static hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice, + int device) { + return hipCUDAErrorTohipError(cudaMemAdvise(dev_ptr, count, + hipMemoryAdviseTocudaMemoryAdvise(advice), device)); +} + +inline static hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemPrefetchAsync(dev_ptr, count, device, stream)); +} + +inline static hipError_t hipMemRangeGetAttribute(void* data, size_t data_size, + hipMemRangeAttribute attribute, + const void* dev_ptr, size_t count) { + return hipCUDAErrorTohipError(cudaMemRangeGetAttribute(data, data_size, + hipMemRangeAttributeToCudaMemRangeAttribute(attribute), dev_ptr, count)); +} + +inline static 
hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes, + hipMemRangeAttribute* attributes, + size_t num_attributes, const void* dev_ptr, + size_t count) { + return hipCUDAErrorTohipError(cudaMemRangeGetAttributes(data, data_sizes, attributes, + num_attributes, dev_ptr, count)); +} + +inline static hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t* dev_ptr, + size_t length __dparm(0), + unsigned int flags __dparm(hipMemAttachSingle)) { + return hipCUDAErrorTohipError(cudaStreamAttachMemAsync(stream, dev_ptr, length, flags)); +} + +inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) { + return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags)); +} + +inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, + size_t width, size_t height __dparm(0), + unsigned int flags __dparm(hipArrayDefault)) { + return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags)); +} + +inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc, + hipExtent extent, unsigned int flags) { + return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags)); +} + +inline static hipError_t hipFreeArray(hipArray* array) { + return hipCUDAErrorTohipError(cudaFreeArray(array)); +} + +inline static hipError_t hipMipmappedArrayCreate(hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels) { + return hipCUResultTohipError(cuMipmappedArrayCreate(pHandle, pMipmappedArrayDesc, numMipmapLevels)); +} + +inline static hipError_t hipMipmappedArrayDestroy(hipMipmappedArray_t hMipmappedArray) { + return hipCUResultTohipError(cuMipmappedArrayDestroy(hMipmappedArray)); +} + +inline static hipError_t hipMipmappedArrayGetLevel(hipArray_t* pLevelArray, + hipMipmappedArray_t hMipMappedArray, + unsigned int level) { + return 
hipCUResultTohipError(cuMipmappedArrayGetLevel((CUarray*)pLevelArray, hMipMappedArray, level)); +} + +inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) { + return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags)); +} + +inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { + return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr)); +} + +inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) { + return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags)); +} + +inline static hipError_t hipHostUnregister(void* ptr) { + return hipCUDAErrorTohipError(cudaHostUnregister(ptr)); +} + +__HIP_DEPRECATED_MSG("use hipHostFree instead") +inline static hipError_t hipFreeHost(void* ptr) { + return hipCUDAErrorTohipError(cudaFreeHost(ptr)); +} + +inline static hipError_t hipHostFree(void* ptr) { + return hipCUDAErrorTohipError(cudaFreeHost(ptr)); +} + +inline static hipError_t hipSetDevice(int device) { + return hipCUDAErrorTohipError(cudaSetDevice(device)); +} + +inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) { + + if (prop == NULL) { + return hipErrorInvalidValue; + } + + struct cudaDeviceProp cdprop; + memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp)); + cdprop.major = prop->major; + cdprop.minor = prop->minor; + cdprop.totalGlobalMem = prop->totalGlobalMem; + cdprop.sharedMemPerBlock = prop->sharedMemPerBlock; + cdprop.regsPerBlock = prop->regsPerBlock; + cdprop.warpSize = prop->warpSize; + cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock; + cdprop.clockRate = prop->clockRate; + cdprop.totalConstMem = prop->totalConstMem; + cdprop.multiProcessorCount = prop->multiProcessorCount; + cdprop.l2CacheSize = prop->l2CacheSize; + cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor; + cdprop.computeMode = prop->computeMode; + cdprop.canMapHostMemory = prop->canMapHostMemory; + 
cdprop.memoryClockRate = prop->memoryClockRate; + cdprop.memoryBusWidth = prop->memoryBusWidth; + return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop)); +} + +inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) { + return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size)); +} + +inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) { + return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size)); +} + +inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) { + return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size)); +} + +inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size, + hipStream_t stream) { + return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream)); +} + +inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size, + hipStream_t stream) { + return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream)); +} + +inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size, + hipStream_t stream) { + return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream)); +} + +inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind copyKind) { + return hipCUDAErrorTohipError( + cudaMemcpy(dst, src, sizeBytes, copyKind)); +} + + +inline static hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind copyKind, hipStream_t stream) { + cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes, copyKind, stream); + + if (error != cudaSuccess) return hipCUDAErrorTohipError(error); + + return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); +} + +inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError( + cudaMemcpyAsync(dst, src, sizeBytes, 
copyKind, stream)); +} + +inline static hipError_t hipMemcpyToSymbol( + const void* symbol, const void* src, size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind copyType __dparm(hipMemcpyKindToCudaMemcpyKind(hipMemcpyHostToDevice))) { + return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, copyType)); +} + +inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, + size_t sizeBytes, size_t offset, + hipMemcpyKind copyType, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync( + symbol, src, sizeBytes, offset, copyType, stream)); +} + +inline static hipError_t hipMemcpyFromSymbol( + void* dst, const void* symbolName, size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost))) { + return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset, kind)); +} + +inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync( + dst, symbolName, sizeBytes, offset, kind, stream)); +} + +inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { + return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName)); +} + +inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) { + return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName)); +} + +inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + cudaMemcpy2D(dst, dpitch, src, spitch, width, height, kind)); +} + +inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + return hipCUResultTohipError(cuMemcpy2D(pCopy)); +} + +inline static hipError_t 
hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) { + return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream)); +} + +inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) { + return hipCUDAErrorTohipError(cudaMemcpy3D(p)); +} + +inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream)); +} + +inline static hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) { + return hipCUResultTohipError(cuMemcpy3D(pCopy)); +} + +inline static hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { + return hipCUResultTohipError(cuMemcpy3DAsync(pCopy, stream)); +} + +inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, + kind, stream)); +} + +inline static hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray* src, + size_t wOffset, size_t hOffset, size_t width, + size_t height, hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaMemcpy2DFromArray(dst, dpitch, src, wOffset, hOffset, width, + height, + kind)); +} + +inline static hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray* src, + size_t wOffset, size_t hOffset, size_t width, + size_t height, hipMemcpyKind kind, + hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpy2DFromArrayAsync(dst, dpitch, src, wOffset, hOffset, + width, height, + kind, + stream)); +} + +inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, + height, kind)); +} + +inline static hipError_t 
hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, + hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch, + width, height, + kind, + stream)); +} + +__HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, + size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + cudaMemcpyToArray(dst, wOffset, hOffset, src, count, kind)); +} + +__HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, + size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count, + kind)); +} + +inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, + size_t count) { + return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count)); +} + +inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, + size_t count) { + return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count)); +} + +inline static hipError_t hipDeviceSynchronize() { + return hipCUDAErrorTohipError(cudaDeviceSynchronize()); +} + +inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) { + return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig)); +} + +inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) { + return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value)); +} + +inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) { + return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig)); +} + +inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) { + return 
hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config)); +} + +inline static const char* hipGetErrorString(hipError_t error) { + return cudaGetErrorString(hipErrorToCudaError(error)); +} + +inline static const char* hipGetErrorName(hipError_t error) { + return cudaGetErrorName(hipErrorToCudaError(error)); +} + +inline static hipError_t hipDrvGetErrorString(hipError_t error, const char** errorString) { + CUresult err = hipErrorToCUResult(error); + if( err == CUDA_ERROR_UNKNOWN ) { + return hipCUResultTohipError(cuGetErrorString((CUresult)error, errorString)); + } else { + return hipCUResultTohipError(cuGetErrorString(err, errorString)); + } +} + +inline static hipError_t hipDrvGetErrorName(hipError_t error, const char** errorString) { + CUresult err = hipErrorToCUResult(error); + if( err == CUDA_ERROR_UNKNOWN ) { + return hipCUResultTohipError(cuGetErrorName((CUresult)error, errorString)); + } else { + return hipCUResultTohipError(cuGetErrorName(err, errorString)); + } +} + +inline static hipError_t hipGetDeviceCount(int* count) { + return hipCUDAErrorTohipError(cudaGetDeviceCount(count)); +} + +inline static hipError_t hipGetDevice(int* device) { + return hipCUDAErrorTohipError(cudaGetDevice(device)); +} + +inline static hipError_t hipIpcCloseMemHandle(void* devPtr) { + return hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr)); +} + +inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) { + return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event)); +} + +inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { + return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr)); +} + +inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) { + return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle)); +} + +inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, + unsigned int flags) { + return 
hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags)); +} + +inline static hipError_t hipMemset(void* devPtr, int value, size_t count) { + return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); +} + +inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) { + return hipCUResultTohipError(cuMemsetD32(devPtr, value, count)); +} + +inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream)); +} + +inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count, + hipStream_t stream __dparm(0)) { + return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream)); +} + +inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) { + return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes)); +} + +inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes, + hipStream_t stream __dparm(0)) { + return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream)); +} + +inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) { + return hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes)); +} + +inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes, + hipStream_t stream __dparm(0)) { + return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream)); +} + +inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { + return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height)); +} + +inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, 
width, height, stream)); +} + +inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){ + return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent)); +} + +inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){ + return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream)); +} + +inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) { + + if (p_prop == NULL) { + return hipErrorInvalidValue; + } + + struct cudaDeviceProp cdprop; + cudaError_t cerror; + cerror = cudaGetDeviceProperties(&cdprop, device); + + strncpy(p_prop->name, cdprop.name, 256); + p_prop->totalGlobalMem = cdprop.totalGlobalMem; + p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock; + p_prop->regsPerBlock = cdprop.regsPerBlock; + p_prop->warpSize = cdprop.warpSize; + p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock; + for (int i = 0; i < 3; i++) { + p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i]; + p_prop->maxGridSize[i] = cdprop.maxGridSize[i]; + } + p_prop->clockRate = cdprop.clockRate; + p_prop->memoryClockRate = cdprop.memoryClockRate; + p_prop->memoryBusWidth = cdprop.memoryBusWidth; + p_prop->totalConstMem = cdprop.totalConstMem; + p_prop->major = cdprop.major; + p_prop->minor = cdprop.minor; + p_prop->multiProcessorCount = cdprop.multiProcessorCount; + p_prop->l2CacheSize = cdprop.l2CacheSize; + p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor; + p_prop->computeMode = cdprop.computeMode; + p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate: + + int ccVers = p_prop->major * 100 + p_prop->minor * 10; + p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110); + p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110); + p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120); + p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120); + 
p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200); + p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120); + p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110); + p_prop->arch.hasDoubles = (ccVers >= 130); + p_prop->arch.hasWarpVote = (ccVers >= 120); + p_prop->arch.hasWarpBallot = (ccVers >= 200); + p_prop->arch.hasWarpShuffle = (ccVers >= 300); + p_prop->arch.hasFunnelShift = (ccVers >= 350); + p_prop->arch.hasThreadFenceSystem = (ccVers >= 200); + p_prop->arch.hasSyncThreadsExt = (ccVers >= 200); + p_prop->arch.hasSurfaceFuncs = (ccVers >= 200); + p_prop->arch.has3dGrid = (ccVers >= 200); + p_prop->arch.hasDynamicParallelism = (ccVers >= 350); + + p_prop->concurrentKernels = cdprop.concurrentKernels; + p_prop->pciDomainID = cdprop.pciDomainID; + p_prop->pciBusID = cdprop.pciBusID; + p_prop->pciDeviceID = cdprop.pciDeviceID; + p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor; + p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard; + p_prop->canMapHostMemory = cdprop.canMapHostMemory; + p_prop->gcnArch = 0; // Not a GCN arch + p_prop->integrated = cdprop.integrated; + p_prop->cooperativeLaunch = cdprop.cooperativeLaunch; + p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch; + p_prop->cooperativeMultiDeviceUnmatchedFunc = 0; + p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0; + + p_prop->maxTexture1D = cdprop.maxTexture1D; + p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0]; + p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1]; + p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0]; + p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1]; + p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2]; + + p_prop->memPitch = cdprop.memPitch; + p_prop->textureAlignment = cdprop.textureAlignment; + p_prop->texturePitchAlignment = cdprop.texturePitchAlignment; + p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled; + 
p_prop->ECCEnabled = cdprop.ECCEnabled; + p_prop->tccDriver = cdprop.tccDriver; + + return hipCUDAErrorTohipError(cerror); +} + +inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + enum cudaDeviceAttr cdattr; + cudaError_t cerror; + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + cdattr = cudaDevAttrMaxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + cdattr = cudaDevAttrMaxBlockDimX; + break; + case hipDeviceAttributeMaxBlockDimY: + cdattr = cudaDevAttrMaxBlockDimY; + break; + case hipDeviceAttributeMaxBlockDimZ: + cdattr = cudaDevAttrMaxBlockDimZ; + break; + case hipDeviceAttributeMaxGridDimX: + cdattr = cudaDevAttrMaxGridDimX; + break; + case hipDeviceAttributeMaxGridDimY: + cdattr = cudaDevAttrMaxGridDimY; + break; + case hipDeviceAttributeMaxGridDimZ: + cdattr = cudaDevAttrMaxGridDimZ; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + cdattr = cudaDevAttrMaxSharedMemoryPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + cdattr = cudaDevAttrTotalConstantMemory; + break; + case hipDeviceAttributeWarpSize: + cdattr = cudaDevAttrWarpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + cdattr = cudaDevAttrMaxRegistersPerBlock; + break; + case hipDeviceAttributeClockRate: + cdattr = cudaDevAttrClockRate; + break; + case hipDeviceAttributeMemoryClockRate: + cdattr = cudaDevAttrMemoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + cdattr = cudaDevAttrGlobalMemoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + cdattr = cudaDevAttrMultiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + cdattr = cudaDevAttrComputeMode; + break; + case hipDeviceAttributeL2CacheSize: + cdattr = cudaDevAttrL2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + cdattr = 
cudaDevAttrComputeCapabilityMajor; + break; + case hipDeviceAttributeComputeCapabilityMinor: + cdattr = cudaDevAttrComputeCapabilityMinor; + break; + case hipDeviceAttributeConcurrentKernels: + cdattr = cudaDevAttrConcurrentKernels; + break; + case hipDeviceAttributePciBusId: + cdattr = cudaDevAttrPciBusId; + break; + case hipDeviceAttributePciDeviceId: + cdattr = cudaDevAttrPciDeviceId; + break; + case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + cdattr = cudaDevAttrIsMultiGpuBoard; + break; + case hipDeviceAttributeIntegrated: + cdattr = cudaDevAttrIntegrated; + break; + case hipDeviceAttributeMaxTexture1DWidth: + cdattr = cudaDevAttrMaxTexture1DWidth; + break; + case hipDeviceAttributeMaxTexture2DWidth: + cdattr = cudaDevAttrMaxTexture2DWidth; + break; + case hipDeviceAttributeMaxTexture2DHeight: + cdattr = cudaDevAttrMaxTexture2DHeight; + break; + case hipDeviceAttributeMaxTexture3DWidth: + cdattr = cudaDevAttrMaxTexture3DWidth; + break; + case hipDeviceAttributeMaxTexture3DHeight: + cdattr = cudaDevAttrMaxTexture3DHeight; + break; + case hipDeviceAttributeMaxTexture3DDepth: + cdattr = cudaDevAttrMaxTexture3DDepth; + break; + case hipDeviceAttributeMaxPitch: + cdattr = cudaDevAttrMaxPitch; + break; + case hipDeviceAttributeTextureAlignment: + cdattr = cudaDevAttrTextureAlignment; + break; + case hipDeviceAttributeTexturePitchAlignment: + cdattr = cudaDevAttrTexturePitchAlignment; + break; + case hipDeviceAttributeKernelExecTimeout: + cdattr = cudaDevAttrKernelExecTimeout; + break; + case hipDeviceAttributeCanMapHostMemory: + cdattr = cudaDevAttrCanMapHostMemory; + break; + case hipDeviceAttributeEccEnabled: + cdattr = cudaDevAttrEccEnabled; + break; + case hipDeviceAttributeCooperativeLaunch: + cdattr = cudaDevAttrCooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + cdattr = cudaDevAttrCooperativeMultiDeviceLaunch; 
+ break; + case hipDeviceAttributeConcurrentManagedAccess: + cdattr = cudaDevAttrConcurrentManagedAccess; + break; + case hipDeviceAttributeManagedMemory: + cdattr = cudaDevAttrManagedMemory; + break; + case hipDeviceAttributePageableMemoryAccessUsesHostPageTables: + cdattr = cudaDevAttrPageableMemoryAccessUsesHostPageTables; + break; + case hipDeviceAttributePageableMemoryAccess: + cdattr = cudaDevAttrPageableMemoryAccess; + break; + case hipDeviceAttributeDirectManagedMemAccessFromHost: + cdattr = cudaDevAttrDirectManagedMemAccessFromHost; + break; + case hipDeviceAttributeGlobalL1CacheSupported: + cdattr = cudaDevAttrGlobalL1CacheSupported; + break; + case hipDeviceAttributeMaxBlocksPerMultiProcessor: + cdattr = cudaDevAttrMaxBlocksPerMultiprocessor; + break; + case hipDeviceAttributeMultiGpuBoardGroupID: + cdattr = cudaDevAttrMultiGpuBoardGroupID; + break; + case hipDeviceAttributeReservedSharedMemPerBlock: + cdattr = cudaDevAttrReservedSharedMemoryPerBlock; + break; + case hipDeviceAttributeSingleToDoublePrecisionPerfRatio: + cdattr = cudaDevAttrSingleToDoublePrecisionPerfRatio; + break; + case hipDeviceAttributeStreamPrioritiesSupported: + cdattr = cudaDevAttrStreamPrioritiesSupported; + break; + case hipDeviceAttributeSurfaceAlignment: + cdattr = cudaDevAttrSurfaceAlignment; + break; + case hipDeviceAttributeTccDriver: + cdattr = cudaDevAttrTccDriver; + break; + case hipDeviceAttributeUnifiedAddressing: + cdattr = cudaDevAttrUnifiedAddressing; + break; +#if CUDA_VERSION >= CUDA_11020 + case hipDeviceAttributeMemoryPoolsSupported: + cdattr = cudaDevAttrMemoryPoolsSupported; + break; +#endif // CUDA_VERSION >= CUDA_11020 + case hipDeviceAttributeVirtualMemoryManagementSupported: + return hipCUResultTohipError(cuDeviceGetAttribute(pi, + CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, + device)); + case hipDeviceAttributeAccessPolicyMaxWindowSize: + cdattr = cudaDevAttrMaxAccessPolicyWindowSize; + break; + case hipDeviceAttributeAsyncEngineCount: + 
cdattr = cudaDevAttrAsyncEngineCount; + break; + case hipDeviceAttributeCanUseHostPointerForRegisteredMem: + cdattr = cudaDevAttrCanUseHostPointerForRegisteredMem; + break; + case hipDeviceAttributeComputePreemptionSupported: + cdattr = cudaDevAttrComputePreemptionSupported; + break; + case hipDeviceAttributeHostNativeAtomicSupported: + cdattr = cudaDevAttrHostNativeAtomicSupported; + break; + default: + return hipCUDAErrorTohipError(cudaErrorInvalidValue); + } + cerror = cudaDeviceGetAttribute(pi, cdattr, device); + return hipCUDAErrorTohipError(cerror); +} +#if CUDA_VERSION >= CUDA_10020 +inline static CUmemAllocationProp hipMemAllocationPropToCUmemAllocationProp(const hipMemAllocationProp* prop) { + CUmemAllocationProp cuProp; + cuProp.type = (CUmemAllocationType)prop->type; + cuProp.requestedHandleTypes = (CUmemAllocationHandleType)prop->requestedHandleTypes; + cuProp.location.type = (CUmemLocationType)prop->location.type; + cuProp.location.id = prop->location.id; + cuProp.win32HandleMetaData = prop->win32HandleMetaData; + cuProp.allocFlags.compressionType = prop->allocFlags.compressionType; + cuProp.allocFlags.gpuDirectRDMACapable = prop->allocFlags.gpuDirectRDMACapable; + cuProp.allocFlags.usage = prop->allocFlags.usage; + cuProp.allocFlags.reserved[0] = prop->allocFlags.reserved[0]; + cuProp.allocFlags.reserved[1] = prop->allocFlags.reserved[1]; + cuProp.allocFlags.reserved[2] = prop->allocFlags.reserved[2]; + cuProp.allocFlags.reserved[3] = prop->allocFlags.reserved[3]; + return cuProp; +} +inline static CUmemLocation hipMemLocationToCUmemLocation(const hipMemLocation* loc) { + CUmemLocation cuLoc; + cuLoc.id = loc->id; + cuLoc.type = (CUmemLocationType)loc->type; + return cuLoc; +} +inline static CUmemAccessDesc hipMemAccessDescToCUmemAccessDesc(const hipMemAccessDesc* desc) { + CUmemAccessDesc cuDesc; + cuDesc.flags = (CUmemAccess_flags)desc->flags; + cuDesc.location.id = (desc->location).id; + cuDesc.location.type = 
(CUmemLocationType)((desc->location).type); + return cuDesc; +} +inline static hipError_t hipMemGetAllocationGranularity(size_t* granularity, + const hipMemAllocationProp* prop, + hipMemAllocationGranularity_flags option) { + CUmemAllocationProp cuProp = hipMemAllocationPropToCUmemAllocationProp(prop); + return hipCUResultTohipError(cuMemGetAllocationGranularity(granularity, &cuProp, option)); +} +inline static hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, + size_t size, + const hipMemAllocationProp* prop, + unsigned long long flags) { + CUmemAllocationProp cuProp = hipMemAllocationPropToCUmemAllocationProp(prop); + return hipCUResultTohipError(cuMemCreate(handle, size, &cuProp, flags)); +} +inline static hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle) { + return hipCUResultTohipError(cuMemRelease(handle)); +} +inline static hipError_t hipMemAddressFree(hipDeviceptr_t ptr, size_t size) { + return hipCUResultTohipError(cuMemAddressFree(ptr, size)); +} +inline static hipError_t hipMemAddressReserve(hipDeviceptr_t* ptr, + size_t size, + size_t alignment, + hipDeviceptr_t addr, + unsigned long long flags) { + return hipCUResultTohipError(cuMemAddressReserve(ptr, size, alignment, addr, flags)); +} +inline static hipError_t hipMemExportToShareableHandle(void* shareableHandle, + hipMemGenericAllocationHandle_t handle, + hipMemAllocationHandleType handleType, + unsigned long long flags) { + return hipCUResultTohipError(cuMemExportToShareableHandle(shareableHandle, handle, (CUmemAllocationHandleType)handleType, flags)); +} +inline static hipError_t hipMemGetAccess(unsigned long long* flags, + const hipMemLocation* location, + hipDeviceptr_t ptr) { + CUmemLocation loc = hipMemLocationToCUmemLocation(location); + return hipCUResultTohipError(cuMemGetAccess(flags, &loc, ptr)); +} +inline static hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, + hipMemGenericAllocationHandle_t handle) { + CUmemAllocationProp 
cuProp = hipMemAllocationPropToCUmemAllocationProp(prop); + return hipCUResultTohipError(cuMemGetAllocationPropertiesFromHandle(&cuProp, handle)); +} +inline static hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, + void* osHandle, + hipMemAllocationHandleType shHandleType) { + return hipCUResultTohipError(cuMemImportFromShareableHandle(handle, osHandle, (CUmemAllocationHandleType)shHandleType)); +} +inline static hipError_t hipMemMap(hipDeviceptr_t ptr, size_t size, size_t offset, + hipMemGenericAllocationHandle_t handle, + unsigned long long flags) { + return hipCUResultTohipError(cuMemMap(ptr, size, offset, handle, flags)); +} +inline static hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, + unsigned int count, + hipStream_t stream) { + return hipCUResultTohipError(cuMemMapArrayAsync(mapInfoList, count, stream)); +} +inline static hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, + void* addr) { + return hipCUResultTohipError(cuMemRetainAllocationHandle(handle, addr)); +} +inline static hipError_t hipMemSetAccess(hipDeviceptr_t ptr, size_t size, + const hipMemAccessDesc* desc, + size_t count) { + CUmemAccessDesc cuDesc = hipMemAccessDescToCUmemAccessDesc(desc); + return hipCUResultTohipError(cuMemSetAccess(ptr, size, &cuDesc, count)); +} +inline static hipError_t hipMemUnmap(hipDeviceptr_t ptr, size_t size) { + return hipCUResultTohipError(cuMemUnmap(ptr, size)); +} +#endif // CUDA_VERSION >= CUDA_10020 + +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + const void* func, + int blockSize, + size_t dynamicSMemSize) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, + blockSize, dynamicSMemSize)); +} + +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + const void* func, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags) { + return 
hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, + blockSize, dynamicSMemSize, flags)); +} + +inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + hipFunction_t f, + int blockSize, + size_t dynamicSMemSize ){ + return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, + blockSize, dynamicSMemSize)); +} + +inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + hipFunction_t f, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags ) { + return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f, + blockSize, dynamicSMemSize, flags)); +} + +//TODO - Match CUoccupancyB2DSize +inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit){ + return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL, + dynSharedMemPerBlk, blockSizeLimit)); +} + +//TODO - Match CUoccupancyB2DSize +inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags){ + return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL, + dynSharedMemPerBlk, blockSizeLimit, flags)); +} + +inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { + struct cudaPointerAttributes cPA; + hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr)); + if (err == hipSuccess) { +#if (CUDART_VERSION >= 11000) + auto memType = cPA.type; +#else + unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11 +#endif + switch (memType) { + case cudaMemoryTypeDevice: + attributes->type = hipMemoryTypeDevice; + break; + case 
cudaMemoryTypeHost: + attributes->type = hipMemoryTypeHost; + break; + case cudaMemoryTypeManaged: + attributes->type = hipMemoryTypeManaged; + break; + default: + return hipErrorInvalidValue; + } + attributes->device = cPA.device; + attributes->devicePointer = cPA.devicePointer; + attributes->hostPointer = cPA.hostPointer; + attributes->isManaged = 0; + attributes->allocationFlags = 0; + } + return err; +} + +inline static hipError_t hipPointerGetAttribute(void* data, hipPointer_attribute attribute, + hipDeviceptr_t ptr) { + return hipCUResultTohipError(cuPointerGetAttribute(data, attribute, ptr)); +} + +inline static hipError_t hipDrvPointerGetAttributes(unsigned int numAttributes, + hipPointer_attribute* attributes, + void** data, hipDeviceptr_t ptr) { + return hipCUResultTohipError(cuPointerGetAttributes(numAttributes, attributes, data, ptr)); +} + +inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) { + return hipCUDAErrorTohipError(cudaMemGetInfo(free, total)); +} + +inline static hipError_t hipEventCreate(hipEvent_t* event) { + return hipCUDAErrorTohipError(cudaEventCreate(event)); +} + +inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) { + return hipCUDAErrorTohipError(cudaEventRecord(event, stream)); +} + +inline static hipError_t hipEventSynchronize(hipEvent_t event) { + return hipCUDAErrorTohipError(cudaEventSynchronize(event)); +} + +inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) { + return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop)); +} + +inline static hipError_t hipEventDestroy(hipEvent_t event) { + return hipCUDAErrorTohipError(cudaEventDestroy(event)); +} + +inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) { + return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags)); +} + +inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int 
priority) { + return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority)); +} + +inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority)); +} + +inline static hipError_t hipStreamCreate(hipStream_t* stream) { + return hipCUDAErrorTohipError(cudaStreamCreate(stream)); +} + +inline static hipError_t hipStreamSynchronize(hipStream_t stream) { + return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); +} + +inline static hipError_t hipStreamDestroy(hipStream_t stream) { + return hipCUDAErrorTohipError(cudaStreamDestroy(stream)); +} + +inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { + return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags)); +} + +inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) { + return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority)); +} + +inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags)); +} + +inline static hipError_t hipStreamQuery(hipStream_t stream) { + return hipCUDAErrorTohipError(cudaStreamQuery(stream)); +} + +inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, + void* userData, unsigned int flags) { + return hipCUDAErrorTohipError( + cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags)); +} + +inline static hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device) { + hipCtx_t context; + auto err = hipCUResultTohipError(cuStreamGetCtx(stream, &context)); + if (err != hipSuccess) return err; + + err = hipCUResultTohipError(cuCtxPushCurrent(context)); + if (err != hipSuccess) return err; + + err = hipCUResultTohipError(cuCtxGetDevice(device)); + if (err != hipSuccess) 
return err; + + return hipCUResultTohipError(cuCtxPopCurrent(&context)); +} + +inline static hipError_t hipDriverGetVersion(int* driverVersion) { + return hipCUDAErrorTohipError(cudaDriverGetVersion(driverVersion)); +} + +inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) { + return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion)); +} + +inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) { + return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice)); +} + +inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) { + return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice)); +} + +inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { + return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags)); +} + +inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { + return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx)); +} + +inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { + return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags)); +} + +inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, + int* active) { + return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active)); +} + +inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { + return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev)); +} + +inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { + return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev)); +} + +inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { + return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev)); +} + +inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { + return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags)); +} + 
+inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, + hipDeviceptr_t dptr) { + return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr)); +} + +inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, + size_t count) { + return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count)); +} + +inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, + int srcDevice, size_t count, + hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError( + cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream)); +} + +// Profile APIs: +inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); } + +inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); } + +inline static hipError_t hipGetDeviceFlags(unsigned int* flags) { + return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags)); +} + +inline static hipError_t hipSetDeviceFlags(unsigned int flags) { + return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags)); +} + +inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) { + return hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags)); +} + +inline static hipError_t hipEventQuery(hipEvent_t event) { + return hipCUDAErrorTohipError(cudaEventQuery(event)); +} + +inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) { + return hipCUResultTohipError(cuCtxCreate(ctx, flags, device)); +} + +inline static hipError_t hipCtxDestroy(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxDestroy(ctx)); +} + +inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { + return hipCUResultTohipError(cuCtxPopCurrent(ctx)); +} + +inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxPushCurrent(ctx)); +} + +inline static hipError_t 
hipCtxSetCurrent(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxSetCurrent(ctx)); +} + +inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { + return hipCUResultTohipError(cuCtxGetCurrent(ctx)); +} + +inline static hipError_t hipCtxGetDevice(hipDevice_t* device) { + return hipCUResultTohipError(cuCtxGetDevice(device)); +} + +inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { + return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion)); +} + +inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) { + return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig)); +} + +inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) { + return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig)); +} + +inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { + return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config)); +} + +inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { + return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig)); +} + +inline static hipError_t hipCtxSynchronize(void) { + return hipCUResultTohipError(cuCtxSynchronize()); +} + +inline static hipError_t hipCtxGetFlags(unsigned int* flags) { + return hipCUResultTohipError(cuCtxGetFlags(flags)); +} + +inline static hipError_t hipCtxDetach(hipCtx_t ctx) { + return hipCUResultTohipError(cuCtxDetach(ctx)); +} + +inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) { + return hipCUResultTohipError(cuDeviceGet(device, ordinal)); +} + +inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) { + return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device)); +} + +inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) { + return hipCUResultTohipError(cuDeviceGetName(name, len, device)); +} + +inline static hipError_t 
hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device) { + /* Queries the driver UUID of `device` and stores it in uuid->bytes; a NULL uuid is rejected with hipErrorInvalidValue, and driver errors are returned without touching *uuid. */ + if (uuid == NULL) { + return hipErrorInvalidValue; + } + struct CUuuid_st CUuid; + hipError_t err = hipCUResultTohipError(cuDeviceGetUuid(&CUuid, device)); + if (err == hipSuccess) { + /* The UUID is 16 bytes of binary data, not a C string: strncpy would stop at the first 0x00 byte and zero-fill the remainder, so copy all 16 bytes verbatim. */ + memcpy(uuid->bytes, CUuid.bytes, 16); + } + return err; +} + +inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, + int srcDevice, int dstDevice) { + return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice)); +} + +inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) { + return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device)); +} + +inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) { + return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId)); +} + +inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) { + return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config)); +} + +inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) { + return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config)); +} + +inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { + return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit)); +} + +inline static hipError_t hipDeviceSetLimit(hipLimit_t limit, size_t value) { + return hipCUDAErrorTohipError(cudaDeviceSetLimit(limit, value)); +} + +inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) { + return hipCUResultTohipError(cuDeviceTotalMem(bytes, device)); +} + +inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { + return hipCUResultTohipError(cuModuleLoad(module, fname)); +} + +inline static hipError_t hipModuleUnload(hipModule_t hmod) { + return hipCUResultTohipError(cuModuleUnload(hmod)); +} + +inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t
module, + const char* kname) { + return hipCUResultTohipError(cuModuleGetFunction(function, module, kname)); +} + +inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){ + return hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name)); +} + +inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { + return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func)); +} + +inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) { + return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc)); +} + +inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name) { + return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name)); +} + +inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) { + return hipCUResultTohipError(cuModuleLoadData(module, image)); +} + +inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, + unsigned int numOptions, hipJitOption* options, + void** optionValues) { + return hipCUResultTohipError( + cuModuleLoadDataEx(module, image, numOptions, options, optionValues)); +} + +inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks, + dim3 dimBlocks, void** args, size_t sharedMemBytes, + hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchKernel(function_address, numBlocks, dimBlocks, args, sharedMemBytes, stream)); +} + +inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams, + void** extra) { + return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, + blockDimY, blockDimZ, 
sharedMemBytes, stream, + kernelParams, extra)); +} + +inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { + return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig)); +} + +__HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset, + struct textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size __dparm(UINT_MAX)) { + return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); +} + +__HIP_DEPRECATED inline static hipError_t hipBindTexture2D( + size_t* offset, struct textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) { + return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch)); +} + +inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, + hipChannelFormatKind f) { + return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f)); +} + +inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + return hipCUDAErrorTohipError( + cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)); +} + +inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { + return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject)); +} + +inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc)); +} + +inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject)); +} + +inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t 
textureObject) { + return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject)); +} + +__HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset( + size_t* offset, const struct textureReference* texref) { + return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref)); +} + +inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) +{ + return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); +} + +inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + +inline static hipError_t hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams) { + return hipCUResultTohipError(cuLaunchCooperativeKernel(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, stream,kernelParams)); +} + +inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags)); +} + +inline static hipError_t hipModuleLaunchCooperativeKernelMultiDevice( + hipFunctionLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags) { + return hipCUResultTohipError(cuLaunchCooperativeKernelMultiDevice(launchParamsList, + numDevices, flags)); +} + +inline static hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t* extSem_out, + const hipExternalSemaphoreHandleDesc* semHandleDesc) { + return 
hipCUDAErrorTohipError(cudaImportExternalSemaphore(extSem_out,(const struct cudaExternalSemaphoreHandleDesc*)semHandleDesc)); +} + +inline static hipError_t hipSignalExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaSignalExternalSemaphoresAsync(extSemArray, (const struct cudaExternalSemaphoreSignalParams*)paramsArray, numExtSems, stream)); +} +inline static hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaWaitExternalSemaphoresAsync(extSemArray, (const struct cudaExternalSemaphoreWaitParams*)paramsArray, numExtSems, stream)); +} + +inline static hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extSem) { + return hipCUDAErrorTohipError(cudaDestroyExternalSemaphore(extSem)); +} + +inline static hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out, const hipExternalMemoryHandleDesc* memHandleDesc) { + return hipCUDAErrorTohipError(cudaImportExternalMemory(extMem_out, (const struct cudaExternalMemoryHandleDesc*)memHandleDesc)); +} + +inline static hipError_t hipExternalMemoryGetMappedBuffer(void **devPtr, hipExternalMemory_t extMem, const hipExternalMemoryBufferDesc *bufferDesc) { + return hipCUDAErrorTohipError(cudaExternalMemoryGetMappedBuffer(devPtr, extMem, (const struct cudaExternalMemoryBufferDesc*)bufferDesc)); +} + +inline static hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem) { + return hipCUDAErrorTohipError(cudaDestroyExternalMemory(extMem)); +} + +inline static hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, + hipGLDeviceList deviceList) { + return hipCUDAErrorTohipError(cudaGLGetDevices(pHipDeviceCount, pHipDevices, hipDeviceCount, 
deviceList)); +} + +inline static hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer, unsigned int flags) { + return hipCUDAErrorTohipError(cudaGraphicsGLRegisterBuffer(resource, buffer, flags)); +} + +inline static hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) { + return hipCUDAErrorTohipError(cudaGraphicsGLRegisterImage(resource, image, target, flags)); +} + +inline static hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaGraphicsMapResources(count, resources, stream)); +} + +inline static hipError_t hipGraphicsSubResourceGetMappedArray(hipArray_t* array, hipGraphicsResource_t resource, unsigned int arrayIndex, + unsigned int mipLevel) { + return hipCUDAErrorTohipError(cudaGraphicsSubResourceGetMappedArray(array, resource, arrayIndex, mipLevel)); +} + +inline static hipError_t hipGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, hipGraphicsResource_t resource) { + return hipCUDAErrorTohipError(cudaGraphicsResourceGetMappedPointer(devPtr, size, resource)); +} + +inline static hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources, hipStream_t stream __dparm(0)) { + return hipCUDAErrorTohipError(cudaGraphicsUnmapResources(count, resources, stream)); +} + +inline static hipError_t hipGraphicsUnregisterResource(hipGraphicsResource_t resource) { + return hipCUDAErrorTohipError(cudaGraphicsUnregisterResource(resource)); +} + +#if CUDA_VERSION >= CUDA_11020 +// ========================== HIP Stream Ordered Memory Allocator ================================= +inline static hipError_t hipDeviceGetDefaultMemPool(hipMemPool_t* mem_pool, int device) { + return hipCUDAErrorTohipError(cudaDeviceGetDefaultMemPool(mem_pool, device)); +} + +inline static hipError_t hipDeviceSetMemPool(int device, hipMemPool_t mem_pool) { + return 
hipCUDAErrorTohipError(cudaDeviceSetMemPool(device, mem_pool)); +} + +inline static hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool, int device) { + return hipCUDAErrorTohipError(cudaDeviceGetMemPool(mem_pool, device)); +} + +inline static hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMallocAsync(dev_ptr, size, stream)); +} + +inline static hipError_t hipFreeAsync(void* dev_ptr, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaFreeAsync(dev_ptr, stream)); +} + +inline static hipError_t hipMemPoolTrimTo(hipMemPool_t mem_pool, size_t min_bytes_to_hold) { + return hipCUDAErrorTohipError(cudaMemPoolTrimTo(mem_pool, min_bytes_to_hold)); +} + +inline static hipError_t hipMemPoolSetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value) { + return hipCUDAErrorTohipError(cudaMemPoolSetAttribute(mem_pool, attr, value)); +} + +inline static hipError_t hipMemPoolGetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value) { + return hipCUDAErrorTohipError(cudaMemPoolGetAttribute(mem_pool, attr, value)); +} + +inline static hipError_t hipMemPoolSetAccess( + hipMemPool_t mem_pool, + const hipMemAccessDesc* desc_list, + size_t count) { + return hipCUDAErrorTohipError(cudaMemPoolSetAccess(mem_pool, desc_list, count)); +} + +inline static hipError_t hipMemPoolGetAccess( + hipMemAccessFlags* flags, + hipMemPool_t mem_pool, + hipMemLocation* location) { + return hipCUDAErrorTohipError(cudaMemPoolGetAccess(flags, mem_pool, location)); +} + +inline static hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_props) { + return hipCUDAErrorTohipError(cudaMemPoolCreate(mem_pool, pool_props)); +} + +inline static hipError_t hipMemPoolDestroy(hipMemPool_t mem_pool) { + return hipCUDAErrorTohipError(cudaMemPoolDestroy(mem_pool)); +} + +inline static hipError_t hipMallocFromPoolAsync( + void** dev_ptr, + size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { 
+ return hipCUDAErrorTohipError(cudaMallocFromPoolAsync(dev_ptr, size, mem_pool, stream)); +} + +inline static hipError_t hipMemPoolExportToShareableHandle( + void* shared_handle, + hipMemPool_t mem_pool, + hipMemAllocationHandleType handle_type, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaMemPoolExportToShareableHandle( + shared_handle, mem_pool, handle_type, flags)); +} + +inline static hipError_t hipMemPoolImportFromShareableHandle( + hipMemPool_t* mem_pool, + void* shared_handle, + hipMemAllocationHandleType handle_type, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaMemPoolImportFromShareableHandle( + mem_pool, shared_handle, handle_type, flags)); +} + +inline static hipError_t hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* ptr) { + return hipCUDAErrorTohipError(cudaMemPoolExportPointer(export_data, ptr)); +} + +inline static hipError_t hipMemPoolImportPointer( + void** ptr, + hipMemPool_t mem_pool, + hipMemPoolPtrExportData* export_data) { + return hipCUDAErrorTohipError(cudaMemPoolImportPointer(ptr, mem_pool, export_data)); +} +#endif // CUDA_VERSION >= CUDA_11020 + +#ifdef __cplusplus +} +#endif + +#ifdef __CUDACC__ + +template +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + T func, + int blockSize, + size_t dynamicSMemSize) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, + blockSize, dynamicSMemSize)); +} + +template +inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func, + size_t dynamicSMemSize = 0, + int blockSizeLimit = 0) { + return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, + dynamicSMemSize, blockSizeLimit)); +} + +template +inline static hipError_t hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(int* min_grid_size, + int* block_size, + T func, + UnaryFunction block_size_to_dynamic_smem_size, + int block_size_limit 
= 0, + unsigned int flags = 0) { + return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(min_grid_size, block_size, func, + block_size_to_dynamic_smem_size, block_size_limit,flags)); +} + +template <class T> +inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func, + size_t dynamicSMemSize = 0, + int blockSizeLimit = 0, unsigned int flags = 0) { + /* Forward to the WithFlags runtime entry point: the plain cudaOccupancyMaxPotentialBlockSize template has no flags parameter, so handing it six arguments cannot compile once this wrapper is instantiated. */ + return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSizeWithFlags(minGridSize, blockSize, func, + dynamicSMemSize, blockSizeLimit, flags)); +} + +template +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func, + int blockSize, size_t dynamicSMemSize,unsigned int flags) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, + blockSize, dynamicSMemSize, flags)); +} + +template +inline static hipError_t hipBindTexture(size_t* offset, const struct texture& tex, + const void* devPtr, size_t size = UINT_MAX) { + return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size)); +} + +template +inline static hipError_t hipBindTexture(size_t* offset, struct texture& tex, + const void* devPtr, const hipChannelFormatDesc& desc, + size_t size = UINT_MAX) { + return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); +} + +template +__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture* tex) { + return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); +} + +template +__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture& tex) { + return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); +} + +template +__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray( + struct texture& tex, hipArray_const_t array, + const hipChannelFormatDesc& desc) { + return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); +} + +template +__HIP_DEPRECATED inline static hipError_t
hipBindTextureToArray( + struct texture* tex, hipArray_const_t array, + const hipChannelFormatDesc* desc) { + return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); +} + +template +__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray( + struct texture& tex, hipArray_const_t array) { + return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array)); +} + +template +inline static hipChannelFormatDesc hipCreateChannelDesc() { + return cudaCreateChannelDesc(); +} + +template +inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + +inline static hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, + const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc) { + return hipCUResultTohipError(cuTexObjectCreate((CUtexObject*)pTexObject, pResDesc, pTexDesc, pResViewDesc)); +} + +inline static hipError_t hipTexObjectDestroy(hipTextureObject_t texObject) { + return hipCUResultTohipError(cuTexObjectDestroy((CUtexObject)texObject)); +} + +inline static hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, hipTextureObject_t texObject) { + return hipCUResultTohipError(cuTexObjectGetResourceDesc(pResDesc, (CUtexObject)texObject)); +} + +inline static hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, hipTextureObject_t texObject) { + return hipCUResultTohipError(cuTexObjectGetResourceViewDesc(pResViewDesc, (CUtexObject)texObject)); +} + +inline static hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject) { + return hipCUResultTohipError(cuTexObjectGetTextureDesc(pTexDesc, (CUtexObject)texObject)); +} + +__HIP_DEPRECATED inline static hipError_t 
hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){ + return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am)); +} + +__HIP_DEPRECATED inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){ + return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm)); +} + +inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){ + return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes)); +} + +inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t Pitch){ + return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch)); +} + +__HIP_DEPRECATED inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){ + return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents)); +} + +__HIP_DEPRECATED inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){ + return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags)); +} + +__HIP_DEPRECATED inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){ + return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags)); +} + +inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){ + return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray)); +} + +inline static hipError_t hipArrayDestroy(hiparray hArray){ + return hipCUResultTohipError(cuArrayDestroy(hArray)); +} + +inline static hipError_t hipArray3DCreate(hiparray* pHandle, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray){ + return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray)); +} + +inline static hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, + unsigned int* flags, hipArray* array) { + return 
hipCUDAErrorTohipError(cudaArrayGetInfo(desc, extent, flags, array)); +} + +inline static hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + return hipCUResultTohipError(cuArrayGetDescriptor(pArrayDescriptor, (CUarray)array)); +} + +inline static hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + return hipCUResultTohipError(cuArray3DGetDescriptor(pArrayDescriptor, (CUarray)array)); +} + +inline static hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode) { + return hipCUDAErrorTohipError(cudaStreamBeginCapture(stream, mode)); +} + +inline static hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph) { + return hipCUDAErrorTohipError(cudaStreamEndCapture(stream, pGraph)); +} + +inline static hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags) { + return hipCUDAErrorTohipError(cudaGraphCreate(pGraph, flags)); +} + +inline static hipError_t hipGraphDestroy(hipGraph_t graph) { + return hipCUDAErrorTohipError(cudaGraphDestroy(graph)); +} + +inline static hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec) { + return hipCUDAErrorTohipError(cudaGraphExecDestroy(pGraphExec)); +} + +inline static hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphNode_t* pErrorNode, char* pLogBuffer, + size_t bufferSize) { + return hipCUDAErrorTohipError( + cudaGraphInstantiate(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize)); +} + +#if CUDA_VERSION >= CUDA_11040 +inline static hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, + unsigned long long flags) { + return hipCUDAErrorTohipError(cudaGraphInstantiateWithFlags(pGraphExec, graph, flags)); +} + +inline static hipError_t hipGraphAddMemAllocNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + hipMemAllocNodeParams* pNodeParams) { /* `static` like every other wrapper in this header; forwards to cudaGraphAddMemAllocNode */ + return 
hipCUDAErrorTohipError(cudaGraphAddMemAllocNode( + pGraphNode, graph, pDependencies, numDependencies, pNodeParams)); +} + +inline static hipError_t hipGraphMemAllocNodeGetParams(hipGraphNode_t node, + hipMemAllocNodeParams* pNodeParams) { /* forwards to cudaGraphMemAllocNodeGetParams */ + return hipCUDAErrorTohipError(cudaGraphMemAllocNodeGetParams(node, pNodeParams)); +} + +inline static hipError_t hipGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dev_ptr) { /* forwards to cudaGraphAddMemFreeNode */ + return hipCUDAErrorTohipError(cudaGraphAddMemFreeNode( + pGraphNode, graph, pDependencies, numDependencies, dev_ptr)); +} + +inline static hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* dev_ptr) { /* forwards to cudaGraphMemFreeNodeGetParams */ + return hipCUDAErrorTohipError(cudaGraphMemFreeNodeGetParams(node, dev_ptr)); +} +#endif +inline static hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaGraphLaunch(graphExec, stream)); +} + +inline static hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipKernelNodeParams* pNodeParams) { + return hipCUDAErrorTohipError( + cudaGraphAddKernelNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams)); +} + +inline static hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipMemcpy3DParms* pCopyParams) { + return hipCUDAErrorTohipError( + cudaGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, pCopyParams)); +} + +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dst, const void* src, size_t count, hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + cudaGraphAddMemcpyNode1D(pGraphNode, graph, pDependencies, numDependencies, dst, src, 
count, kind)); +} +#endif + +inline static hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipMemsetParams* pMemsetParams) { + return hipCUDAErrorTohipError( + cudaGraphAddMemsetNode(pGraphNode, graph, pDependencies, numDependencies, pMemsetParams)); +} + +inline static hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, + size_t* numNodes) { + return hipCUDAErrorTohipError(cudaGraphGetNodes(graph, nodes, numNodes)); +} + +inline static hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, + size_t* pNumRootNodes) { + return hipCUDAErrorTohipError(cudaGraphGetRootNodes(graph, pRootNodes, pNumRootNodes)); +} + +inline static hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, + hipKernelNodeParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphKernelNodeGetParams(node, pNodeParams)); +} + +inline static hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphKernelNodeSetParams(node, pNodeParams)); +} + +inline static hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + const hipKernelNodeAttrValue* value) { + return hipCUDAErrorTohipError(cudaGraphKernelNodeSetAttribute(hNode, attr, value)); +} + +inline static hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + hipKernelNodeAttrValue* value) { + return hipCUDAErrorTohipError(cudaGraphKernelNodeGetAttribute(hNode, attr, value)); +} + +inline static hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphMemcpyNodeGetParams(node, pNodeParams)); +} + +inline static hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, + const hipMemcpy3DParms* pNodeParams) { + return 
hipCUDAErrorTohipError(cudaGraphMemcpyNodeSetParams(node, pNodeParams)); +} + +inline static hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, + hipMemsetParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphMemsetNodeGetParams(node, pNodeParams)); +} + +inline static hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, + const hipMemsetParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphMemsetNodeSetParams(node, pNodeParams)); +} + +inline static hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode) { + return hipCUDAErrorTohipError(cudaThreadExchangeStreamCaptureMode(mode)); +} + +inline static hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams)); +} + +inline static hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies) { + return hipCUDAErrorTohipError(cudaGraphAddDependencies(graph, from, to, numDependencies)); +} + +inline static hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies) { + return hipCUDAErrorTohipError( + cudaGraphAddEmptyNode(pGraphNode, graph, pDependencies, numDependencies)); +} + +inline static hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value, + unsigned int flags) { + if (value < 0) { + printf("Warning! value is negative, CUDA accept positive values\n"); + } + return hipCUResultTohipError(cuStreamWriteValue32(stream, reinterpret_cast(ptr), + static_cast(value), flags)); +} + +inline static hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value, + unsigned int flags) { + if (value < 0) { + printf("Warning! 
value is negative, CUDA accept positive values\n"); + } + return hipCUResultTohipError(cuStreamWriteValue64(stream, reinterpret_cast(ptr), + static_cast(value), flags)); +} + +inline static hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, + unsigned int flags, + uint32_t mask __dparm(0xFFFFFFFF)) { + if (value < 0) { + printf("Warning! value is negative, CUDA accept positive values\n"); + } + if (mask != STREAM_OPS_WAIT_MASK_32) { + printf("Warning! mask will not have impact as CUDA ignores it.\n"); + } + return hipCUResultTohipError(cuStreamWaitValue32(stream, reinterpret_cast(ptr), + static_cast(value), flags)); +} + +inline static hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, + unsigned int flags, + uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)) { + if (value < 0) { + printf("Warning! value is negative, CUDA accept positive values\n"); + } + if (mask != STREAM_OPS_WAIT_MASK_64) { + printf("Warning! mask will not have impact as CUDA ignores it.\n"); + } + return hipCUResultTohipError(cuStreamWaitValue64(stream, reinterpret_cast(ptr), + static_cast(value), flags)); +} + +inline static hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, + size_t numDependencies) { + return hipCUDAErrorTohipError(cudaGraphRemoveDependencies(graph, from, to, numDependencies)); +} + + +inline static hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, + hipGraphNode_t* to, size_t* numEdges) { + return hipCUDAErrorTohipError(cudaGraphGetEdges(graph, from, to, numEdges)); +} + +inline static hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, + hipGraphNode_t* pDependencies, + size_t* pNumDependencies) { + return hipCUDAErrorTohipError( + cudaGraphNodeGetDependencies(node, pDependencies, pNumDependencies)); +} + +inline static hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, + hipGraphNode_t* pDependentNodes, + size_t* 
pNumDependentNodes) { + return hipCUDAErrorTohipError( + cudaGraphNodeGetDependentNodes(node, pDependentNodes, pNumDependentNodes)); +} + +inline static hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType) { + return hipCUDAErrorTohipError(cudaGraphNodeGetType(node, pType)); +} + +inline static hipError_t hipGraphDestroyNode(hipGraphNode_t node) { + return hipCUDAErrorTohipError(cudaGraphDestroyNode(node)); +} + +inline static hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph) { + return hipCUDAErrorTohipError(cudaGraphClone(pGraphClone, originalGraph)); +} + +inline static hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, + hipGraph_t clonedGraph) { + return hipCUDAErrorTohipError(cudaGraphNodeFindInClone(pNode, originalNode, clonedGraph)); +} + +inline static hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, hipGraph_t childGraph) { + return hipCUDAErrorTohipError( + cudaGraphAddChildGraphNode(pGraphNode, graph, pDependencies, numDependencies, childGraph)); +} + +inline static hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph) { + return hipCUDAErrorTohipError(cudaGraphChildGraphNodeGetGraph(node, pGraph)); +} + +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + hipGraph_t childGraph) { + return hipCUDAErrorTohipError( + cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph)); +} +#endif + +inline static hipError_t hipStreamGetCaptureInfo(hipStream_t stream, + hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId) { + return hipCUDAErrorTohipError(cudaStreamGetCaptureInfo(stream, pCaptureStatus, pId)); +} + +#if CUDA_VERSION >= CUDA_11030 +inline static hipError_t hipStreamGetCaptureInfo_v2( + hipStream_t stream, 
hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out __dparm(0), hipGraph_t* graph_out __dparm(0), + const hipGraphNode_t** dependencies_out __dparm(0), size_t* numDependencies_out __dparm(0)) { + return hipCUDAErrorTohipError(cudaStreamGetCaptureInfo_v2( + stream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out)); +} +#endif + +inline static hipError_t hipStreamIsCapturing(hipStream_t stream, + hipStreamCaptureStatus* pCaptureStatus) { + return hipCUDAErrorTohipError(cudaStreamIsCapturing(stream, pCaptureStatus)); +} + +#if CUDA_VERSION >= CUDA_11030 +inline static hipError_t hipStreamUpdateCaptureDependencies(hipStream_t stream, + hipGraphNode_t* dependencies, + size_t numDependencies, + unsigned int flags __dparm(0)) { + return hipCUDAErrorTohipError(cudaStreamUpdateCaptureDependencies(stream, dependencies, + numDependencies, flags)); +} +#endif + +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, hipEvent_t event) { + return hipCUDAErrorTohipError( + cudaGraphAddEventRecordNode(pGraphNode, graph, pDependencies, numDependencies, event)); +} + +inline static hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, hipEvent_t event) { + return hipCUDAErrorTohipError( + cudaGraphAddEventWaitNode(pGraphNode, graph, pDependencies, numDependencies, event)); +} +#endif + +inline static hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, + const hipHostNodeParams* pNodeParams) { + return hipCUDAErrorTohipError( + cudaGraphAddHostNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams)); +} + +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t 
hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, + hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dst, + const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaGraphAddMemcpyNodeFromSymbol( + pGraphNode, graph, pDependencies, numDependencies, dst, symbol, count, offset, kind)); +} + +inline static hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaGraphAddMemcpyNodeToSymbol( + pGraphNode, graph, pDependencies, numDependencies, symbol, src, count, offset, kind)); +} + +inline static hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { + return hipCUDAErrorTohipError(cudaGraphEventRecordNodeSetEvent(node, event)); +} + +inline static hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) { + return hipCUDAErrorTohipError(cudaGraphEventWaitNodeGetEvent(node, event_out)); +} + +inline static hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { + return hipCUDAErrorTohipError(cudaGraphEventWaitNodeSetEvent(node, event)); +} +#endif + +inline static hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + const hipHostNodeParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams)); +} + +inline static hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams)); +} + +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, + hipGraphNode_t 
node, void* dst, + const void* src, size_t count, + hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, count, kind)); +} + +inline static hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, + hipGraphNode_t node, void* dst, + const void* symbol, size_t count, + size_t offset, + hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaGraphExecMemcpyNodeSetParamsFromSymbol( + hGraphExec, node, dst, symbol, count, offset, kind)); +} + +inline static hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( + hipGraphExec_t hGraphExec, hipGraphNode_t node, const void* symbol, const void* src, + size_t count, size_t offset, hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaGraphExecMemcpyNodeSetParamsToSymbol( + hGraphExec, node, symbol, src, count, offset, kind)); +} +#endif + +inline static hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, + hipGraphNode_t node, + const hipMemsetParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams)); +} + +inline static hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, + hipGraphNode_t* hErrorNode_out, + hipGraphExecUpdateResult* updateResult_out) { + return hipCUDAErrorTohipError( + cudaGraphExecUpdate(hGraphExec, hGraph, hErrorNode_out, updateResult_out)); +} + +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, + const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + cudaGraphMemcpyNodeSetParamsFromSymbol(node, dst, symbol, count, offset, kind)); +} + +inline static hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, + const void* symbol, const void* src, + size_t count, size_t offset, + hipMemcpyKind kind) { + return hipCUDAErrorTohipError( + 
cudaGraphMemcpyNodeSetParamsToSymbol(node, symbol, src, count, offset, kind)); +} + +inline static hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, + hipEvent_t* event_out) { + return hipCUDAErrorTohipError(cudaGraphEventRecordNodeGetEvent(node, event_out)); +} +#endif + +inline static hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, + hipHostNodeParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphHostNodeGetParams(node, pNodeParams)); +} + +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, + const void* src, size_t count, + hipMemcpyKind kind) { + return hipCUDAErrorTohipError(cudaGraphMemcpyNodeSetParams1D(node, dst, src, count, kind)); +} + +inline static hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, + hipGraphNode_t hNode, + hipEvent_t event) { + return hipCUDAErrorTohipError(cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event)); +} + +inline static hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, + hipGraphNode_t hNode, hipEvent_t event) { + return hipCUDAErrorTohipError(cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event)); +} + +inline static hipError_t hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value) { + return hipCUDAErrorTohipError(cudaDeviceGetGraphMemAttribute(device, attr, value)); +} + +inline static hipError_t hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value) { + return hipCUDAErrorTohipError(cudaDeviceSetGraphMemAttribute(device, attr, value)); +} + +inline static hipError_t hipDeviceGraphMemTrim(int device) { + return hipCUDAErrorTohipError(cudaDeviceGraphMemTrim(device)); +} + +inline static hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData) { + return hipCUDAErrorTohipError(cudaLaunchHostFunc(stream, fn, userData)); +} + +inline static hipError_t 
hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, + unsigned int initialRefcount, unsigned int flags) { + return hipCUDAErrorTohipError(cudaUserObjectCreate(object_out, ptr, destroy, initialRefcount, flags)); +} + + +inline static hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count __dparm(1)) { + return hipCUDAErrorTohipError(cudaUserObjectRelease(object, count)); +} + + +inline static hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count __dparm(1)) { /* retain must forward to cudaUserObjectRetain, not to the Release entry point */ + return hipCUDAErrorTohipError(cudaUserObjectRetain(object, count)); +} + +inline static hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count __dparm(1), unsigned int flags __dparm(0)) { + return hipCUDAErrorTohipError(cudaGraphRetainUserObject(graph, object, count, flags)); +} + +inline static hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count __dparm(1)) { + return hipCUDAErrorTohipError(cudaGraphReleaseUserObject(graph, object, count)); +} +#endif + +inline static hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, + const hipHostNodeParams* pNodeParams) { + return hipCUDAErrorTohipError(cudaGraphHostNodeSetParams(node, pNodeParams)); +} +#if CUDA_VERSION >= CUDA_11030 +inline static hipError_t hipGraphDebugDotPrint(hipGraph_t graph, const char* path, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaGraphDebugDotPrint(graph, path, flags)); +} +#endif +#if CUDA_VERSION >= CUDA_11000 +inline static hipError_t hipGraphKernelNodeCopyAttributes(hipGraphNode_t hSrc, + hipGraphNode_t hDst) { + return hipCUDAErrorTohipError(cudaGraphKernelNodeCopyAttributes(hSrc, hDst)); +} +#endif +#if CUDA_VERSION >= CUDA_11060 +inline static hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int isEnabled) { + return hipCUDAErrorTohipError(cudaGraphNodeSetEnabled(hGraphExec, hNode, isEnabled)); +} + +inline 
static hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int* isEnabled) { + return hipCUDAErrorTohipError(cudaGraphNodeGetEnabled(hGraphExec, hNode, isEnabled)); +} +#endif +#if CUDA_VERSION >= CUDA_11010 +inline static hipError_t hipGraphUpload(hipGraphExec_t graphExec, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaGraphUpload(graphExec, stream)); +} +#endif +#endif //__CUDACC__ + +#endif // HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h new file mode 100644 index 0000000000..df374d705a --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h @@ -0,0 +1,6 @@ +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_TEXTURE_TYPES_H + +#include + +#endif diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_unsafe_atomics.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_unsafe_atomics.h new file mode 100644 index 0000000000..993f17507b --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hip_unsafe_atomics.h @@ -0,0 +1,100 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_UNSAFE_ATOMICS_H +#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_UNSAFE_ATOMICS_H + +__device__ inline float unsafeAtomicAdd(float* addr, float value) { + return atomicAdd(addr, value); +} + +__device__ inline double unsafeAtomicAdd(double* addr, double value) { /* returns the value stored at addr before the add */ +#if __CUDA_ARCH__ < 600 + unsigned long long *addr_cast = (unsigned long long*)addr; + unsigned long long old_val = *addr_cast; + unsigned long long expected; + do { /* CAS retry loop on the raw 64-bit pattern of the double */ + expected = old_val; + old_val = atomicCAS(addr_cast, expected, + __double_as_longlong(value + + __longlong_as_double(expected))); + } while (expected != old_val); /* compare raw bits: the CAS succeeded only if it returned exactly what we expected */ + return __longlong_as_double(old_val); /* reinterpret the bits; a numeric integer-to-double conversion would return garbage */ +#else + return atomicAdd(addr, value); +#endif +} + +__device__ inline float unsafeAtomicMax(float* addr, float value) { + return atomicMax(addr, value); +} + +__device__ inline double unsafeAtomicMax(double* addr, double val) { + return atomicMax(addr, val); +} + +__device__ inline float unsafeAtomicMin(float* addr, float value) { + return atomicMin(addr, value); +} + +__device__ inline double unsafeAtomicMin(double* addr, double val) { + return atomicMin(addr, val); +} + +__device__ inline float safeAtomicAdd(float* addr, float value) { + return atomicAdd(addr, value); +} + +__device__ inline double safeAtomicAdd(double* addr, double value) { /* returns the value stored at addr before the add */ +#if __CUDA_ARCH__ < 600 + unsigned long long *addr_cast = (unsigned long long*)addr; + unsigned long long old_val = *addr_cast; + unsigned long long expected; + do { + 
expected = old_val; + old_val = atomicCAS(addr_cast, expected, + __double_as_longlong(value + + __longlong_as_double(expected))); + } while (expected != old_val); /* compare raw bits: the CAS succeeded only if it returned exactly what we expected */ + return __longlong_as_double(old_val); /* reinterpret the bits; a numeric integer-to-double conversion would return garbage */ +#else + return atomicAdd(addr, value); +#endif +} + +__device__ inline float safeAtomicMax(float* addr, float value) { + return atomicMax(addr, value); +} + +__device__ inline double safeAtomicMax(double* addr, double val) { + return atomicMax(addr, val); +} + +__device__ inline float safeAtomicMin(float* addr, float value) { + return atomicMin(addr, value); +} + +__device__ inline double safeAtomicMin(double* addr, double val) { + return atomicMin(addr, val); +} + +#endif diff --git a/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h new file mode 100644 index 0000000000..68864e75c8 --- /dev/null +++ b/projects/clr/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h @@ -0,0 +1,172 @@ +/* +Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef HIPRTC_H +#define HIPRTC_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include + +#if !defined(_WIN32) +#pragma GCC visibility push(default) +#endif + +typedef enum hiprtcResult { + HIPRTC_SUCCESS = 0, + HIPRTC_ERROR_OUT_OF_MEMORY = 1, + HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, + HIPRTC_ERROR_INVALID_INPUT = 3, + HIPRTC_ERROR_INVALID_PROGRAM = 4, + HIPRTC_ERROR_INVALID_OPTION = 5, + HIPRTC_ERROR_COMPILATION = 6, + HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, + HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, + HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, + HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, + HIPRTC_ERROR_INTERNAL_ERROR = 11 +} hiprtcResult; + +inline static nvrtcResult hiprtcResultTonvrtcResult(hiprtcResult result) { + switch (result) { + case HIPRTC_SUCCESS: + return NVRTC_SUCCESS; + case HIPRTC_ERROR_OUT_OF_MEMORY: + return NVRTC_ERROR_OUT_OF_MEMORY; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + return NVRTC_ERROR_PROGRAM_CREATION_FAILURE; + case HIPRTC_ERROR_INVALID_INPUT: + return NVRTC_ERROR_INVALID_INPUT; + case HIPRTC_ERROR_INVALID_PROGRAM: + return NVRTC_ERROR_INVALID_PROGRAM; + case HIPRTC_ERROR_INVALID_OPTION: + return NVRTC_ERROR_INVALID_OPTION; + case HIPRTC_ERROR_COMPILATION: + return NVRTC_ERROR_COMPILATION; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + return NVRTC_ERROR_BUILTIN_OPERATION_FAILURE; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + return NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + return NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + return 
NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID; + case HIPRTC_ERROR_INTERNAL_ERROR: + return NVRTC_ERROR_INTERNAL_ERROR; + default: + return NVRTC_ERROR_INTERNAL_ERROR; + } +} + +inline static hiprtcResult nvrtcResultTohiprtcResult(nvrtcResult result) { + switch (result) { + case NVRTC_SUCCESS: + return HIPRTC_SUCCESS; + case NVRTC_ERROR_OUT_OF_MEMORY: + return HIPRTC_ERROR_OUT_OF_MEMORY; + case NVRTC_ERROR_PROGRAM_CREATION_FAILURE: + return HIPRTC_ERROR_PROGRAM_CREATION_FAILURE; + case NVRTC_ERROR_INVALID_INPUT: + return HIPRTC_ERROR_INVALID_INPUT; + case NVRTC_ERROR_INVALID_PROGRAM: + return HIPRTC_ERROR_INVALID_PROGRAM; + case NVRTC_ERROR_INVALID_OPTION: + return HIPRTC_ERROR_INVALID_OPTION; + case NVRTC_ERROR_COMPILATION: + return HIPRTC_ERROR_COMPILATION; + case NVRTC_ERROR_BUILTIN_OPERATION_FAILURE: + return HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE; + case NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + return HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION; + case NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + return HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION; + case NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; + case NVRTC_ERROR_INTERNAL_ERROR: + return HIPRTC_ERROR_INTERNAL_ERROR; + default: + return HIPRTC_ERROR_INTERNAL_ERROR; + } +} + +inline static const char* hiprtcGetErrorString(hiprtcResult result) { + return nvrtcGetErrorString(hiprtcResultTonvrtcResult(result)); +} + +inline static hiprtcResult hiprtcVersion(int* major, int* minor) { + return nvrtcResultTohiprtcResult(nvrtcVersion(major, minor)); +} + +typedef nvrtcProgram hiprtcProgram; + +inline static hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) { + return nvrtcResultTohiprtcResult(nvrtcAddNameExpression(prog, name_expression)); +} + +inline static hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { + return nvrtcResultTohiprtcResult(nvrtcCompileProgram(prog, 
numOptions, options)); +} + +inline static hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char** headers, const char** includeNames) { + return nvrtcResultTohiprtcResult( + nvrtcCreateProgram(prog, src, name, numHeaders, headers, includeNames)); +} + +inline static hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { + return nvrtcResultTohiprtcResult(nvrtcDestroyProgram(prog)); +} + +inline static hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** lowered_name) { + return nvrtcResultTohiprtcResult(nvrtcGetLoweredName(prog, name_expression, lowered_name)); +} + +inline static hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* log) { + return nvrtcResultTohiprtcResult(nvrtcGetProgramLog(prog, log)); +} + +inline static hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { + return nvrtcResultTohiprtcResult(nvrtcGetProgramLogSize(prog, logSizeRet)); +} + +inline static hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code) { + return nvrtcResultTohiprtcResult(nvrtcGetPTX(prog, code)); +} + +inline static hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet) { + return nvrtcResultTohiprtcResult(nvrtcGetPTXSize(prog, codeSizeRet)); +} + +#if !defined(_WIN32) +#pragma GCC visibility pop +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif // HIPRTC_H diff --git a/projects/clr/hipamd/install.sh b/projects/clr/hipamd/install.sh new file mode 100755 index 0000000000..aae947f9ab --- /dev/null +++ b/projects/clr/hipamd/install.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Parse command-line options +# Option strings +SHORT=h +LONG=help,opencl:,hip:,rocclr: +# read the options +OPTS=$(getopt --options $SHORT --long $LONG --name "$0" -- "$@") +if [ $? != 0 ] ; then echo "Failed to parse options...exiting." >&2 ; exit 1 ; fi + +usage() { + echo "Usage: $0 --hip --opencl --rocclr " ; + exit 1; +} + +[ $# -eq 0 ] && usage + +eval set -- "$OPTS" + +# extract options and their arguments into variables. +while true ; do + case "$1" in + --hip ) + HIP_DIR="$2" + shift 2 + ;; + --rocclr ) + ROCCLR_DIR="$2" + shift 2 + ;; + --opencl ) + OPENCL_DIR="$2" + shift 2 + ;; + -h | --help ) + usage + shift + ;; + -- ) + shift + break + ;; + *) + echo "Internal error!" 
+ exit 1 + ;; + esac +done + +BUILD_ROOT="$( mktemp -d )" +SRC_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +WORKING_DIR=$PWD +DASH_JAY="-j $(getconf _NPROCESSORS_ONLN)" +OS_NAME="$(cat /etc/os-release | awk -F '=' '/^NAME/{print $2}' | awk '{print $1}' | tr -d '"')" +[[ -z "$ROCM_PATH" ]] && ROCM_PATH=/opt/rocm + +err() { + echo "${1-Died}." >&2 +} + +die() { + err "$1" + exit 1 +} + +pushd () { + command pushd "$@" > /dev/null +} + +popd () { + command popd "$@" > /dev/null +} + +function setupENV() +{ + if [ "$OS_NAME" == "Ubuntu" ] + then + sudo apt-get update + sudo apt-get install dpkg-dev rpm doxygen libelf-dev rename liburi-encode-perl \ + libfile-basedir-perl libfile-copy-recursive-perl libfile-listing-perl + elif [ "$OS_NAME" == "CentOS" ] + then + sudo yum install dpkg-dev rpm-build doxygen elfutils-libelf-devel prename \ + perl-URI-Encode perl-File-Listing perl-File-BaseDir + fi +} + +function buildHIP() +{ + pushd $BUILD_ROOT + HIP_BUILD_DIR="$BUILD_ROOT/hip_build" + mkdir $HIP_BUILD_DIR + pushd $HIP_BUILD_DIR + cmake $SRC_ROOT -DHIP_COMMON_DIR="$HIP_DIR" -DAMD_OPENCL_PATH=$OPENCL_DIR -DROCCLR_PATH=$ROCCLR_DIR -DCMAKE_PREFIX_PATH="$ROCM_PATH" -DCMAKE_BUILD_TYPE=Release || return 1 + make $DASH_JAY || return 1 + make package || return 1 # report build failures; otherwise the trailing rm -rf always makes buildHIP succeed + if [ "$OS_NAME" == "Ubuntu" ] + then + cp hip-*.deb $WORKING_DIR + sudo dpkg -i -B hip-dev*.deb hip-runtime-amd*.deb hip-sample*.deb hip-doc*.deb + elif [ "$OS_NAME" == "CentOS" ] + then + cp hip-*.rpm $WORKING_DIR + sudo rpm -ivh --replacefiles --force hip-devel*.rpm hip-runtime-amd*.rpm hip-sample*.rpm \ + hip-doc*.rpm + fi + popd + popd + rm -rf $BUILD_ROOT +} + +echo "Preparing build environment" +setupENV || die "setupENV failed" +echo "Building and installing HIP packages" +buildHIP || die "buildHIP failed" +echo "Finished building HIP packages" diff --git a/projects/clr/hipamd/packaging/CMakeLists.txt b/projects/clr/hipamd/packaging/CMakeLists.txt new file mode 100644 index 0000000000..92118aea82 --- /dev/null +++
b/projects/clr/hipamd/packaging/CMakeLists.txt @@ -0,0 +1,251 @@ +# Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +cmake_minimum_required(VERSION 3.16.8) + +#set components for HIP +set(CPACK_COMPONENTS_ALL binary dev doc samples runtime-nvidia) + +###############Install Required files for all compnents######## + +#Enable Component Install +set(CPACK_RPM_COMPONENT_INSTALL ON) +set(CPACK_DEB_COMPONENT_INSTALL ON) + +###Set License#### +set(CPACK_RESOURCE_FILE_LICENSE ${hip_SOURCE_DIR}/LICENSE.txt) +install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT binary) +set(CPACK_RPM_PACKAGE_LICENSE "MIT") + +#Begin binary files install +if(HIP_PLATFORM STREQUAL "amd" ) + if(BUILD_SHARED_LIBS) + install(FILES ${CMAKE_BINARY_DIR}/lib/libamdhip64.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc-builtins.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc-builtins.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc-builtins.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + else() + install(FILES ${CMAKE_BINARY_DIR}/lib/libamdhip64.a DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + endif()#End BUILD_SHARED_LIBS + +#TODO:This do not belong in BINARY package. 
+#Keeping it as is for now +install(FILES ${CMAKE_BINARY_DIR}/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + +install(FILES ${CMAKE_BINARY_DIR}/hip-config.cmake ${CMAKE_BINARY_DIR}/hip-config-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT binary) +install ( EXPORT hip-targets FILE hip-targets.cmake NAMESPACE hip:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT binary) + +install(FILES ${CMAKE_BINARY_DIR}/src/hip-lang-config.cmake ${CMAKE_BINARY_DIR}/src/hip-lang-config-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip-lang COMPONENT binary) +install ( EXPORT hip-lang-targets FILE hip-lang-targets.cmake NAMESPACE hip-lang:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip-lang COMPONENT binary) + +install(FILES ${CMAKE_BINARY_DIR}/hiprtc-config.cmake ${CMAKE_BINARY_DIR}/hiprtc-config-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hiprtc COMPONENT binary) +install ( EXPORT hiprtc-targets FILE hiprtc-targets.cmake NAMESPACE hiprtc:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hiprtc COMPONENT binary) + +endif()#End HIP_PLATFORM = "amd" +#End bianry files install + +#Begin dev files install +if(WIN32) + install(DIRECTORY ${HIP_COMMON_DIR}/bin DESTINATION . COMPONENT dev + USE_SOURCE_PERMISSIONS) +else() + install(DIRECTORY ${HIP_COMMON_DIR}/bin DESTINATION . COMPONENT dev + USE_SOURCE_PERMISSIONS + DIRECTORY_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + PATTERN *.bat EXCLUDE) +endif() + +install(DIRECTORY ${hip_SOURCE_DIR}/bin DESTINATION . COMPONENT dev + USE_SOURCE_PERMISSIONS + DIRECTORY_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) +install(DIRECTORY ${HIP_COMMON_DIR}/include DESTINATION . 
COMPONENT dev) +install(DIRECTORY ${hip_SOURCE_DIR}/include/hip/amd_detail + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev) +install(DIRECTORY ${hip_SOURCE_DIR}/include/hip/nvidia_detail + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev) +install(FILES ${CMAKE_BINARY_DIR}/include/hip/amd_detail/hip_prof_str.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/amd_detail COMPONENT dev) +install(FILES ${CMAKE_BINARY_DIR}/include/hip/hip_version.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev) +install(FILES ${CMAKE_BINARY_DIR}/.hipVersion DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT dev) +install(DIRECTORY ${HIP_COMMON_DIR}/cmake/ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT dev) +#End dev files install + +#Begin doc files install +find_program(DOXYGEN_EXE doxygen) +if(DOXYGEN_EXE) + add_custom_target(build_doxygen ALL + COMMAND HIP_PATH=${HIP_COMMON_DIR} doxygen ${HIP_COMMON_DIR}/docs/doxygen-input/doxy.cfg) + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/RuntimeAPI/html + DESTINATION ${CMAKE_INSTALL_DOCDIR}/RuntimeAPI COMPONENT doc) +endif() +#End doc files install + +#Begin samples files install +install(DIRECTORY ${HIP_COMMON_DIR}/samples DESTINATION ${CMAKE_INSTALL_DATADIR}/hip COMPONENT samples) +#End samples files install + + +################################## +# Packaging steps COMMON Variables +################################## +set(CPACK_SET_DESTDIR TRUE) + +set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") +set(CPACK_PACKAGE_CONTACT "HIP Support ") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP:Heterogenous-computing Interface for Portability") +set(CPACK_PACKAGE_VERSION_MAJOR ${HIP_VERSION_MAJOR}) +set(CPACK_PACKAGE_VERSION_MINOR ${HIP_VERSION_MINOR}) +set(CPACK_PACKAGE_VERSION_PATCH ${HIP_VERSION_PATCH}) +set(CPACK_PACKAGE_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_PACKAGING_VERSION_PATCH}) +set(CPACK_GENERATOR "TGZ;DEB;RPM" CACHE STRING "Package types to build") + 
+set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") +if (CPACK_RPM_PACKAGE_RELEASE MATCHES "local" ) + #If building locally, the default value will cause a build failure + #DEBUG SYMBOL packaging requires SOURCE_DIR to be small + set(CPACK_RPM_BUILD_SOURCE_DIRS_PREFIX ${CPACK_INSTALL_PREFIX}) +endif() +set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") +set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") + +set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") + +set(CPACK_SOURCE_GENERATOR "TGZ") + + +#Begin Binary Packaging setting + +set(CPACK_BINARY_DEB "ON") +set(CPACK_BINARY_RPM "ON") + +set(CPACK_DEBIAN_BINARY_PACKAGE_NAME "hip-runtime-amd") +set(CPACK_RPM_BINARY_PACKAGE_NAME "hip-runtime-amd") + +set(CPACK_COMPONENT_BINARY_DESCRIPTION "HIP:Heterogenous-computing Interface for Portability [RUNTIME - AMD]") +if(FILE_REORG_BACKWARD_COMPATIBILITY) +#This is used for softlinking hip-target files + configure_file(hip-runtime-amd.postinst ${CMAKE_CURRENT_BINARY_DIR}/binary/postinst @ONLY) + configure_file(hip-runtime-amd.prerm ${CMAKE_CURRENT_BINARY_DIR}/binary/prerm @ONLY) + set(CPACK_DEBIAN_BINARY_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/binary/postinst;${CMAKE_CURRENT_BINARY_DIR}/binary/prerm") +endif() +set(CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS "hsa-rocr-dev (>= 1.3), rocminfo, comgr (>= 2.0), rocm-llvm, libc6, rocm-core") +set(CPACK_DEBIAN_BINARY_PACKAGE_PROVIDES "hip-rocclr (= ${CPACK_PACKAGE_VERSION})") +set(CPACK_DEBIAN_BINARY_PACKAGE_REPLACES "hip-rocclr (= ${CPACK_PACKAGE_VERSION})") + +set(CPACK_RPM_BINARY_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") +if(FILE_REORG_BACKWARD_COMPATIBILITY) + set(CPACK_RPM_BINARY_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/binary/postinst") + set(CPACK_RPM_BINARY_PRE_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/binary/prerm") +endif() +string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) +set(CPACK_RPM_BINARY_PACKAGE_REQUIRES "hsa-rocr-dev >= 1.3, rocminfo, comgr >= 2.0, rocm-llvm, rocm-core")
+set(CPACK_RPM_BINARY_PACKAGE_PROVIDES "hip-rocclr = ${HIP_BASE_VERSION}") +set(CPACK_RPM_BINARY_PACKAGE_OBSOLETES "hip-rocclr = ${HIP_BASE_VERSION}") +#End Binary Packaging setting + +#Begin dev Packaging setting +set(CPACK_DEV_DEB "ON") +set(CPACK_DEV_RPM "ON") + +set(CPACK_DEBIAN_DEV_PACKAGE_NAME "hip-dev") +set(CPACK_RPM_DEV_PACKAGE_NAME "hip-devel") + +set(CPACK_COMPONENT_DEV_DESCRIPTION "HIP: Heterogenous-computing Interface for Portability [DEVELOPMENT]") + +configure_file(hip-devel.postinst ${CMAKE_CURRENT_BINARY_DIR}/dev/postinst @ONLY) +configure_file(hip-devel.prerm ${CMAKE_CURRENT_BINARY_DIR}/dev/prerm @ONLY) +set(CPACK_DEBIAN_DEV_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/dev/postinst;${CMAKE_CURRENT_BINARY_DIR}/dev/prerm") + +set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "perl (>= 5.0), liburi-encode-perl, libfile-basedir-perl, libfile-copy-recursive-perl, libfile-listing-perl, libfile-which-perl, libc6, file, rocm-core") +set(CPACK_DEBIAN_DEV_PACKAGE_PROVIDES "hip-base") +set(CPACK_DEBIAN_DEV_PACKAGE_REPLACES "hip-base") + +set(CPACK_RPM_DEV_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/dev/postinst") +set(CPACK_RPM_DEV_PRE_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/dev/prerm") +set(CPACK_RPM_DEV_PACKAGE_REQUIRES "perl >= 5.0, perl-File-Which, perl-File-Listing, perl-File-BaseDir, perl-URI-Encode, file, rocm-core") + +set(CPACK_RPM_DEV_PACKAGE_PROVIDES "hip-base") +set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "hip-base") +#End dev Packaging setting + +#Begin doc Packaging setting +set(CPACK_DOC_DEB "ON") +set(CPACK_DOC_RPM "ON") +set(CPACK_DEBIAN_DOC_PACKAGE_NAME "hip-doc") +set(CPACK_RPM_DOC_PACKAGE_NAME "hip-doc") +set(CPACK_COMPONENT_DOC_DESCRIPTION "HIP: Heterogenous-computing Interface for Portability [DOCUMENTATION]") + +set(CPACK_DEBIAN_DOC_PACKAGE_DEPENDS "hip-dev (= ${CPACK_PACKAGE_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}), rocm-core") +set(CPACK_DEBIAN_DOC_PACKAGE_PROVIDES "hip-doc") + +string(REPLACE "-" "_" HIP_BASE_VERSION 
${CPACK_PACKAGE_VERSION}) +set(CPACK_RPM_DOC_PACKAGE_REQUIRES "hip-devel = ${HIP_BASE_VERSION}-${CPACK_RPM_PACKAGE_RELEASE}, rocm-core") + +#End doc Packaging setting + +#Begin samples Packaging setting +set(CPACK_SAMPLES_DEB "ON") +set(CPACK_SAMPLES_RPM "ON") +set(CPACK_DEBIAN_SAMPLES_PACKAGE_NAME "hip-samples") +set(CPACK_RPM_SAMPLES_PACKAGE_NAME "hip-samples") +set(CPACK_COMPONENT_SAMPLES_DESCRIPTION "HIP: Heterogenous-computing Interface for Portability [SAMPLES]") +set(CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS "hip-dev (= ${CPACK_PACKAGE_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}), rocm-core") +set(CPACK_DEBIAN_SAMPLES_PACKAGE_PROVIDES "hip-samples") + +set(CPACK_RPM_SAMPLES_PACKAGE_REQUIRES "hip-devel = ${HIP_BASE_VERSION}-${CPACK_RPM_PACKAGE_RELEASE}, rocm-core") +#End samples Packaging setting + +#Begin runtime-nvidia Packaging setting +set(CPACK_RUNTIME-NVIDIA_DEB "ON") +set(CPACK_RUNTIME-NVIDIA_RPM "ON") +set(CPACK_DEBIAN_RUNTIME-NVIDIA_PACKAGE_NAME "hip-runtime-nvidia") +set(CPACK_RPM_RUNTIME-NVIDIA_PACKAGE_NAME "hip-runtime-nvidia") +set(CPACK_COMPONENT_RUNTIME-NVIDIA_DESCRIPTION "HIP: Heterogenous-computing Interface for Portability [RUNTIME-NVIDIA]") + +set(CPACK_DEBIAN_RUNTIME-NVIDIA_PACKAGE_DEPENDS "cuda (>= 7.5), rocm-core") +set(CPACK_DEBIAN_RUNTIME-NVIDIA_PACKAGE_PROVIDES "hip-nvcc") +set(CPACK_DEBIAN_RUNTIME-NVIDIA_PACKAGE_REPLACES "hip-nvcc") + +set(CPACK_RPM_RUNTIME-NVIDIA_PACKAGE_PROVIDES "hip-nvcc") +set(CPACK_RPM_RUNTIME-NVIDIA_PACKAGE_OBSOLETES "hip-nvcc") +set(CPACK_RPM_RUNTIME-NVIDIA_PACKAGE_REQUIRES "cuda >= 7.5, rocm-core") + +# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake +if(NOT ROCM_DEP_ROCMCORE) + + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_BINARY_PACKAGE_REQUIRES ${CPACK_RPM_BINARY_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS ${CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS}) + string(REGEX REPLACE ",? 
?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DOC_PACKAGE_REQUIRES ${CPACK_RPM_DOC_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DOC_PACKAGE_DEPENDS ${CPACK_DEBIAN_DOC_PACKAGE_DEPENDS}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_SAMPLES_PACKAGE_REQUIRES ${CPACK_RPM_SAMPLES_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS ${CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME-NVIDIA_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME-NVIDIA_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME-NVIDIA_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME-NVIDIA_PACKAGE_DEPENDS}) +endif() + +include(CPack) diff --git a/projects/clr/hipamd/packaging/convert_md_to_html.sh b/projects/clr/hipamd/packaging/convert_md_to_html.sh new file mode 100755 index 0000000000..a686d118ee --- /dev/null +++ b/projects/clr/hipamd/packaging/convert_md_to_html.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +function die { + echo "${1-Died}." >&2 + exit 1 +} + +function cleanup { + rm -rf "$workdir" +} + +# parse arguments (both the HIP source dir and the HTML destination dir are required) +hip_srcdir=$1 +html_destdir=$2 +[ "$hip_srcdir" != "" ] && [ "$html_destdir" != "" ] || die "Invalid arguments!" + +# create temporary directory for grip settings +workdir=`mktemp -d` +trap cleanup EXIT + +# setup grip +export GRIPURL=$hip_srcdir +export GRIPHOME=$workdir +echo "CACHE_DIRECTORY = '$html_destdir/asset'" > $workdir/settings.py +mkdir -p $html_destdir $html_destdir/docs/markdown + +# convert all md files to html +pushd $hip_srcdir || die "Cannot enter $hip_srcdir" +for f in *.md docs/markdown/*.md; do grip --export --no-inline $f $html_destdir/${f%.*}.html; done +popd + +# convert absolute links to relative links +pushd $html_destdir || die "Cannot enter $html_destdir" +for f in *.html; do sed -i "s?$GRIPURL/??g" $f; done +for f in docs/markdown/*.html; do sed -i "s?$GRIPURL/?../../?g" $f; done +popd + +# update document titles +pushd $html_destdir || die "Cannot enter $html_destdir" +for f in *.html; do sed -i "s?.md - Grip??g" $f; done +for f in docs/markdown/*.html; do sed -i "s?.md - Grip??g" $f; done +popd + +# replace .md with .html in links +pushd $html_destdir || die "Cannot enter $html_destdir" +for f in *.html; do sed -i "s?.md\"?.html\"?g" $f; done +for f in *.html; do sed -i "s?.md#?.html#?g" $f; done +for f in docs/markdown/*.html; do sed -i "s?.md\"?.html\"?g" $f; done +for f in docs/markdown/*.html; do sed -i "s?.md#?.html#?g" $f; done +popd + +# replace github.io links +pushd $html_destdir || die "Cannot enter $html_destdir" +sed -i "s?http://rocm-developer-tools.github.io/HIP?docs/RuntimeAPI/html/index.html?g"
README.html +sed -i "s?http://rocm-developer-tools.github.io/HIP?docs/RuntimeAPI/html/?g" RELEASE.html +popd + +exit 0 diff --git a/projects/clr/hipamd/packaging/hip-devel.postinst b/projects/clr/hipamd/packaging/hip-devel.postinst new file mode 100755 index 0000000000..9b32b08854 --- /dev/null +++ b/projects/clr/hipamd/packaging/hip-devel.postinst @@ -0,0 +1,38 @@ +#!/bin/bash +# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ROCMDIR=@ROCM_PATH@ +HIPINCDIR=$ROCMDIR/@CMAKE_INSTALL_INCLUDEDIR@/hip +CURRENTDIR=`pwd` +# The following will be removed after upstream updation +cd $HIPINCDIR +ln -r -s -f amd_detail hcc_detail +ln -r -s -f nvidia_detail nvcc_detail +cd $CURRENTDIR + +#FILE_REORG_BACKWARD_COMPATIBILITY +HIPINCDIR=$ROCMDIR/hip/include/hip +if [ -d $HIPINCDIR ]; then + # The following will be removed after upstream updation + cd $HIPINCDIR + ln -r -s -f amd_detail hcc_detail + ln -r -s -f nvidia_detail nvcc_detail + cd $CURRENTDIR +fi diff --git a/projects/clr/hipamd/packaging/hip-devel.prerm b/projects/clr/hipamd/packaging/hip-devel.prerm new file mode 100755 index 0000000000..9dabd4d452 --- /dev/null +++ b/projects/clr/hipamd/packaging/hip-devel.prerm @@ -0,0 +1,41 @@ +#!/bin/bash +# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ROCMDIR=@ROCM_PATH@ +CURRENTDIR=`pwd` + +HIPINCDIR=$ROCMDIR/@CMAKE_INSTALL_INCLUDEDIR@/hip +([ ! -d $HIPINCDIR ]) && exit 0 +cd $HIPINCDIR +rm -f hcc_detail +rm -f nvcc_detail +cd $CURRENTDIR + +#FILE_REORG_BACKWARD_COMPATIBILITY + #backward compatibility code, to be removed later +HIPDIR=$ROCMDIR/hip +HIPINCDIR=$ROCMDIR/hip/include/hip +([ ! -d $HIPINCDIR ]) && exit 0 +cd $HIPINCDIR +rm -f hcc_detail +rm -f nvcc_detail +cd $CURRENTDIR +([ ! -d $HIPDIR ]) && exit 0 +rmdir --ignore-fail-on-non-empty $HIPDIR diff --git a/projects/clr/hipamd/packaging/hip-runtime-amd.postinst b/projects/clr/hipamd/packaging/hip-runtime-amd.postinst new file mode 100755 index 0000000000..31876784b7 --- /dev/null +++ b/projects/clr/hipamd/packaging/hip-runtime-amd.postinst @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE.
+ +ROCMDIR=@ROCM_PATH@ +ROCMCMAKEDIR=$ROCMDIR/@CMAKE_INSTALL_LIBDIR@/cmake +HIPCMAKEDIR=$ROCMDIR/hip/lib/cmake +CURRENTDIR=`pwd` + +mkdir -p $HIPCMAKEDIR/hip +mkdir -p $HIPCMAKEDIR/hip-lang +mkdir -p $HIPCMAKEDIR/hiprtc + +HIPTARGETFILES=$(ls -A $ROCMCMAKEDIR/hip | grep "^hip-targets") +cd $HIPCMAKEDIR/hip +for f in $HIPTARGETFILES +do + ln -s -r -f $ROCMCMAKEDIR/hip/$f $(basename $f) +done +cd $CURRENTDIR + +HIPLANGTARGETFILES=$(ls -A $ROCMCMAKEDIR/hip-lang | grep "^hip-lang-targets") +cd $HIPCMAKEDIR/hip-lang +for f in $HIPLANGTARGETFILES +do + ln -s -r -f $ROCMCMAKEDIR/hip-lang/$f $(basename $f) +done +cd $CURRENTDIR + +HIPRTCTARGETFILES=$(ls -A $ROCMCMAKEDIR/hiprtc | grep "^hiprtc-targets") +cd $HIPCMAKEDIR/hiprtc +for f in $HIPRTCTARGETFILES +do + ln -s -r -f $ROCMCMAKEDIR/hiprtc/$f $(basename $f) +done +cd $CURRENTDIR diff --git a/projects/clr/hipamd/packaging/hip-runtime-amd.prerm b/projects/clr/hipamd/packaging/hip-runtime-amd.prerm new file mode 100755 index 0000000000..0dd8675d2b --- /dev/null +++ b/projects/clr/hipamd/packaging/hip-runtime-amd.prerm @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright (c) 2016 - 2022 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +ROCMDIR=@ROCM_PATH@ +HIPDIR=$ROCMDIR/hip +HIPCMAKEDIR=$ROCMDIR/hip/lib/cmake/hip +HIPLANGCMAKEDIR=$ROCMDIR/hip/lib/cmake/hip-lang +HIPRTCCMAKEDIR=$ROCMDIR/hip/lib/cmake/hiprtc +CURRENTDIR=`pwd` +([ ! -d $ROCMDIR ] || [ ! -d $HIPDIR ]) && exit 0 + +([ ! -d $HIPCMAKEDIR ] ) && exit 0 +# Remove soft-links to hip-target +HIPTARGETFILES=$(ls -A $HIPCMAKEDIR | grep "^hip-targets") + +cd $HIPCMAKEDIR +for f in $HIPTARGETFILES; do + [ -e $f ] || continue + rm $(basename $f) +done +cd $CURRENTDIR +([ ! -d $HIPLANGCMAKEDIR ] ) && exit 0 +# Remove soft-links to hip-lang-target +HIPLANGTARGETFILES=$(ls -A $HIPLANGCMAKEDIR | grep "^hip-lang-targets") + +cd $HIPLANGCMAKEDIR +for f in $HIPLANGTARGETFILES; do + [ -e $f ] || continue + rm $(basename $f) +done + +cd $CURRENTDIR + +([ ! -d $HIPRTCCMAKEDIR ] ) && exit 0 +# Remove soft-links to hiprtc-target +HIPRTCTARGETFILES=$(ls -A $HIPRTCCMAKEDIR | grep "^hiprtc-targets") + +cd $HIPRTCCMAKEDIR +for f in $HIPRTCTARGETFILES; do + [ -e $f ] || continue + rm $(basename $f) +done + +cd $CURRENTDIR + +rmdir --ignore-fail-on-non-empty $HIPCMAKEDIR +rmdir --ignore-fail-on-non-empty $HIPLANGCMAKEDIR +rmdir --ignore-fail-on-non-empty $HIPRTCCMAKEDIR diff --git a/projects/clr/hipamd/packaging/hip-runtime-nvidia.postinst b/projects/clr/hipamd/packaging/hip-runtime-nvidia.postinst new file mode 100755 index 0000000000..34ff0d4721 --- /dev/null +++ b/projects/clr/hipamd/packaging/hip-runtime-nvidia.postinst @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +ROCMDIR=@ROCM_PATH@ +HIPDIR=$ROCMDIR/hip + +if [ -d $ROCMDIR ] && [ "$ROCMDIR" != "/opt/rocm" ] ; then + ln -s -f -n $ROCMDIR /opt/rocm # -n: replace an existing /opt/rocm symlink instead of creating the link inside its target +fi diff --git a/projects/clr/hipamd/packaging/hip-runtime-nvidia.prerm b/projects/clr/hipamd/packaging/hip-runtime-nvidia.prerm new file mode 100755 index 0000000000..d9194fa4b1 --- /dev/null +++ b/projects/clr/hipamd/packaging/hip-runtime-nvidia.prerm @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +if [ -L "/opt/rocm" ] ; then + unlink /opt/rocm +fi diff --git a/projects/clr/hipamd/src/CMakeLists.txt b/projects/clr/hipamd/src/CMakeLists.txt new file mode 100644 index 0000000000..619d6a0e87 --- /dev/null +++ b/projects/clr/hipamd/src/CMakeLists.txt @@ -0,0 +1,319 @@ +# Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+cmake_minimum_required(VERSION 3.5.1)
+
+include(GNUInstallDirs)
+
+set(VERSION_MAJOR_AMDHIP ${HIP_VERSION_MAJOR})
+set(VERSION_MINOR_AMDHIP ${HIP_VERSION_MINOR})
+
+# Declare the build options before their first use: the sanitizer setup below
+# branches on BUILD_SHARED_LIBS, which may otherwise still be undefined on a
+# first configure and select the static sanitizer runtime for a shared build.
+option(DISABLE_DIRECT_DISPATCH "Disable Direct Dispatch" OFF)
+
+option(BUILD_SHARED_LIBS "Build the shared library" ON)
+
+if(ADDRESS_SANITIZER)
+  set(ASAN_LINKER_FLAGS "-fsanitize=address")
+  set(ASAN_COMPILER_FLAGS "-fno-omit-frame-pointer -fsanitize=address")
+
+  # Non-GNU compilers need the sanitizer runtime flavour spelled out.
+  if(NOT CMAKE_COMPILER_IS_GNUCC)
+    if(BUILD_SHARED_LIBS)
+      set(ASAN_COMPILER_FLAGS "${ASAN_COMPILER_FLAGS} -shared-libsan")
+      set(ASAN_LINKER_FLAGS "${ASAN_LINKER_FLAGS} -shared-libsan")
+    else()
+      set(ASAN_LINKER_FLAGS "${ASAN_LINKER_FLAGS} -static-libsan")
+    endif()
+  endif()
+
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ASAN_COMPILER_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ASAN_COMPILER_FLAGS}")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_LINKER_FLAGS} -s -Wl,--build-id=sha1")
+  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${ASAN_LINKER_FLAGS} -Wl,--build-id=sha1")
+endif()
+
+if(CMAKE_COMPILER_IS_GNUCC)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=deprecated-declarations")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations")
+endif()
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
+find_package(ROCclr)
+
+# NOTE(review): the $<...> generator expressions in this file were restored
+# from context; confirm them against the upstream CMakeLists.txt.
+if(BUILD_SHARED_LIBS)
+  add_library(amdhip64 SHARED)
+  # Windows doesn't have a strip utility, so CMAKE_STRIP won't be set.
+  if((CMAKE_BUILD_TYPE STREQUAL "Release") AND NOT ("${CMAKE_STRIP}" STREQUAL ""))
+    add_custom_command(TARGET amdhip64 POST_BUILD COMMAND ${CMAKE_STRIP} $<TARGET_FILE:amdhip64>)
+  endif()
+else()
+  add_library(amdhip64 STATIC $<TARGET_OBJECTS:rocclr>)
+endif()
+
+set_target_properties(amdhip64 PROPERTIES
+  CXX_STANDARD 17
+  CXX_STANDARD_REQUIRED ON
+  CXX_EXTENSIONS OFF
+  POSITION_INDEPENDENT_CODE ON
+  # Workaround for many places in the HIP project
+  # having hardcoded references to build/lib/libamdhip64.so
+  LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
+  ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+
+if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+  set_target_properties(amdhip64 PROPERTIES OUTPUT_NAME "amdhip64")
+else()
+  set_target_properties(amdhip64 PROPERTIES OUTPUT_NAME "amdhip32")
+endif()
+
+# Disable versioning for Windows
+# as currently HIP_LIB_VERSION_STRING and HIP_LIB_VERSION_MAJOR
+# are not being populated
+if(NOT WIN32)
+  if(BUILD_SHARED_LIBS)
+    set_target_properties(amdhip64 PROPERTIES
+      VERSION ${HIP_LIB_VERSION_STRING}
+      SOVERSION ${HIP_LIB_VERSION_MAJOR})
+  endif()
+endif()
+
+target_sources(amdhip64 PRIVATE
+  cl_gl.cpp
+  fixme.cpp
+  hip_activity.cpp
+  hip_code_object.cpp
+  hip_context.cpp
+  hip_device_runtime.cpp
+  hip_device.cpp
+  hip_error.cpp
+  hip_event.cpp
+  hip_event_ipc.cpp
+  hip_fatbin.cpp
+  hip_global.cpp
+  hip_graph_internal.cpp
+  hip_graph.cpp
+  hip_hmm.cpp
+  hip_intercept.cpp
+  hip_memory.cpp
+  hip_mempool.cpp
+  hip_mempool_impl.cpp
+  hip_module.cpp
+  hip_peer.cpp
+  hip_platform.cpp
+  hip_profile.cpp
+  hip_stream_ops.cpp
+  hip_stream.cpp
+  hip_surface.cpp
+  hip_texture.cpp
+  hip_gl.cpp
+  hip_vm.cpp)
+
+if(WIN32)
+  target_sources(amdhip64 PRIVATE
+    cl_d3d9.cpp
+    cl_d3d10.cpp
+    cl_d3d11.cpp
+    hip_runtime.cpp)
+endif()
+
+# Restrict the exported symbols: a .def file on Windows, a linker version
+# script elsewhere (re-link whenever the script changes).
+if(BUILD_SHARED_LIBS)
+  if(WIN32)
+    target_sources(amdhip64 PRIVATE amdhip.def)
+  else()
+    target_link_libraries(amdhip64 PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in")
+    set_target_properties(amdhip64 PROPERTIES LINK_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in")
+  endif()
+endif()
+
+if(WIN32)
+  configure_file(hip_hcc_in.rc.in hip_hcc_info.rc @ONLY)
+  target_sources(amdhip64 PRIVATE hip_hcc_info.rc)
+endif()
+
+target_include_directories(amdhip64
+  PRIVATE
+    ${HIP_COMMON_INCLUDE_DIR}
+    ${PROJECT_SOURCE_DIR}/include
+    ${PROJECT_BINARY_DIR}/include)
+
+target_compile_definitions(amdhip64 PRIVATE __HIP_PLATFORM_AMD__)
+target_link_libraries(amdhip64 PRIVATE ${OPENGL_LIBRARIES})
+target_link_libraries(amdhip64 PRIVATE ${CMAKE_DL_LIBS})
+
+# Note in static case we cannot link against rocclr.
+# If we would, we'd also have to export rocclr and have hipcc pass it to the linker.
+if(BUILD_SHARED_LIBS)
+  target_link_libraries(amdhip64 PRIVATE rocclr)
+else()
+  target_compile_definitions(amdhip64 PRIVATE $<TARGET_PROPERTY:rocclr,COMPILE_DEFINITIONS>)
+  target_include_directories(amdhip64 PRIVATE $<TARGET_PROPERTY:rocclr,INCLUDE_DIRECTORIES>)
+endif()
+
+if(DISABLE_DIRECT_DISPATCH)
+  target_compile_definitions(amdhip64 PRIVATE DISABLE_DIRECT_DISPATCH)
+endif()
+
+# Short-Term solution for pre-compiled headers for online compilation
+# Enable pre compiled header
+if(__HIP_ENABLE_PCH)
+  find_package(LLVM REQUIRED CONFIG
+    PATHS
+      ${ROCM_PATH}/llvm)
+  # find_package(LLVM) returns the lib/cmake/llvm location. We require the root.
+  if(NOT DEFINED HIP_LLVM_ROOT)
+    set(HIP_LLVM_ROOT "${LLVM_DIR}/../../..")
+  endif()
+
+  execute_process(COMMAND sh -c "${CMAKE_CURRENT_SOURCE_DIR}/hip_embed_pch.sh ${HIP_COMMON_INCLUDE_DIR} ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include ${HIP_LLVM_ROOT}" COMMAND_ECHO STDERR RESULT_VARIABLE EMBED_PCH_RC WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+  if (EMBED_PCH_RC AND NOT EMBED_PCH_RC EQUAL 0)
+    message(FATAL_ERROR "Failed to embed PCH")
+  endif()
+
+  target_compile_definitions(amdhip64 PRIVATE __HIP_ENABLE_PCH)
+  target_sources(amdhip64 PRIVATE ${CMAKE_BINARY_DIR}/hip_pch.o)
+endif()
+
+set(HIPRTC_OBJECTS)
+# Add hiprtc
+add_subdirectory(hiprtc)
+
+if(NOT WIN32)
+  if(BUILD_SHARED_LIBS)
+    target_link_libraries(amdhip64 PRIVATE ${HIPRTC_OBJECTS})
+    target_compile_definitions(amdhip64 PRIVATE __HIP_ENABLE_RTC)
+    add_dependencies(amdhip64 hiprtc-builtins)
+    INSTALL(TARGETS hiprtc-builtins
+      RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+      LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+      ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+  endif()
+endif()
+
+#############################
+# Profiling API support
+#############################
+# Generate profiling API macros/structures header
+option(USE_PROF_API "Enable roctracer integration" ON)
+# Enable profiling API
+if(USE_PROF_API)
+  set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/amd_detail/hip_prof_str.h")
+  # PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so the template is still found
+  # when this project is added as a subdirectory of a larger build; it matches
+  # the include directory used for the target above.
+  set(PROF_API_STR_IN "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_prof_str.h")
+  set(PROF_API_HDR "${HIP_COMMON_INCLUDE_DIR}/hip/hip_runtime_api.h")
+  set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}")
+  set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py")
+  set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt")
+
+  find_package(Python3 COMPONENTS Interpreter REQUIRED)
+
+  execute_process(COMMAND ${Python3_EXECUTABLE} -c "import CppHeaderParser"
+                  RESULT_VARIABLE CPP_HEADER_PARSER
+                  OUTPUT_QUIET)
+
+  # Pass the variable name, not its expansion: when the interpreter cannot be
+  # started the result is an error string, which would break the if() syntax.
+  if(NOT CPP_HEADER_PARSER EQUAL 0)
+    message(FATAL_ERROR "\
+The \"CppHeaderParser\" Python3 package is not installed. \
+Please install it using the following command: \"pip3 install CppHeaderParser\".\
+")
+  endif()
+
+  add_custom_command(OUTPUT ${PROF_API_STR}
+    COMMAND ${Python3_EXECUTABLE} ${PROF_API_GEN} -v -t --priv ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR_IN} ${PROF_API_STR}
+    DEPENDS ${PROF_API_STR_IN} ${PROF_API_HDR} ${PROF_API_GEN}
+    COMMENT "Generating profiling primitives: ${PROF_API_STR}")
+
+  add_custom_target(gen-prof-api-str-header ALL
+    DEPENDS ${PROF_API_STR}
+    SOURCES ${PROF_API_HDR})
+
+  set_target_properties(amdhip64 PROPERTIES PUBLIC_HEADER ${PROF_API_STR})
+
+  find_path(PROF_API_HEADER_DIR prof_protocol.h
+    HINTS
+      ${PROF_API_HEADER_PATH}
+    PATHS
+      ${ROCM_PATH}/roctracer
+    PATH_SUFFIXES
+      include/ext)
+
+  if(NOT PROF_API_HEADER_DIR)
+    message(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
+  else()
+    target_compile_definitions(amdhip64 PUBLIC USE_PROF_API=1)
+    target_include_directories(amdhip64 PUBLIC ${PROF_API_HEADER_DIR})
+    message(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
+  endif()
+
+  add_dependencies(amdhip64 gen-prof-api-str-header)
+endif()
+
+add_custom_command(TARGET amdhip64 POST_BUILD COMMAND
+  ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/.hipInfo ${PROJECT_BINARY_DIR}/lib/.hipInfo)
+add_custom_command(TARGET amdhip64 POST_BUILD COMMAND
+  ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/include ${PROJECT_BINARY_DIR}/include)
+
+add_library(host INTERFACE)
+target_link_libraries(host INTERFACE amdhip64)
+
+add_library(device INTERFACE)
+target_link_libraries(device INTERFACE host)
+
+# Current packaging assumes that HIP runtime will always be installed in ${ROCM_PATH}/lib
+# This is false to assume, because some distros like CentOS will use the lib64 directory instead of lib
+# Relying on CMake to choose the library directory for us will default in that case to lib64
+# Hence there will be a mismatch between where HIP is installed and where CMake thinks it is
+
+# The same targets are installed into two export sets, one per package config
+# (hip:: and hip-lang::).
+INSTALL(TARGETS amdhip64 host device
+  EXPORT hip-targets
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+INSTALL(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::)
+
+INSTALL(TARGETS amdhip64 host device
+  EXPORT hip-lang-targets
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+INSTALL(EXPORT hip-lang-targets DESTINATION ${CONFIG_LANG_PACKAGE_INSTALL_DIR} NAMESPACE hip-lang::)
+
+if(NOT WIN32)
+include(CMakePackageConfigHelpers)
+
+configure_package_config_file(
+  ${HIP_COMMON_DIR}/hip-lang-config.cmake.in
+  ${CMAKE_CURRENT_BINARY_DIR}/hip-lang-config.cmake
+  INSTALL_DESTINATION ${CONFIG_LANG_PACKAGE_INSTALL_DIR}
+  PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR)
+
+write_basic_package_version_file(
+  ${CMAKE_CURRENT_BINARY_DIR}/hip-lang-config-version.cmake
+  VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_GITDATE}"
+  COMPATIBILITY SameMajorVersion)
+install(
+  FILES
+  ${CMAKE_CURRENT_BINARY_DIR}/hip-lang-config.cmake
+  ${CMAKE_CURRENT_BINARY_DIR}/hip-lang-config-version.cmake
+  DESTINATION
+  ${CONFIG_LANG_PACKAGE_INSTALL_DIR}/
+  )
+endif()
diff --git a/projects/clr/hipamd/src/amd_hsa_elf.hpp b/projects/clr/hipamd/src/amd_hsa_elf.hpp
new file mode 100644
index 0000000000..383d4562be
--- /dev/null
+++ b/projects/clr/hipamd/src/amd_hsa_elf.hpp
@@ -0,0 +1,135 @@
+/*
+Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+// This header file is partially copied from
+// https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/BinaryFormat/ELF.h
+
+// AMDGPU OS for HSA compatible compute kernels.
+enum { ELFOSABI_AMDGPU_HSA = 64, ELFOSABI_AMDGPU_PAL = 65, ELFOSABI_AMDGPU_MESA3D = 66 };
+
+// ABI version identifiers used together with ELFOSABI_AMDGPU_HSA; the e_flags
+// comments below state which feature bits are valid for which version.
+// NOTE(review): presumably the ELF header ABI-version byte - confirm against
+// the LLVM header this file was copied from.
+enum {
+  ELFABIVERSION_AMDGPU_HSA_V2 = 0,
+  ELFABIVERSION_AMDGPU_HSA_V3 = 1,
+  ELFABIVERSION_AMDGPU_HSA_V4 = 2,
+  ELFABIVERSION_AMDGPU_HSA_V5 = 3
+};
+
+// AMDGPU specific e_flags
+enum : unsigned {
+  // Processor selection mask for the EF_AMDGPU_MACH_* values below.
+  EF_AMDGPU_MACH = 0x0ff,
+  // AMDGPU processors
+  EF_AMDGPU_MACH_NONE = 0x000,
+  // R600-based processors.
+  EF_AMDGPU_MACH_R600_R600 = 0x001,
+  EF_AMDGPU_MACH_R600_R630 = 0x002,
+  EF_AMDGPU_MACH_R600_RS880 = 0x003,
+  EF_AMDGPU_MACH_R600_RV670 = 0x004,
+  EF_AMDGPU_MACH_R600_RV710 = 0x005,
+  EF_AMDGPU_MACH_R600_RV730 = 0x006,
+  EF_AMDGPU_MACH_R600_RV770 = 0x007,
+  EF_AMDGPU_MACH_R600_CEDAR = 0x008,
+  EF_AMDGPU_MACH_R600_CYPRESS = 0x009,
+  EF_AMDGPU_MACH_R600_JUNIPER = 0x00a,
+  EF_AMDGPU_MACH_R600_REDWOOD = 0x00b,
+  EF_AMDGPU_MACH_R600_SUMO = 0x00c,
+  EF_AMDGPU_MACH_R600_BARTS = 0x00d,
+  EF_AMDGPU_MACH_R600_CAICOS = 0x00e,
+  EF_AMDGPU_MACH_R600_CAYMAN = 0x00f,
+  EF_AMDGPU_MACH_R600_TURKS = 0x010,
+  // Values between the last R600 processor and the first AMDGCN one are reserved.
+  EF_AMDGPU_MACH_R600_RESERVED_FIRST = 0x011,
+  EF_AMDGPU_MACH_R600_RESERVED_LAST = 0x01f,
+  // First/last R600-based processors.
+  EF_AMDGPU_MACH_R600_FIRST = EF_AMDGPU_MACH_R600_R600,
+  EF_AMDGPU_MACH_R600_LAST = EF_AMDGPU_MACH_R600_TURKS,
+
+  // AMDGCN-based processors.
+  EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
+  EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
+  EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022,
+  EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023,
+  EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024,
+  EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025,
+  EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026,
+  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027,
+  EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
+  EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029,
+  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
+  EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b,
+  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
+  EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
+  EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
+  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+  EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
+  EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
+  EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032,
+  EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
+  EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
+  EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
+  EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036,
+  EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037,
+  EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038,
+  EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039,
+  EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a,
+  EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b,
+  EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c,
+  EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
+  EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
+  EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
+  EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
+  EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
+  EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
+  EF_AMDGPU_MACH_AMDGCN_RESERVED_0X43 = 0x043,
+  EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
+  EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
+  EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
+  EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
+
+  // First/last AMDGCN-based processors.
+  EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
+  EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1102,
+
+  // Indicates if the "xnack" target feature is enabled for all code contained
+  // in the object.
+  //
+  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
+  EF_AMDGPU_FEATURE_XNACK_V3 = 0x100,
+  // Indicates if the "sramecc" target feature is enabled for all code
+  // contained in the object.
+  //
+  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
+  EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
+
+  // XNACK selection mask for EF_AMDGPU_FEATURE_XNACK_* values.
+  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
+  EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
+  EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
+  EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
+  EF_AMDGPU_FEATURE_XNACK_OFF_V4 = 0x200,
+  EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
+
+  // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
+  // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
+  EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
+  EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
+  EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
+  EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
+  EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
+};
diff --git a/projects/clr/hipamd/src/amdhip.def b/projects/clr/hipamd/src/amdhip.def
new file mode 100644
index 0000000000..ffaff7f57d
--- /dev/null
+++ b/projects/clr/hipamd/src/amdhip.def
@@ -0,0 +1,446 @@
+EXPORTS
+hipChooseDevice
+hipCtxCreate
+hipCtxDestroy
+hipCtxDisablePeerAccess
+hipCtxEnablePeerAccess
+hipCtxGetApiVersion
+hipCtxGetCacheConfig
+hipCtxGetCurrent
+hipCtxGetDevice
+hipCtxGetFlags
+hipCtxGetSharedMemConfig
+hipCtxPopCurrent
+hipCtxPushCurrent
+hipCtxSetCacheConfig
+hipCtxSetCurrent
+hipCtxSetSharedMemConfig
+hipCtxSynchronize
+hipDeviceCanAccessPeer
+hipDeviceComputeCapability
+hipDeviceDisablePeerAccess
+hipDeviceEnablePeerAccess
+hipDeviceGet
+hipDeviceGetAttribute
+hipDeviceGetByPCIBusId
+hipDeviceGetCacheConfig
+hipDeviceGetStreamPriorityRange
+hipDeviceGetLimit
+hipDeviceGetName
+hipDeviceGetUuid
+hipDeviceGetPCIBusId
+hipDeviceGetSharedMemConfig
+hipDeviceGetP2PAttribute
+hipDevicePrimaryCtxGetState
+hipDevicePrimaryCtxRelease
+hipDevicePrimaryCtxReset
+hipDevicePrimaryCtxRetain
+hipDevicePrimaryCtxSetFlags
+hipDeviceReset
+hipDeviceSetCacheConfig
+hipDeviceSetSharedMemConfig
+hipDeviceSynchronize
+hipDeviceTotalMem
+hipDriverGetVersion
+hipEventCreate
+hipEventCreateWithFlags
+hipEventDestroy
+hipEventElapsedTime
+hipEventQuery
+hipEventRecord
+hipEventSynchronize
+hipExtGetLinkTypeAndHopCount
+hipExtLaunchMultiKernelMultiDevice
+hipExtMallocWithFlags
+hipExtModuleLaunchKernel
+hipExtLaunchKernel
+hipFree
+hipFreeArray
+hipFuncSetAttribute
+hipFuncSetCacheConfig
+hipFuncSetSharedMemConfig
+hipGetDevice
+hipGetDeviceCount
+hipGetDeviceProperties
+hipGetErrorName
+hipGetErrorString
+hipGetLastError
+hipMemAllocHost
+hipHostAlloc
+hipHostFree
+hipHostGetDevicePointer
+hipHostGetFlags
+hipHostMalloc
+hipHostRegister
+hipHostUnregister
+hipInit
+hipIpcCloseMemHandle
+hipIpcGetMemHandle
+hipIpcOpenMemHandle
+hipIpcGetEventHandle
+hipIpcOpenEventHandle
+hipMalloc
+hipMalloc3D
+hipMalloc3DArray
+hipMallocManaged
+hipDeviceGetDefaultMemPool
+hipDeviceSetMemPool
+hipDeviceGetMemPool
+hipMallocAsync
+hipFreeAsync
+hipMemPoolTrimTo
+hipMemPoolSetAttribute
+hipMemPoolGetAttribute
+hipMemPoolSetAccess
+hipMemPoolGetAccess
+hipMemPoolCreate
+hipMemPoolDestroy
+hipMallocFromPoolAsync
+hipMemPoolExportToShareableHandle
+hipMemPoolImportFromShareableHandle
+hipMemPoolExportPointer
+hipMemPoolImportPointer
+hipArrayCreate
+hipArray3DCreate
+hipArrayDestroy
+hipArrayGetInfo
+hipArrayGetDescriptor
+hipArray3DGetDescriptor
+hipMallocArray
+hipMemAdvise
+hipMemAllocPitch
+hipMallocPitch
+hipMemcpy
+hipMemcpyWithStream
+hipMemcpyParam2D
+hipMemcpy2D
+hipMemcpy2DAsync
+hipMemcpy2DToArray
+hipMemcpy2DToArrayAsync
+hipMemcpy3D
+hipMemcpy3DAsync
+hipDrvMemcpy3D
+hipDrvMemcpy3DAsync
+hipMemcpyAsync
+hipMemcpyDtoD
+hipMemcpyDtoDAsync
+hipMemcpyDtoH
+hipMemcpyDtoHAsync
+hipMemcpyFromSymbol
+hipMemcpyFromSymbolAsync
+hipMemcpyHtoD
+hipMemcpyHtoDAsync
+hipMemcpyPeer
+hipMemcpyPeerAsync
+hipMemcpyToArray
+hipMemcpyFromArray
+hipMemcpyToSymbol
+hipMemcpyToSymbolAsync
+hipMemGetAddressRange
+hipGetSymbolAddress
+hipGetSymbolSize
+hipMemGetInfo
+hipMemPrefetchAsync
+hipMemPtrGetInfo
+hipMemRangeGetAttribute
+hipMemRangeGetAttributes
+hipMemset
+hipMemsetAsync
+hipMemsetD8
+hipMemsetD8Async
+hipMemsetD16
+hipMemsetD16Async
+hipMemsetD32
+hipMemsetD32Async
+hipMemset2D
+hipMemset2DAsync
+hipMemset3D
+hipMemset3DAsync
+hipModuleGetFunction
+hipModuleGetGlobal
+hipModuleGetTexRef
+hipModuleLaunchKernel
+hipModuleLaunchKernelExt
+hipModuleLaunchCooperativeKernel
+hipModuleLaunchCooperativeKernelMultiDevice
+hipLaunchCooperativeKernel
+hipLaunchCooperativeKernelMultiDevice
+hipHccModuleLaunchKernel
+hipModuleLoad
+hipModuleLoadData
+hipModuleLoadDataEx
+hipModuleUnload
+hipModuleOccupancyMaxPotentialBlockSize
+hipModuleOccupancyMaxPotentialBlockSizeWithFlags
+hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
+hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
+hipOccupancyMaxPotentialBlockSize
+hipOccupancyMaxActiveBlocksPerMultiprocessor
+hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
+hipFuncGetAttribute
+hipFuncGetAttributes
+hipPeekAtLastError
+hipPointerGetAttributes
+hipProfilerStart
+hipProfilerStop
+hipRuntimeGetVersion
+hipGetDeviceFlags
+hipSetDevice
+hipSetDeviceFlags
+hipStreamAddCallback
+hipStreamAttachMemAsync
+hipStreamCreate
+hipStreamCreateWithFlags
+hipStreamCreateWithPriority
+hipStreamDestroy
+hipStreamGetDevice
+hipStreamGetFlags
+hipStreamQuery
+hipStreamSynchronize
+hipStreamWaitEvent
+__hipPopCallConfiguration
+__hipPushCallConfiguration
+__hipRegisterFatBinary
+__hipRegisterFunction
+__hipRegisterVar
+__hipRegisterSurface
+__hipRegisterTexture
+__hipRegisterManagedVar
+__hipUnregisterFatBinary
+hipConfigureCall
+hipSetupArgument
+hipLaunchByPtr
+hipLaunchKernel
+hipRegisterTracerCallback
+hipApiName
+hipKernelNameRef
+hipBindTexture
+hipBindTexture2D
+hipBindTextureToArray
+hipBindTextureToMipmappedArray
+hipGetTextureAlignmentOffset
+hipGetTextureReference
+hipUnbindTexture
+hipCreateChannelDesc
+hipCreateTextureObject
+hipDestroyTextureObject
+hipGetChannelDesc
+hipGetTextureObjectResourceDesc
+hipGetTextureObjectResourceViewDesc
+hipGetTextureObjectTextureDesc
+hipTexRefGetAddress
+hipTexRefGetAddressMode
+hipTexRefGetArray
+hipTexRefGetBorderColor
+hipTexRefGetFilterMode
+hipTexRefGetFlags
+hipTexRefGetFormat
+hipTexRefGetMaxAnisotropy
+hipTexRefGetMipmapFilterMode
+hipTexRefGetMipmapLevelBias
+hipTexRefGetMipmapLevelClamp
+hipTexRefGetMipmappedArray
+hipTexRefSetAddress
+hipTexRefSetAddress2D
+hipTexRefSetAddressMode
+hipTexRefSetArray
+hipTexRefSetBorderColor
+hipTexRefSetFilterMode
+hipTexRefSetFlags
+hipTexRefSetFormat
+hipTexRefSetMaxAnisotropy
+hipTexRefSetMipmapFilterMode
+hipTexRefSetMipmapLevelBias
+hipTexRefSetMipmapLevelClamp
+hipTexRefSetMipmappedArray
+hipCreateSurfaceObject
+hipDestroySurfaceObject
+hipGetCmdName
+hipMipmappedArrayCreate
+hipMallocMipmappedArray
+hipMipmappedArrayDestroy
+hipFreeMipmappedArray
+hipMipmappedArrayGetLevel
+hipGetMipmappedArrayLevel
+hipMallocHost
+hipFreeHost
+hipTexObjectCreate
+hipTexObjectDestroy
+hipTexObjectGetResourceDesc
+hipTexObjectGetResourceViewDesc
+hipTexObjectGetTextureDesc
+hipExtStreamCreateWithCUMask
+hipStreamGetPriority
+hipMemcpy2DFromArray
+hipMemcpy2DFromArrayAsync
+hipDrvMemcpy2DUnaligned
+hipMemcpyAtoH
+hipMemcpyHtoA
+hipMemcpyParam2DAsync
+__gnu_h2f_ieee
+__gnu_f2h_ieee
+hipExtStreamGetCUMask
+hipImportExternalMemory
+hipExternalMemoryGetMappedBuffer
+hipDestroyExternalMemory
+hipGraphCreate
+hipGraphDestroy
+hipGraphAddKernelNode
+hipGraphAddMemsetNode
+hipGraphAddMemcpyNode
+hipGraphAddMemcpyNode1D
+hipGraphInstantiate
+hipGraphLaunch
+hipStreamIsCapturing
+hipStreamBeginCapture
+hipStreamEndCapture
+hipGraphExecDestroy
+hipPointerGetAttribute
+hipDrvPointerGetAttributes
+hipImportExternalSemaphore
+hipSignalExternalSemaphoresAsync
+hipWaitExternalSemaphoresAsync
+hipDestroyExternalSemaphore
+hipGLGetDevices
+hipGraphicsGLRegisterBuffer
+hipGraphicsGLRegisterImage
+hipGraphicsMapResources
+hipGraphicsResourceGetMappedPointer
+hipGraphicsSubResourceGetMappedArray
+hipGraphicsUnmapResources
+hipGraphicsUnregisterResource
+hipGraphGetNodes
+hipGraphGetRootNodes
+hipGraphKernelNodeGetParams
+hipGraphKernelNodeSetParams
+hipGraphKernelNodeSetAttribute
+hipGraphKernelNodeGetAttribute
+hipGraphMemcpyNodeGetParams
+hipGraphMemcpyNodeSetParams
+hipGraphMemsetNodeGetParams
+hipGraphMemsetNodeSetParams
+hipGraphAddDependencies
+hipGraphExecKernelNodeSetParams
+hipGraphAddEmptyNode
+hipStreamGetCaptureInfo
+hipStreamGetCaptureInfo_v2
+hipStreamUpdateCaptureDependencies
+hipGraphRemoveDependencies
+hipGraphGetEdges
+hipGraphNodeGetDependencies
+hipGraphNodeGetDependentNodes
+hipGraphNodeGetType
+hipGraphDestroyNode
+hipGraphClone
+hipGraphNodeFindInClone
+hipGraphAddChildGraphNode
+hipGraphChildGraphNodeGetGraph
+hipGraphExecChildGraphNodeSetParams
+hipGraphAddMemcpyNodeFromSymbol
+hipGraphMemcpyNodeSetParamsFromSymbol
+hipGraphExecMemcpyNodeSetParamsFromSymbol
+hipGraphAddMemcpyNodeToSymbol
+hipGraphMemcpyNodeSetParamsToSymbol
+hipGraphExecMemcpyNodeSetParamsToSymbol
+hipGraphExecMemcpyNodeSetParams
+hipGraphMemcpyNodeSetParams1D
+hipGraphExecMemcpyNodeSetParams1D
+hipGraphAddEventRecordNode
+hipGraphEventRecordNodeGetEvent
+hipGraphEventRecordNodeSetEvent
+hipGraphExecEventRecordNodeSetEvent
+hipGraphAddEventWaitNode
+hipGraphEventWaitNodeGetEvent
+hipGraphEventWaitNodeSetEvent
+hipGraphExecEventWaitNodeSetEvent
+hipGraphAddHostNode
+hipGraphHostNodeGetParams
+hipGraphHostNodeSetParams
+hipGraphExecHostNodeSetParams
+hipGraphExecUpdate
+hipGraphInstantiateWithFlags
+hipGraphExecMemsetNodeSetParams
+hipDeviceGetGraphMemAttribute
+hipDeviceSetGraphMemAttribute
+hipDeviceGraphMemTrim
+amd_dbgapi_get_build_name
+amd_dbgapi_get_git_hash
+amd_dbgapi_get_build_id
+hipThreadExchangeStreamCaptureMode
+hipMemAddressFree
+hipMemAddressReserve
+hipMemCreate
+hipMemExportToShareableHandle
+hipMemGetAccess
+hipMemGetAllocationGranularity
+hipMemGetAllocationPropertiesFromHandle
+hipMemImportFromShareableHandle
+hipMemMap
+hipMemMapArrayAsync
+hipMemRelease
+hipMemRetainAllocationHandle
+hipMemSetAccess
+hipMemUnmap
+hipMemcpy_spt
+hipMemcpyAsync_spt
+hipStreamSynchronize_spt
+hipMemcpyToSymbol_spt
+hipMemcpyFromSymbol_spt
+hipMemcpy2D_spt
+hipMemcpy2DToArray_spt
+hipMemcpy2DFromArray_spt
+hipMemcpy3D_spt
+hipMemset_spt
+hipMemset2D_spt
+hipMemset3D_spt
+hipStreamQuery_spt
+hipStreamGetFlags_spt
+hipStreamGetPriority_spt
+hipStreamWaitEvent_spt
+hipEventRecord_spt
+hipLaunchKernel_spt
+hipLaunchCooperativeKernel_spt
+hipStreamWriteValue32
+hipStreamWriteValue64
+hipStreamWaitValue32
+hipStreamWaitValue64
+hipDeviceSetLimit
+hipGetStreamDeviceId
+hipGraphLaunch_spt
+hipStreamBeginCapture_spt
+hipStreamEndCapture_spt
+hipStreamIsCapturing_spt
+hipStreamGetCaptureInfo_spt
+hipStreamGetCaptureInfo_v2_spt
+hipStreamAddCallback_spt
+hipMemsetAsync_spt
+hipMemset2DAsync_spt
+hipMemset3DAsync_spt
+hipMemcpy3DAsync_spt
+hipMemcpy2DAsync_spt
+hipMemcpyFromSymbolAsync_spt
+hipMemcpyToSymbolAsync_spt
+hipMemcpyFromArray_spt
+hipMemcpy2DFromArrayAsync_spt
+hipMemcpy2DToArrayAsync_spt
+hipDrvGetErrorName
+hipDrvGetErrorString
+hipUserObjectCreate
+hipUserObjectRelease
+hipUserObjectRetain
+hipGraphRetainUserObject
+hipGraphReleaseUserObject
+hipLaunchHostFunc
+hipLaunchHostFunc_spt
+hipGraphDebugDotPrint
+hipGraphKernelNodeCopyAttributes
+hipGraphNodeGetEnabled
+hipGraphNodeSetEnabled
+hipGraphUpload
+hipGraphAddMemAllocNode
+hipGraphMemAllocNodeGetParams
+hipGraphAddMemFreeNode
+hipGraphMemFreeNodeGetParams
diff --git a/projects/clr/hipamd/src/cl_d3d10.cpp b/projects/clr/hipamd/src/cl_d3d10.cpp
new file mode 100644
index 0000000000..692b26cb7e
--- /dev/null
+++ b/projects/clr/hipamd/src/cl_d3d10.cpp
@@ -0,0 +1,1450 @@
+/* Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc.
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifdef _WIN32 + +#include "top.hpp" + +#include "cl_common.hpp" +#include "cl_d3d10_amd.hpp" +#include "platform/command.hpp" + +#include +#include + + +/*! \addtogroup API + * @{ + * + * \addtogroup CL_D3D10_Interops + * + * This section discusses OpenCL functions that allow applications to use Direct3D 10 + * resources (buffers/textures) as OpenCL memory objects. This allows efficient sharing of + * data between OpenCL and Direct3D 10. The OpenCL API can be used to execute kernels that + * read and/or write memory objects that are also the Direct3D resources. + * An OpenCL image object can be created from a D3D10 texture object. An + * OpenCL buffer object can be created from a D3D10 buffer object (index/vertex). 
+ *
+ * @}
+ * \addtogroup clGetDeviceIDsFromD3D10KHR
+ * @{
+ */
+
+/*! \brief Query the OpenCL GPU devices that can interoperate with a Direct3D 10
+ * device or DXGI adapter.
+ *
+ * \param platform must be NULL or AMD_PLATFORM.
+ *
+ * \param d3d_device_source selects how \a d3d_object is interpreted:
+ * CL_D3D10_DEVICE_KHR (an ID3D10Device) or CL_D3D10_DXGI_ADAPTER_KHR (an
+ * IDXGIAdapter, for which a temporary hardware device is created through
+ * D3D10.dll and released again before returning).
+ *
+ * \param d3d_object is the Direct3D object named by \a d3d_device_source.
+ *
+ * \param d3d_device_set must be CL_PREFERRED_DEVICES_FOR_D3D10_KHR or
+ * CL_ALL_DEVICES_FOR_D3D10_KHR; both are handled identically here.
+ *
+ * \param num_entries is the capacity of \a devices. Entries beyond the number
+ * of compatible devices are set to 0.
+ *
+ * \param devices receives the compatible devices. It may be NULL only when
+ * \a num_entries is 0 and \a num_devices is not NULL.
+ *
+ * \param num_devices optionally receives the total number of compatible devices.
+ *
+ * \return CL_SUCCESS on success, otherwise:
+ * - CL_INVALID_PLATFORM if \a platform is not the AMD platform, or if D3D10.dll
+ * or its D3D10CreateDevice entry point cannot be loaded.
+ * - CL_INVALID_VALUE for an inconsistent \a num_entries / \a devices /
+ * \a num_devices combination, an unknown \a d3d_device_source or
+ * \a d3d_device_set, a failed initial GPU device count query, or a failed
+ * device creation.
+ * - CL_DEVICE_NOT_FOUND if there is no GPU device or none is compatible.
+ * - the error reported by clGetDeviceIDs if enumerating the GPU devices fails.
+ */
+RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D10KHR,
+              (cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source,
+               void* d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries,
+               cl_device_id* devices, cl_uint* num_devices)) {
+  cl_int errcode;
+  ID3D10Device* d3d10_device = NULL;
+  cl_device_id* gpu_devices;
+  cl_uint num_gpu_devices = 0;
+  bool create_d3d10Device = false;
+  static const bool VALIDATE_ONLY = true;
+  HMODULE d3d10Module = NULL;
+
+  if (platform != NULL && platform != AMD_PLATFORM) {
+    LogWarning("\"platform\" is not a valid AMD platform");
+    return CL_INVALID_PLATFORM;
+  }
+  if (((num_entries > 0 || num_devices == NULL) && devices == NULL) ||
+      (num_entries == 0 && devices != NULL)) {
+    return CL_INVALID_VALUE;
+  }
+  // Get GPU devices
+  errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices);
+  if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) {
+    return CL_INVALID_VALUE;
+  }
+
+  if (!num_gpu_devices) {
+    *not_null(num_devices) = 0;
+    return CL_DEVICE_NOT_FOUND;
+  }
+
+  switch (d3d_device_source) {
+    case CL_D3D10_DEVICE_KHR:
+      d3d10_device = static_cast<ID3D10Device*>(d3d_object);
+      break;
+    case CL_D3D10_DXGI_ADAPTER_KHR: {
+      typedef HRESULT(WINAPI * LPD3D10CREATEDEVICE)(IDXGIAdapter*, D3D10_DRIVER_TYPE, HMODULE, UINT,
+                                                    UINT32, ID3D10Device**);
+
+      d3d10Module = LoadLibrary("D3D10.dll");
+      if (d3d10Module == NULL) {
+        return CL_INVALID_PLATFORM;
+      }
+
+      // Plain local: the pointer is looked up on every call, so a function-level
+      // static would only add unsynchronised shared state.
+      LPD3D10CREATEDEVICE dynamicD3D10CreateDevice =
+          (LPD3D10CREATEDEVICE)GetProcAddress(d3d10Module, "D3D10CreateDevice");
+      if (dynamicD3D10CreateDevice == NULL) {
+        // The library loaded but does not export the entry point; calling
+        // through a null pointer would crash, so unload it and fail instead.
+        FreeLibrary(d3d10Module);
+        return CL_INVALID_PLATFORM;
+      }
+
+      IDXGIAdapter* dxgi_adapter = static_cast<IDXGIAdapter*>(d3d_object);
+      HRESULT hr = dynamicD3D10CreateDevice(dxgi_adapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
+                                            D3D10_SDK_VERSION, &d3d10_device);
+      if (SUCCEEDED(hr) && (NULL != d3d10_device)) {
+        // Remember that the device and the module are ours to release.
+        create_d3d10Device = true;
+      } else {
+        FreeLibrary(d3d10Module);
+        return CL_INVALID_VALUE;
+      }
+    } break;
+    default:
+      LogWarning("\"d3d_device_source\" is invalid");
+      return CL_INVALID_VALUE;
+  }
+
+  // From here on errors only set errcode and fall through to the cleanup at
+  // the end, so a temporary device created above is always released.
+  switch (d3d_device_set) {
+    case CL_PREFERRED_DEVICES_FOR_D3D10_KHR:
+    case CL_ALL_DEVICES_FOR_D3D10_KHR: {
+      gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id));
+
+      errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL);
+      if (errcode != CL_SUCCESS) {
+        break;
+      }
+
+      void* external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {};
+      external_device[amd::Context::DeviceFlagIdx::D3D10DeviceKhrIdx] = d3d10_device;
+
+      // Keep the GPU devices that accept the D3D10 device in validate-only mode.
+      std::vector<amd::Device*> compatible_devices;
+      for (cl_uint i = 0; i < num_gpu_devices; ++i) {
+        cl_device_id device = gpu_devices[i];
+        if (is_valid(device) &&
+            as_amd(device)->bindExternalDevice(amd::Context::Flags::D3D10DeviceKhr, external_device,
+                                               NULL, VALIDATE_ONLY)) {
+          compatible_devices.push_back(as_amd(device));
+        }
+      }
+      if (compatible_devices.size() == 0) {
+        *not_null(num_devices) = 0;
+        errcode = CL_DEVICE_NOT_FOUND;
+        break;
+      }
+
+      auto it = compatible_devices.cbegin();
+      cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size());
+
+      // Copy as many compatible devices as fit, then zero the unused tail.
+      while (compatible_count--) {
+        *devices++ = as_cl(*it++);
+        --num_entries;
+      }
+      while (num_entries--) {
+        *devices++ = (cl_device_id)0;
+      }
+
+      *not_null(num_devices) = (cl_uint)compatible_devices.size();
+    } break;
+
+    default:
+      LogWarning("\"d3d_device_set\" is invalid");
+      errcode = CL_INVALID_VALUE;
+  }
+
+  if (create_d3d10Device) {
+    d3d10_device->Release();
+    FreeLibrary(d3d10Module);
+  }
+  return errcode;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromD3D10BufferKHR
+ * @{
+ */
+
+/*! \brief Creates an OpenCL buffer object from a Direct3D 10 resource.
+ *
+ * \param context is a valid OpenCL context.
+ *
+ * \param flags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ * + * \param pD3DResource is a valid pointer to a D3D10 resource of type ID3D10Buffer. + * + * \return a valid non-zero OpenCL buffer object, with \a errcode_ret set to + * CL_SUCCESS, when the buffer object is created successfully. Otherwise a NULL + * value is returned and \a errcode_ret receives one of the following errors: + * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 10 + * interoperability has not been initialized between context and the ID3D10Device + * from which pD3DResource was created. + * - CL_INVALID_VALUE if values specified in \a flags are not valid or if + * \a pD3DResource is NULL. + * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D10Buffer. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r33? + */ + +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10BufferKHR, + (cl_context context, cl_mem_flags flags, ID3D10Buffer* pD3DResource, + cl_int* errcode_ret)) { + cl_mem noBuffer = NULL; + + // The handle must refer to a valid context + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return noBuffer; + } + + // An empty flag set is treated as read-write + if (!flags) { + flags = CL_MEM_READ_WRITE; + } + + // At least one of the three access values has to be present + const bool lacksReadOnly = (flags & CL_MEM_READ_ONLY) != CL_MEM_READ_ONLY; + const bool lacksWriteOnly = (flags & CL_MEM_WRITE_ONLY) != CL_MEM_WRITE_ONLY; + const bool lacksReadWrite = (flags & CL_MEM_READ_WRITE) != CL_MEM_READ_WRITE; + if (lacksReadOnly && lacksWriteOnly && lacksReadWrite) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return noBuffer; + } + + if (pD3DResource == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return noBuffer; + } + + // Resource type checking and object creation happen in the amd:: helper + return amd::clCreateBufferFromD3D10ResourceAMD(*as_amd(context), flags, pD3DResource, + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateImageFromD3D10Resource + * @{ + */ + +/*! \brief Create an OpenCL 2D or 3D image object from a D3D10 resource. + * + * \param context is a valid OpenCL context.
+ * + * \param flags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param pD3DResource is a valid pointer to a D3D10 resource of type + * ID3D10Texture1D, ID3D10Texture2D, or ID3D10Texture3D. + * If pD3DResource is of type ID3D10Texture1D then the created image object + * will be a 1D mipmapped image object. + * If pD3DResource is of type ID3D10Texture2D and was not created with flag + * D3D10_RESOURCE_MISC_TEXTURECUBE then the created image object will be a + * 2D mipmapped image object. + * If pD3DResource is of type ID3D10Texture2D and was created with flag + * D3D10_RESOURCE_MISC_TEXTURECUBE then the created image object will be + * a cubemap mipmapped image object. + * errcode_ret returns CL_INVALID_D3D_RESOURCE if an OpenCL memory object has + * already been created from pD3DResource in context. + * If pD3DResource is of type ID3D10Texture3D then the created image object will + * be a 3D mipmapped image object. + * + * \return valid non-zero OpenCL image object and \a errcode_ret is set + * to CL_SUCCESS if the image object is created successfully. It returns a NULL + * value with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 10 + * interoperability has not been initialized between context and the ID3D10Device + * from which pD3DResource was created. + * - CL_INVALID_VALUE if values specified in \a flags are not valid. + * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D10Texture1D, + * ID3D10Texture2D, or ID3D10Texture3D. + * - CL_INVALID_D3D_RESOURCE if an OpenCL memory object has already been created + * from \a pD3DResource in context. + * - CL_INVALID_IMAGE_FORMAT if the Direct3D 10 texture format does not map + * to an appropriate OpenCL image format.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r48? + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D10Resource, + (cl_context context, cl_mem_flags flags, ID3D10Resource* pD3DResource, + UINT subresource, int* errcode_ret, UINT dimension)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + if (!pD3DResource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } + + // Verify context init'ed for interop + ID3D10Device* pDev; + pD3DResource->GetDevice(&pDev); + if (pDev == NULL) { + *not_null(errcode_ret) = CL_INVALID_D3D10_DEVICE_KHR; + LogWarning("Cannot retrieve D3D10 device from D3D10 resource"); + return (cl_mem)0; + } + pDev->Release(); + if (!((*as_amd(context)).info().flags_ & amd::Context::D3D10DeviceKhr)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from D3D10 device"); + return (cl_mem)0; + } + + // Check for image support + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } + + switch (dimension) { +#if 0 + case 1: + return(amd::clCreateImage1DFromD3D10ResourceAMD( + *as_amd(context), + flags, + pD3DResource, + 
subresource, + errcode_ret)); +#endif // 0 + case 2: + return (amd::clCreateImage2DFromD3D10ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); + case 3: + return (amd::clCreateImage3DFromD3D10ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); + default: + break; + } + + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromD3D10Texture2DKHR + * @{ + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture2DKHR, + (cl_context context, cl_mem_flags flags, ID3D10Texture2D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D10Resource(context, flags, resource, subresource, errcode_ret, 2); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromD3D10Texture3DKHR + * @{ + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture3DKHR, + (cl_context context, cl_mem_flags flags, ID3D10Texture3D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D10Resource(context, flags, resource, subresource, errcode_ret, 3); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clEnqueueAcquireD3D10ObjectsKHR + * @{ + */ +RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D10ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR); +} +RUNTIME_EXIT + +/*! 
@} + * \addtogroup clEnqueueReleaseD3D10ObjectsKHR + * @{ + */ +RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D10ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR); +} +RUNTIME_EXIT + + +// +// +// namespace amd +// +// +namespace amd { +/*! @} + * \addtogroup CL-D3D10 interop helper functions + * @{ + */ + + +//******************************************************************* +// +// Internal implementation of CL API functions +// +//******************************************************************* +// +// clCreateBufferFromD3D10ResourceAMD +// +cl_mem clCreateBufferFromD3D10ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D10Resource* pD3DResource, int* errcode_ret) { + // Verify pD3DResource is a buffer + D3D10_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D10_RESOURCE_DIMENSION_BUFFER) { + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; + } + + D3D10Object obj; + int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, 0, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + BufferD3D10* pBufferD3D10 = new (amdContext) BufferD3D10(amdContext, flags, obj); + if (!pBufferD3D10) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pBufferD3D10->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pBufferD3D10->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pBufferD3D10); +} +#if 0 +// There is no support for 1D images in the base imagee code +// +// clCreateImage1DFromD3D10ResourceAMD +// +cl_mem clCreateImage1DFromD3D10ResourceAMD( + Context& 
amdContext, + cl_mem_flags flags, + ID3D10Resource* pD3DResource, + UINT subresource, + int* errcode_ret) +{ + + // Verify the resource is a 1D texture + D3D10_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if(rType != D3D10_RESOURCE_DIMENSION_TEXTURE1D) { + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem) 0; + } + + D3D10Object obj; + int errcode = D3D10Object::initD3D10Object(pD3DResource, subresource, obj); + if(CL_SUCCESS != errcode) + { + *not_null(errcode_ret) = errcode; + return (cl_mem) 0; + } + + Image1DD3D10 *pImage1DD3D10 = new Image1DD3D10(amdContext, flags, obj); + if(!pImage1DD3D10) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem) 0; + } + if (!pImage1DD3D10->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage1DD3D10->release(); + return (cl_mem) 0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage1DD3D10); +} +#endif + +// +// clCreateImage2DFromD3D10ResourceAMD +// +cl_mem clCreateImage2DFromD3D10ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D10Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 2D texture + D3D10_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D) { + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; + } + + D3D10Object obj; + int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + Image2DD3D10* pImage2DD3D10 = new (amdContext) Image2DD3D10(amdContext, flags, obj); + if (!pImage2DD3D10) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage2DD3D10->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage2DD3D10->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return 
as_cl(pImage2DD3D10); +} + +// +// clCreateImage2DFromD3D10ResourceAMD +// +cl_mem clCreateImage3DFromD3D10ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D10Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 2D texture + D3D10_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE3D) { + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; + } + + D3D10Object obj; + int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + Image3DD3D10* pImage3DD3D10 = new (amdContext) Image3DD3D10(amdContext, flags, obj); + if (!pImage3DD3D10) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage3DD3D10->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage3DD3D10->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage3DD3D10); +} + +// +// Helper function SyncD3D10Objects +// +void SyncD3D10Objects(std::vector& memObjects) { + Memory*& mem = memObjects.front(); + if (!mem) { + LogWarning("\nNULL memory object\n"); + return; + } + InteropObject* interop = mem->getInteropObj(); + if (!interop) { + LogWarning("\nNULL interop object\n"); + return; + } + D3D10Object* d3d10Obj = interop->asD3D10Object(); + if (!d3d10Obj) { + LogWarning("\nNULL D3D10 object\n"); + return; + } + ID3D10Query* query = d3d10Obj->getQuery(); + if (!query) { + LogWarning("\nNULL ID3D10Query\n"); + return; + } + query->End(); + BOOL data = FALSE; + while (S_OK != query->GetData(&data, sizeof(BOOL), 0)) { + } +} + +// +// Class D3D10Object implementation +// +size_t D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) { + size_t bytesPerPixel; + + switch (dxgiFmt) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case 
DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + bytesPerPixel = 16; + break; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + bytesPerPixel = 12; + break; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + bytesPerPixel = 8; + break; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + bytesPerPixel = 4; + break; + + case DXGI_FORMAT_R8G8_TYPELESS: + case 
DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + bytesPerPixel = 2; + break; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_R1_UNORM: + bytesPerPixel = 1; + break; + + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + // Less than 1 byte per pixel - needs special consideration + bytesPerPixel = 0; + break; + + default: + bytesPerPixel = 0; + _ASSERT(FALSE); + break; + } + return bytesPerPixel; +} + +cl_image_format D3D10Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt) { + cl_image_format fmt; + + //! 
@todo [odintsov]: add real fmt conversion from DXGI to CL + fmt.image_channel_order = 0; // CL_RGBA; + fmt.image_channel_data_type = 0; // CL_UNSIGNED_INT8; + + switch (dxgiFmt) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R32G32B32A32_FLOAT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32G32B32A32_UINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32B32A32_SINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + fmt.image_channel_order = CL_RGB; + break; + + case DXGI_FORMAT_R32G32B32_FLOAT: + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32G32B32_UINT: + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32B32_SINT: + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R16G16B16A16_FLOAT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; + + case DXGI_FORMAT_R16G16B16A16_UNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case DXGI_FORMAT_R16G16B16A16_UINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; + + case DXGI_FORMAT_R16G16B16A16_SNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; + + case DXGI_FORMAT_R16G16B16A16_SINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; + + case DXGI_FORMAT_R32G32_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + 
case DXGI_FORMAT_R32G32_FLOAT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32G32_UINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32_SINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R32G8X24_TYPELESS: + break; + + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + break; + + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + break; + + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + break; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R10G10B10A2_UNORM: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R10G10B10A2_UINT: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R11G11B10_FLOAT: + fmt.image_channel_order = CL_RGB; + break; + + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_UINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_SNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_SINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; + + case DXGI_FORMAT_R16G16_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + case DXGI_FORMAT_R16G16_FLOAT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; + + case DXGI_FORMAT_R16G16_UNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT16; + 
break; + + case DXGI_FORMAT_R16G16_UINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; + + case DXGI_FORMAT_R16G16_SNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; + + case DXGI_FORMAT_R16G16_SINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; + + case DXGI_FORMAT_R32_TYPELESS: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_D32_FLOAT: + break; + + case DXGI_FORMAT_R32_FLOAT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32_UINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32_SINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R24G8_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + case DXGI_FORMAT_D24_UNORM_S8_UINT: + break; + + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + break; + + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + break; + + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + break; + + case DXGI_FORMAT_R8G8_B8G8_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_G8R8_G8B8_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_B8G8R8X8_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + case DXGI_FORMAT_R8G8_UNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8_UINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + + case DXGI_FORMAT_R8G8_SNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = 
CL_SNORM_INT8; + break; + + case DXGI_FORMAT_R8G8_SINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; + + case DXGI_FORMAT_R16_TYPELESS: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_R16_FLOAT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; + + case DXGI_FORMAT_D16_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case DXGI_FORMAT_R16_UNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case DXGI_FORMAT_R16_UINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; + + case DXGI_FORMAT_R16_SNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; + + case DXGI_FORMAT_R16_SINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; + + case DXGI_FORMAT_B5G6R5_UNORM: + fmt.image_channel_data_type = CL_UNORM_SHORT_565; + break; + + case DXGI_FORMAT_B5G5R5A1_UNORM: + fmt.image_channel_order = CL_BGRA; + break; + + case DXGI_FORMAT_R8_TYPELESS: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_R8_UNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8_UINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + + case DXGI_FORMAT_R8_SNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; + + case DXGI_FORMAT_R8_SINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; + + case DXGI_FORMAT_A8_UNORM: + fmt.image_channel_order = CL_A; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R1_UNORM: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case 
DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + break; + + default: + _ASSERT(FALSE); + break; + } + + return fmt; +} + +// Returns the byte size of the resource described by objDesc_: ByteWidth for a +// buffer, otherwise Width (* Height (* Depth)) * getElementBytes(). Unknown +// dimensions are logged and yield 0. +size_t D3D10Object::getResourceByteSize() { + size_t bytes = 1; + + //! @todo [odintsov]: take into consideration the mip level?! + + switch (objDesc_.objDim_) { + case D3D10_RESOURCE_DIMENSION_BUFFER: + bytes = objDesc_.objSize_.ByteWidth; + break; + + case D3D10_RESOURCE_DIMENSION_TEXTURE3D: + bytes = objDesc_.objSize_.Depth; + // fall through: a 3D texture also has height and width + + case D3D10_RESOURCE_DIMENSION_TEXTURE2D: + bytes *= objDesc_.objSize_.Height; + // fall through: a 2D texture also has width + + case D3D10_RESOURCE_DIMENSION_TEXTURE1D: + bytes *= objDesc_.objSize_.Width * getElementBytes(); + break; + + default: + LogError("getResourceByteSize: unknown type of D3D10 resource"); + bytes = 0; + break; + } + return bytes; +} + +// Describes \a pRes / \a subres in \a obj and registers the pair in resources_. +// +// When the resource was not created with D3D10_RESOURCE_MISC_SHARED (textures: +// also when a non-zero subresource is requested; 3D textures: also when they +// have more than one mip level) a shared duplicate is created on the same +// device and the original is kept in obj.pD3D10ResOrig_. +// +// Returns CL_SUCCESS, or CL_INVALID_D3D10_RESOURCE_KHR (pair already registered, +// unknown dimension, duplicate creation failed), CL_INVALID_D3D10_DEVICE_KHR (no +// device), CL_INVALID_VALUE (subresource out of range) or +// CL_INVALID_IMAGE_FORMAT_DESCRIPTOR (format not supported by \a amdContext). +int D3D10Object::initD3D10Object(const Context& amdContext, ID3D10Resource* pRes, UINT subres, + D3D10Object& obj) { + ID3D10Device* pDev; + HRESULT hr; + ScopedLock sl(resLock_); + + // Check if this resource has already been used for interop + for (const auto& it : resources_) { + if (it.first == (void*)pRes && it.second == subres) { + return CL_INVALID_D3D10_RESOURCE_KHR; + } + } + + // GetDevice() hands back a reference that must be released on every exit path + (obj.pD3D10Res_ = pRes)->GetDevice(&pDev); + + if (!pDev) { + return CL_INVALID_D3D10_DEVICE_KHR; + } + + D3D10_QUERY_DESC desc = {D3D10_QUERY_EVENT, 0}; + pDev->CreateQuery(&desc, &obj.pQuery_); + +#define SET_SHARED_FLAGS() \ + { \ + obj.pD3D10ResOrig_ = obj.pD3D10Res_; \ + memcpy(&obj.objDescOrig_, &obj.objDesc_, sizeof(D3D10ObjDesc_t)); \ + /* @todo - Check device type and select right usage for resource */ \ + /* For now get only DPU path, CPU path for buffers */ \ + /* will not work on DEFAULT
resources */ \ + /*desc.Usage = D3D10_USAGE_STAGING;*/ \ + desc.Usage = D3D10_USAGE_DEFAULT; \ + desc.MiscFlags = D3D10_RESOURCE_MISC_SHARED; \ + desc.CPUAccessFlags = 0; \ + } + +#define STORE_SHARED_FLAGS(restype) \ + { \ + if (S_OK == hr && obj.pD3D10Res_) { \ + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; \ + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ + } else { \ + LogError("\nCannot create shared " #restype "\n"); \ + /* drop the GetDevice() reference before bailing out */ \ + pDev->Release(); \ + return CL_INVALID_D3D10_RESOURCE_KHR; \ + } \ + } + +#define SET_BINDING() \ + { \ + switch (desc.Format) { \ + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \ + case DXGI_FORMAT_D32_FLOAT: \ + case DXGI_FORMAT_D24_UNORM_S8_UINT: \ + case DXGI_FORMAT_D16_UNORM: \ + desc.BindFlags = D3D10_BIND_DEPTH_STENCIL; \ + break; \ + default: \ + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; \ + break; \ + } \ + } + + pRes->GetType(&obj.objDesc_.objDim_); + + // Init defaults + obj.objDesc_.objSize_.Height = 1; + obj.objDesc_.objSize_.Depth = 1; + obj.objDesc_.mipLevels_ = 1; + obj.objDesc_.arraySize_ = 1; + obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN; + obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault; + + switch (obj.objDesc_.objDim_) { + case D3D10_RESOURCE_DIMENSION_BUFFER: // = 1, + { + D3D10_BUFFER_DESC desc; + (reinterpret_cast<ID3D10Buffer*>(pRes))->GetDesc(&desc); + obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D10Buffer without shared handle - create + // a duplicate with shared handle to provide for CAL + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE |
D3D10_BIND_RENDER_TARGET; + hr = pDev->CreateBuffer(&desc, NULL, (ID3D10Buffer**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Buffer); + } + } break; + + case D3D10_RESOURCE_DIMENSION_TEXTURE1D: // = 2, + { + D3D10_TEXTURE1D_DESC desc; + (reinterpret_cast<ID3D10Texture1D*>(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + // subresource index = mip + arraySlice * MipLevels, so the mip + // level is the remainder modulo MipLevels (not ArraySize) + miplevel = subres % desc.MipLevels; + } + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels * desc.ArraySize) { + pDev->Release(); + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D10Texture1D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D10_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture1D(&desc, NULL, (ID3D10Texture1D**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Texture1D); + } + } break; + + case D3D10_RESOURCE_DIMENSION_TEXTURE2D: // = 3, + { + D3D10_TEXTURE2D_DESC desc; + (reinterpret_cast<ID3D10Texture2D*>(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + miplevel = subres % desc.MipLevels; + } + if (miplevel >=
desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels * desc.ArraySize) { + pDev->Release(); + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D10Texture2D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D10_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture2D(&desc, NULL, (ID3D10Texture2D**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Texture2D); + } + } break; + + case D3D10_RESOURCE_DIMENSION_TEXTURE3D: // = 4 + { + D3D10_TEXTURE3D_DESC desc; + (reinterpret_cast<ID3D10Texture3D*>(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels) { + pDev->Release(); + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + desc.Depth >>= miplevel; + if (!desc.Depth) { + desc.Depth = 1; + } + } +
obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.objSize_.Depth = desc.Depth; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D10Texture3D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (obj.objDesc_.mipLevels_ > 1) + obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D10_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + hr = pDev->CreateTexture3D(&desc, NULL, (ID3D10Texture3D**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Texture3D); + } + } break; + + default: + LogError("unknown type of D3D10 resource"); + pDev->Release(); + return CL_INVALID_D3D10_RESOURCE_KHR; + } + obj.subRes_ = subres; + pDev->Release(); + // Check for CL format compatibility + if (obj.objDesc_.objDim_ != D3D10_RESOURCE_DIMENSION_BUFFER) { + cl_image_format clFmt = obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_); + amd::Image::Format imageFormat(clFmt); + if (!imageFormat.isSupported(amdContext)) { + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + } + } + // Remember the pair so a second interop object cannot be created from it + resources_.push_back({pRes, subres}); + return CL_SUCCESS; +} + +bool D3D10Object::copyOrigToShared() { + // Don't copy if there is no orig + if (NULL == getD3D10ResOrig()) return true; + + ID3D10Device* d3dDev; + pD3D10Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D10 device from D3D10 resource\n"); + return false; + } + // Any usage source can be read by GPU + d3dDev->CopySubresourceRegion(pD3D10Res_, 0, 0, 0, 0, pD3D10ResOrig_, subRes_, NULL); + + // Flush D3D queues and make sure D3D
stuff is finished + d3dDev->Flush(); + pQuery_->End(); + BOOL data = FALSE; + while ((S_OK != pQuery_->GetData(&data, sizeof(BOOL), 0)) || (data != TRUE)) { + } + + d3dDev->Release(); + return true; +} + +bool D3D10Object::copySharedToOrig() { + // Don't copy if there is no orig + if (NULL == getD3D10ResOrig()) return true; + + ID3D10Device* d3dDev; + pD3D10Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D10 device from D3D10 resource\n"); + return false; + } + + d3dDev->CopySubresourceRegion(pD3D10ResOrig_, subRes_, 0, 0, 0, pD3D10Res_, 0, NULL); + + d3dDev->Release(); + return true; +} + +std::vector> D3D10Object::resources_; +Monitor D3D10Object::resLock_; + +// +// Class BufferD3D10 implementation +// +void BufferD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +// +// Class Image1DD3D10 implementation +// + +void Image1DD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image1DD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +// +// Class Image2DD3D10 implementation +// + +void Image2DD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image2DD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +// +// Class Image3DD3D10 implementation +// +void Image3DD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image3DD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +} // namespace amd + +#endif //_WIN32 diff --git a/projects/clr/hipamd/src/cl_d3d11.cpp b/projects/clr/hipamd/src/cl_d3d11.cpp new file mode 100644 index 0000000000..9ad60cd2ab --- /dev/null +++ b/projects/clr/hipamd/src/cl_d3d11.cpp @@ -0,0 +1,1570 @@ +/* 
Copyright (c) 2009 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifdef _WIN32 + +#include "top.hpp" + +#include "cl_d3d11_amd.hpp" +#include "platform/command.hpp" + +#include +#include + +/*! \addtogroup API + * @{ + * + * \addtogroup CL_D3D11_Interops + * + * This section discusses OpenCL functions that allow applications to use Direct3D 11 + * resources (buffers/textures) as OpenCL memory objects. This allows efficient sharing of + * data between OpenCL and Direct3D 11. The OpenCL API can be used to execute kernels that + * read and/or write memory objects that are also the Direct3D resources. + * An OpenCL image object can be created from a D3D11 texture object. An + * OpenCL buffer object can be created from a D3D11 buffer object (index/vertex). 
+ * + * @} + * \addtogroup clGetDeviceIDsFromD3D11KHR + * @{ + */ + +RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D11KHR, + (cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void* d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + cl_int errcode; + ID3D11Device* d3d11_device = NULL; + cl_device_id* gpu_devices; + cl_uint num_gpu_devices = 0; + bool create_d3d11Device = false; + static const bool VALIDATE_ONLY = true; + HMODULE d3d11Module = NULL; + + if (platform != NULL && platform != AMD_PLATFORM) { + LogWarning("\"platrform\" is not a valid AMD platform"); + return CL_INVALID_PLATFORM; + } + if (((num_entries > 0 || num_devices == NULL) && devices == NULL) || + (num_entries == 0 && devices != NULL)) { + return CL_INVALID_VALUE; + } + // Get GPU devices + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } + + if (!num_gpu_devices) { + *not_null(num_devices) = 0; + return CL_DEVICE_NOT_FOUND; + } + + switch (d3d_device_source) { + case CL_D3D11_DEVICE_KHR: + d3d11_device = static_cast(d3d_object); + break; + case CL_D3D11_DXGI_ADAPTER_KHR: { + static PFN_D3D11_CREATE_DEVICE dynamicD3D11CreateDevice = NULL; + + d3d11Module = LoadLibrary("D3D11.dll"); + if (d3d11Module == NULL) { + return CL_INVALID_PLATFORM; + } + + dynamicD3D11CreateDevice = + (PFN_D3D11_CREATE_DEVICE)GetProcAddress(d3d11Module, "D3D11CreateDevice"); + + IDXGIAdapter* dxgi_adapter = static_cast(d3d_object); + D3D_FEATURE_LEVEL requestedFeatureLevels[] = {D3D_FEATURE_LEVEL_10_0}; + D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; + HRESULT hr = dynamicD3D11CreateDevice(dxgi_adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, + requestedFeatureLevels, 1, D3D11_SDK_VERSION, + &d3d11_device, &featureLevel, NULL); + if (SUCCEEDED(hr) && (NULL != d3d11_device)) { + create_d3d11Device = 
true; + } else { + FreeLibrary(d3d11Module); + return CL_INVALID_VALUE; + } + } break; + default: + LogWarning("\"d3d_device_source\" is invalid"); + return CL_INVALID_VALUE; + } + + switch (d3d_device_set) { + case CL_PREFERRED_DEVICES_FOR_D3D11_KHR: + case CL_ALL_DEVICES_FOR_D3D11_KHR: { + gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id)); + + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + break; + } + + std::vector compatible_devices; + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + void* external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; + external_device[amd::Context::DeviceFlagIdx::D3D11DeviceKhrIdx] = d3d11_device; + + cl_device_id device = gpu_devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(amd::Context::Flags::D3D11DeviceKhr, external_device, + NULL, VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + if (compatible_devices.size() == 0) { + *not_null(num_devices) = 0; + errcode = CL_DEVICE_NOT_FOUND; + break; + } + + auto it = compatible_devices.cbegin(); + cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); + + while (compatible_count--) { + *devices++ = as_cl(*it++); + --num_entries; + } + while (num_entries--) { + *devices++ = (cl_device_id)0; + } + + *not_null(num_devices) = (cl_uint)compatible_devices.size(); + } break; + + default: + LogWarning("\"d3d_device_set\" is invalid"); + errcode = CL_INVALID_VALUE; + } + + if (create_d3d11Device) { + d3d11_device->Release(); + FreeLibrary(d3d11Module); + } + return errcode; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromD3D11BufferKHR + * @{ + */ + +/*! \brief Creates an OpenCL buffer object from a Direct3D 10 resource. + * + * \param context is a valid OpenCL context. + * + * \param flags is a bit-field that is used to specify usage information. 
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. A value of 0 is treated as CL_MEM_READ_WRITE. + * + * \param pD3DResource is a valid pointer to a D3D11 resource of type ID3D11Buffer. + * + * \return valid non-zero OpenCL buffer object and \a errcode_ret is set + * to CL_SUCCESS if the buffer object is created successfully. It returns a NULL + * value with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 11 + * interoperability has not been initialized between context and the ID3D11Device + * from which pD3DResource was created. + * - CL_INVALID_VALUE if values specified in \a flags are not valid or if + * \a pD3DResource is NULL. + * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D11Buffer. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r33? + */ + +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11BufferKHR, + (cl_context context, cl_mem_flags flags, ID3D11Buffer* pD3DResource, + cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + if (!pD3DResource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } + return ( + amd::clCreateBufferFromD3D11ResourceAMD(*as_amd(context), flags, pD3DResource, errcode_ret)); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateImageFromD3D11Resource + * @{ + */ + +/*! \brief Create an OpenCL 2D or 3D image object from a D3D11 resource. 
+ * + * \param context is a valid OpenCL context. + * + * \param flags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param pD3DResource is a valid pointer to a D3D11 resource of type + * ID3D11Texture1D, ID3D11Texture2D, or ID3D11Texture3D. + * If pD3DResource is of type ID3D11Texture1D then the created image object + * will be a 1D mipmapped image object. + * If pD3DResource is of type ID3D11Texture2D and was not created with flag + * D3D11_RESOURCE_MISC_TEXTURECUBE then the created image object will be a + * 2D mipmapped image object. + * If pD3DResource is of type ID3D11Texture2D and was created with flag + * D3D11_RESOURCE_MISC_TEXTURECUBE then the created image object will be + * a cubemap mipmapped image object. + * errcode_ret returns CL_INVALID_D3D_RESOURCE if an OpenCL memory object has + * already been created from pD3DResource in context. + * If pD3DResource is of type ID3D11Texture3D then the created image object will + * be a 3D mipmapped image object. + * + * \return valid non-zero OpenCL image object and \a errcode_ret is set + * to CL_SUCCESS if the image object is created successfully. It returns a NULL + * value with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 11 + * interoperability has not been initialized between context and the ID3D11Device + * from which pD3DResource was created. + * - CL_INVALID_VALUE if values specified in \a flags are not valid. + * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D11Texture1D, + * ID3D11Texture2D, or ID3D11Texture3D. + * - CL_INVALID_D3D_RESOURCE if an OpenCL memory object has already been created + * from \a pD3DResource in context. + * - CL_INVALID_IMAGE_FORMAT if the Direct3D 11 texture format does not map + * to an appropriate OpenCL image format. 
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r48? + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D11Resource, + (cl_context context, cl_mem_flags flags, ID3D11Resource* pD3DResource, + UINT subresource, int* errcode_ret, UINT dimension)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + if (!pD3DResource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } + + // Verify context init'ed for interop + ID3D11Device* pDev; + pD3DResource->GetDevice(&pDev); + if (pDev == NULL) { + *not_null(errcode_ret) = CL_INVALID_D3D11_DEVICE_KHR; + LogWarning("Cannot retrieve D3D11 device from D3D11 resource"); + return (cl_mem)0; + } + pDev->Release(); + if (!((*as_amd(context)).info().flags_ & amd::Context::D3D11DeviceKhr)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from D3D11 device"); + return (cl_mem)0; + } + + // Check for image support + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } + + switch (dimension) { +#if 0 + case 1: + return(amd::clCreateImage1DFromD3D11ResourceAMD( + *as_amd(context), + flags, + pD3DResource, + 
subresource, + errcode_ret)); +#endif // 0 + case 2: + return (amd::clCreateImage2DFromD3D11ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); + case 3: + return (amd::clCreateImage3DFromD3D11ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); + default: + break; + } + + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromD3D11Texture2DKHR + * @{ + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture2DKHR, + (cl_context context, cl_mem_flags flags, ID3D11Texture2D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D11Resource(context, flags, resource, subresource, errcode_ret, 2); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromD3D11Texture3DKHR + * @{ + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture3DKHR, + (cl_context context, cl_mem_flags flags, ID3D11Texture3D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D11Resource(context, flags, resource, subresource, errcode_ret, 3); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clEnqueueAcquireD3D11ObjectsKHR + * @{ + */ +RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D11ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR); +} +RUNTIME_EXIT + +/*! 
@} + * \addtogroup clEnqueueReleaseD3D11ObjectsKHR + * @{ + */ +RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D11ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetPlaneFromImageAMD + * @{ + */ +RUNTIME_ENTRY_RET(cl_mem, clGetPlaneFromImageAMD, + (cl_context context, cl_mem mem, cl_uint plane, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return 0; + } + if (mem == 0) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return 0; + } + if (!is_valid(mem)) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return 0; + } + amd::Memory* amdMem = as_amd(mem); + amd::Context& amdContext = *as_amd(context); + if (amdMem->getInteropObj() == NULL) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return 0; + } + amd::Image2DD3D11* pImage = reinterpret_cast(amdMem); + ID3D11Resource* pD3DResource = pImage->getD3D11Resource(); + // Verify the resource is a 2D texture + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } + + amd::D3D11Object obj; + int errcode = amd::D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, obj, plane); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + amd::Image2DD3D11* pImage2DD3D11 = + new (amdContext) amd::Image2DD3D11(amdContext, pImage->getMemFlags(), obj); + if (!pImage2DD3D11) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage2DD3D11->create()) { + 
*not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage2DD3D11->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage2DD3D11); +} +RUNTIME_EXIT + +// +// +// namespace amd +// +// +namespace amd { +/*! @} + * \addtogroup CL-D3D11 interop helper functions + * @{ + */ + + +//******************************************************************* +// +// Internal implementation of CL API functions +// +//******************************************************************* +// +// clCreateBufferFromD3D11ResourceAMD +// +cl_mem clCreateBufferFromD3D11ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D11Resource* pD3DResource, int* errcode_ret) { + // Verify pD3DResource is a buffer + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_BUFFER) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } + + D3D11Object obj; + int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + BufferD3D11* pBufferD3D11 = new (amdContext) BufferD3D11(amdContext, flags, obj); + if (!pBufferD3D11) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pBufferD3D11->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pBufferD3D11->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pBufferD3D11); +} + +// +// clCreateImage2DFromD3D11ResourceAMD +// +cl_mem clCreateImage2DFromD3D11ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D11Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 2D texture + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } + + 
D3D11Object obj; + int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + Image2DD3D11* pImage2DD3D11 = new (amdContext) Image2DD3D11(amdContext, flags, obj); + if (!pImage2DD3D11) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage2DD3D11->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage2DD3D11->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage2DD3D11); +} + +// +// clCreateImage3DFromD3D11ResourceAMD +// +// Wraps subresource `subresource` of a D3D11 3D texture in an Image3DD3D11. +// Returns 0 and stores the error through not_null(errcode_ret) when the +// resource is not a 3D texture, initD3D11Object fails, allocation fails or +// create() fails; otherwise stores CL_SUCCESS and returns the new image. +// +cl_mem clCreateImage3DFromD3D11ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D11Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 3D texture + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE3D) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } + + D3D11Object obj; + int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + Image3DD3D11* pImage3DD3D11 = new (amdContext) Image3DD3D11(amdContext, flags, obj); + if (!pImage3DD3D11) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage3DD3D11->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage3DD3D11->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage3DD3D11); +} + +size_t D3D11Object::getResourceByteSize() { + size_t bytes = 1; + + //! @todo [odintsov]: take into consideration the mip level?! 
+ + switch (objDesc_.objDim_) { + case D3D11_RESOURCE_DIMENSION_BUFFER: + bytes = objDesc_.objSize_.ByteWidth; + break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + bytes = objDesc_.objSize_.Depth; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + bytes *= objDesc_.objSize_.Height; + + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + bytes *= objDesc_.objSize_.Width * getElementBytes(); + break; + + default: + LogError("getResourceByteSize: unknown type of D3D11 resource"); + bytes = 0; + break; + } + return bytes; +} + +cl_uint D3D11Object::getMiscFlag() { + if ((objDesc_.dxgiFormat_ == DXGI_FORMAT_NV12) || + (objDesc_.dxgiFormat_ == DXGI_FORMAT_P010)) { + return 1; + } + else if (objDesc_.dxgiFormat_ == DXGI_FORMAT_YUY2) { + return 3; + } + return 0; +} + +int D3D11Object::initD3D11Object(const Context& amdContext, ID3D11Resource* pRes, UINT subres, + D3D11Object& obj, INT plane) { + ID3D11Device* pDev; + HRESULT hr; + ScopedLock sl(resLock_); + + // Check if this ressource has already been used for interop + for (const auto& it : resources_) { + if (it.first == (void*)pRes && it.second.first == subres && + it.second.second == plane) { + return CL_INVALID_D3D11_RESOURCE_KHR; + } + } + + (obj.pD3D11Res_ = pRes)->GetDevice(&pDev); + + if (!pDev) { + return CL_INVALID_D3D11_DEVICE_KHR; + } + + D3D11_QUERY_DESC desc = {D3D11_QUERY_EVENT, 0}; + pDev->CreateQuery(&desc, &obj.pQuery_); + +#define SET_SHARED_FLAGS() \ + { \ + obj.pD3D11ResOrig_ = obj.pD3D11Res_; \ + /* @todo - Check device type and select right usage for resource */ \ + /* For now get only DPU path, CPU path for buffers */ \ + /* will not worl on DEFAUL resources */ \ + /*desc.Usage = D3D11_USAGE_STAGING;*/ \ + desc.Usage = D3D11_USAGE_DEFAULT; \ + desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; \ + desc.CPUAccessFlags = 0; \ + } + +#define STORE_SHARED_FLAGS_BUFFER(restype) \ + { \ + if (S_OK == hr && obj.pD3D11Res_) { \ + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \ + obj.objDesc_.objFlags_.bindFlags_ = 
desc.BindFlags; \ + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ + obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride; \ + } else { \ + LogError("\nCannot create shared " #restype "\n"); \ + return CL_INVALID_D3D11_RESOURCE_KHR; \ + } \ + } + +#define STORE_SHARED_FLAGS(restype) \ + { \ + if (S_OK == hr && obj.pD3D11Res_) { \ + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \ + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ + } else { \ + LogError("\nCannot create shared " #restype "\n"); \ + return CL_INVALID_D3D11_RESOURCE_KHR; \ + } \ + } + +#define SET_BINDING() \ + { \ + switch (desc.Format) { \ + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \ + case DXGI_FORMAT_D32_FLOAT: \ + case DXGI_FORMAT_D24_UNORM_S8_UINT: \ + case DXGI_FORMAT_D16_UNORM: \ + desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; \ + break; \ + default: \ + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; \ + break; \ + } \ + } + + pRes->GetType(&obj.objDesc_.objDim_); + + // Init defaults + obj.objDesc_.objSize_.Height = 1; + obj.objDesc_.objSize_.Depth = 1; + obj.objDesc_.mipLevels_ = 1; + obj.objDesc_.arraySize_ = 1; + obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN; + obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault; + + switch (obj.objDesc_.objDim_) { + case D3D11_RESOURCE_DIMENSION_BUFFER: // = 1, + { + D3D11_BUFFER_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride; + // Handle D3D11Buffer without shared 
handle - create + // a duplicate with shared handle to provide for CAL + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + hr = pDev->CreateBuffer(&desc, NULL, (ID3D11Buffer**)&obj.pD3D11Res_); + STORE_SHARED_FLAGS_BUFFER(ID3D11Buffer); + } + } break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: // = 2, + { + D3D11_TEXTURE1D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + miplevel = subres % desc.ArraySize; + } + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels * desc.ArraySize) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D11Texture1D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D11_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture1D(&desc, NULL, (ID3D11Texture1D**)&obj.pD3D11Res_); + STORE_SHARED_FLAGS(ID3D11Texture1D); + } + } break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: // = 3, + { + D3D11_TEXTURE2D_DESC desc; + 
(reinterpret_cast(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + miplevel = subres % desc.MipLevels; + } + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels * desc.ArraySize) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + + // Handle D3D11Texture2D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D11_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture2D(&desc, NULL, (ID3D11Texture2D**)&obj.pD3D11Res_); + STORE_SHARED_FLAGS(ID3D11Texture2D); + } + + if ((desc.Format == DXGI_FORMAT_NV12) || (desc.Format == DXGI_FORMAT_P010)) { + if (plane == -1) { + obj.objDesc_.objSize_.Height += obj.objDesc_.objSize_.Height / 2; + } + if (plane == 1) { + obj.objDesc_.objSize_.Width /= 2; + obj.objDesc_.objSize_.Height /= 2; + } + } + // RGBA8 covers 2 pixels, thus divide width by 2 + if (desc.Format == DXGI_FORMAT_YUY2) { + 
obj.objDesc_.objSize_.Width /= 2; + } + } break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: // = 4 + { + D3D11_TEXTURE3D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + desc.Depth >>= miplevel; + if (!desc.Depth) { + desc.Depth = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.objSize_.Depth = desc.Depth; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D11Texture3D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (obj.objDesc_.mipLevels_ > 1) + obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D11_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + hr = pDev->CreateTexture3D(&desc, NULL, (ID3D11Texture3D**)&obj.pD3D11Res_); + STORE_SHARED_FLAGS(ID3D11Texture3D); + } + } break; + + default: + LogError("unknown type of D3D11 resource"); + return CL_INVALID_D3D11_RESOURCE_KHR; + } + obj.subRes_ = subres; + obj.plane_ = plane; + pDev->Release(); + // Check for CL format compatibilty + if (obj.objDesc_.objDim_ != D3D11_RESOURCE_DIMENSION_BUFFER) { + cl_image_format clFmt = 
obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_, plane); + amd::Image::Format imageFormat(clFmt); + if (!imageFormat.isSupported(amdContext)) { + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + } + } + resources_.push_back({pRes, {subres, plane}}); + return CL_SUCCESS; +} + +bool D3D11Object::copyOrigToShared() { + // Don't copy if there is no orig + if (NULL == getD3D11ResOrig()) return true; + + ID3D11Device* d3dDev; + pD3D11Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D11 device from D3D11 resource\n"); + return false; + } + ID3D11DeviceContext* pImmediateContext = NULL; + d3dDev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + assert(pD3D11ResOrig_ != NULL); + // Any usage source can be read by GPU + pImmediateContext->CopySubresourceRegion(pD3D11Res_, 0, 0, 0, 0, pD3D11ResOrig_, subRes_, NULL); + + // Flush D3D queues and make sure D3D stuff is finished + { + ScopedLock sl(resLock_); // protect from multiple + pImmediateContext->Flush(); + pImmediateContext->End(pQuery_); + BOOL data = FALSE; + while (S_OK != pImmediateContext->GetData(pQuery_, &data, sizeof(BOOL), 0)) { + } + } + + pImmediateContext->Release(); + d3dDev->Release(); + return true; +} + +bool D3D11Object::copySharedToOrig() { + // Don't copy if there is no orig + if (NULL == getD3D11ResOrig()) return true; + + ID3D11Device* d3dDev; + pD3D11Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D11 device from D3D11 resource\n"); + return false; + } + ID3D11DeviceContext* pImmediateContext = NULL; + d3dDev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + assert(pD3D11ResOrig_); + pImmediateContext->CopySubresourceRegion(pD3D11ResOrig_, subRes_, 0, 0, 0, pD3D11Res_, 0, NULL); + pImmediateContext->Release(); + + d3dDev->Release(); + return true; +} + +std::vector>> 
D3D11Object::resources_; +Monitor D3D11Object::resLock_; + +// +// Class BufferD3D11 implementation +// +void BufferD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferD3D11)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +// +// Class Image1DD3D11 implementation +// +void Image1DD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image1DD3D11)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +// +// Class Image2DD3D11 implementation +// + +void Image2DD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image2DD3D11)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +// +// Class Image3DD3D11 implementation +// +void Image3DD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image3DD3D11)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +// +// Helper function SyncD3D11Objects +// +void SyncD3D11Objects(std::vector& memObjects) { + Memory*& mem = memObjects.front(); + if (!mem) { + LogWarning("\nNULL memory object\n"); + return; + } + InteropObject* interop = mem->getInteropObj(); + if (!interop) { + LogWarning("\nNULL interop object\n"); + return; + } + D3D11Object* d3dObj = interop->asD3D11Object(); + if (!d3dObj) { + LogWarning("\nNULL D3D11 object\n"); + return; + } + ID3D11Query* query = d3dObj->getQuery(); + if (!query) { + LogWarning("\nNULL ID3D11Query\n"); + return; + } + ID3D11Device* d3dDev; + query->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D11 device from D3D11 resource\n"); + return; + } + ID3D11DeviceContext* pImmediateContext = NULL; + d3dDev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return; + } + 
pImmediateContext->Release(); + + // Flush D3D queues and make sure D3D stuff is finished + { + ScopedLock sl(d3dObj->getResLock()); + pImmediateContext->End(query); + BOOL data = FALSE; + while ((S_OK != pImmediateContext->GetData(query, &data, sizeof(BOOL), 0)) || (data != TRUE)) { + } + } + + d3dDev->Release(); +} + +// +// Class D3D11Object implementation +// +size_t D3D11Object::getElementBytes(DXGI_FORMAT dxgiFmt, cl_uint plane) { + size_t bytesPerPixel; + + switch (dxgiFmt) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + bytesPerPixel = 16; + break; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + bytesPerPixel = 12; + break; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + bytesPerPixel = 8; + break; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + case 
DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + + case DXGI_FORMAT_YUY2: + bytesPerPixel = 4; + break; + + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + bytesPerPixel = 2; + break; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_R1_UNORM: + bytesPerPixel = 1; + break; + + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + // Less than 1 byte per pixel - needs special consideration + bytesPerPixel = 0; + break; + case DXGI_FORMAT_NV12: + bytesPerPixel = 1; + if (plane == 1) { + bytesPerPixel = 2; + } + break; + case DXGI_FORMAT_P010: + bytesPerPixel = 2; + if (plane == 1) { + 
bytesPerPixel = 4; + } + break; + default: + bytesPerPixel = 0; + _ASSERT(FALSE); + break; + } + return bytesPerPixel; +} + +cl_image_format D3D11Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt, cl_uint plane) { + cl_image_format fmt; + + //! @todo [odintsov]: add real fmt conversion from DXGI to CL + fmt.image_channel_order = 0; // CL_RGBA; + fmt.image_channel_data_type = 0; // CL_UNSIGNED_INT8; + + switch (dxgiFmt) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R32G32B32A32_FLOAT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32G32B32A32_UINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32B32A32_SINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + fmt.image_channel_order = CL_RGB; + break; + + case DXGI_FORMAT_R32G32B32_FLOAT: + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32G32B32_UINT: + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32B32_SINT: + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R16G16B16A16_FLOAT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; + + case DXGI_FORMAT_R16G16B16A16_UNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case DXGI_FORMAT_R16G16B16A16_UINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; + + case DXGI_FORMAT_R16G16B16A16_SNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = 
CL_SNORM_INT16; + break; + + case DXGI_FORMAT_R16G16B16A16_SINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; + + case DXGI_FORMAT_R32G32_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + case DXGI_FORMAT_R32G32_FLOAT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32G32_UINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32G32_SINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R32G8X24_TYPELESS: + break; + + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + break; + + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + break; + + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + break; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R10G10B10A2_UNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT_101010; + break; + + case DXGI_FORMAT_R10G10B10A2_UINT: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R11G11B10_FLOAT: + fmt.image_channel_order = CL_RGB; + break; + + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + fmt.image_channel_order = CL_RGBA; + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_YUY2: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_SNORM: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; + + case DXGI_FORMAT_R8G8B8A8_SINT: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; + + 
case DXGI_FORMAT_R16G16_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + case DXGI_FORMAT_R16G16_FLOAT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; + + case DXGI_FORMAT_R16G16_UNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case DXGI_FORMAT_R16G16_UINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; + + case DXGI_FORMAT_R16G16_SNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; + + case DXGI_FORMAT_R16G16_SINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; + + case DXGI_FORMAT_R32_TYPELESS: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_D32_FLOAT: + break; + + case DXGI_FORMAT_R32_FLOAT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_FLOAT; + break; + + case DXGI_FORMAT_R32_UINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; + + case DXGI_FORMAT_R32_SINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; + + case DXGI_FORMAT_R24G8_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + case DXGI_FORMAT_D24_UNORM_S8_UINT: + break; + + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + break; + + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + break; + + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + break; + + case DXGI_FORMAT_R8G8_B8G8_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_G8R8_G8B8_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_B8G8R8X8_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8_TYPELESS: + fmt.image_channel_order = CL_RG; + break; + + case 
DXGI_FORMAT_R8G8_UNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8G8_UINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + + case DXGI_FORMAT_R8G8_SNORM: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; + + case DXGI_FORMAT_R8G8_SINT: + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; + + case DXGI_FORMAT_R16_TYPELESS: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_R16_FLOAT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; + + case DXGI_FORMAT_D16_UNORM: + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case DXGI_FORMAT_R16_UNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case DXGI_FORMAT_R16_UINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; + + case DXGI_FORMAT_R16_SNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; + + case DXGI_FORMAT_R16_SINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; + + case DXGI_FORMAT_B5G6R5_UNORM: + fmt.image_channel_data_type = CL_UNORM_SHORT_565; + break; + + case DXGI_FORMAT_B5G5R5A1_UNORM: + fmt.image_channel_order = CL_BGRA; + break; + + case DXGI_FORMAT_R8_TYPELESS: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_R8_UNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R8_UINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + + case DXGI_FORMAT_R8_SNORM: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; + + case DXGI_FORMAT_R8_SINT: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = 
CL_SIGNED_INT8; + break; + + case DXGI_FORMAT_A8_UNORM: + fmt.image_channel_order = CL_A; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case DXGI_FORMAT_R1_UNORM: + fmt.image_channel_order = CL_R; + break; + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + break; + case DXGI_FORMAT_NV12: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + if (plane == 1) { + fmt.image_channel_order = CL_RG; + } + break; + case DXGI_FORMAT_P010: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + if (plane == 1) { + fmt.image_channel_order = CL_RG; + } + break; + default: + _ASSERT(FALSE); + break; + } + + return fmt; +} + +} // namespace amd + +#endif //_WIN32 diff --git a/projects/clr/hipamd/src/cl_d3d9.cpp b/projects/clr/hipamd/src/cl_d3d9.cpp new file mode 100644 index 0000000000..1b78b68054 --- /dev/null +++ b/projects/clr/hipamd/src/cl_d3d9.cpp @@ -0,0 +1,854 @@ +/* Copyright (c) 2012 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifdef _WIN32 + +#include "top.hpp" + +#include "cl_d3d9_amd.hpp" +#include "platform/command.hpp" + +#include +#include + +#define D3DFMT_NV_12 static_cast(MAKEFOURCC('N', 'V', '1', '2')) +#define D3DFMT_P010 static_cast(MAKEFOURCC('P', '0', '1', '0')) +#define D3DFMT_YV_12 static_cast(MAKEFOURCC('Y', 'V', '1', '2')) +#define D3DFMT_YUY2 static_cast(MAKEFOURCC('Y', 'U', 'Y', '2')) + + +RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromDX9MediaAdapterKHR, + (cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr* media_adapters_type, void* media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + cl_int errcode; + // Accept an array of DX9 devices here as the spec mention of array of num_media_adapters size. 
+ IDirect3DDevice9Ex** d3d9_device = static_cast(media_adapters); + cl_device_id* gpu_devices = NULL; + cl_uint num_gpu_devices = 0; + static const bool VALIDATE_ONLY = true; + + if (platform != NULL && platform != AMD_PLATFORM) { + LogWarning("\"platrform\" is not a valid AMD platform"); + return CL_INVALID_PLATFORM; + } + // check if input parameter are correct + if ((num_media_adapters == 0) || (media_adapters_type == NULL) || (media_adapters == NULL) || + (media_adapter_set != CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR && + media_adapter_set != CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR) || + (num_entries == 0 && devices != NULL)) { + return CL_INVALID_VALUE; + } + // Get GPU devices + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } + + if (!num_gpu_devices) { + *not_null(num_devices) = 0; + return CL_DEVICE_NOT_FOUND; + } + + switch (media_adapter_set) { + case CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR: + case CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR: { + gpu_devices = new cl_device_id[num_gpu_devices]; + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + break; + } + + std::vector compatible_devices; + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + cl_device_id device = gpu_devices[i]; + amd::Context::Flags context_flag; + amd::Context::DeviceFlagIdx devIdx; + switch (media_adapters_type[i]) { + case CL_ADAPTER_D3D9_KHR: + context_flag = amd::Context::Flags::D3D9DeviceKhr; + devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceKhrIdx; + break; + case CL_ADAPTER_D3D9EX_KHR: + context_flag = amd::Context::Flags::D3D9DeviceEXKhr; + devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceEXKhrIdx; + break; + case CL_ADAPTER_DXVA_KHR: + context_flag = amd::Context::Flags::D3D9DeviceVAKhr; + devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceVAKhrIdx; + break; + } + + for 
(cl_uint j = 0; j < num_media_adapters; ++j) { + // Since there can be multiple DX9 adapters passed in the array we need to validate + // interopability with each. + void* external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; + external_device[devIdx] = d3d9_device[j]; + + if (is_valid(device) && (media_adapters_type[j] == CL_ADAPTER_D3D9EX_KHR) && + as_amd(device)->bindExternalDevice(context_flag, external_device, NULL, + VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + } + if (compatible_devices.size() == 0) { + *not_null(num_devices) = 0; + errcode = CL_DEVICE_NOT_FOUND; + break; + } + + auto it = compatible_devices.cbegin(); + cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); + + while (compatible_count--) { + *devices++ = as_cl(*it++); + --num_entries; + } + while (num_entries--) { + *devices++ = (cl_device_id)0; + } + + *not_null(num_devices) = (cl_uint)compatible_devices.size(); + } break; + + default: + LogWarning("\"d3d9_device_set\" is invalid"); + errcode = CL_INVALID_VALUE; + } + + delete[] gpu_devices; + return errcode; +} +RUNTIME_EXIT + +RUNTIME_ENTRY_RET(cl_mem, clCreateFromDX9MediaSurfaceKHR, + (cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void* surface_info, cl_uint plane, + cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + cl_dx9_surface_info_khr* cl_surf_info = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + if ((adapter_type != CL_ADAPTER_D3D9_KHR) && (adapter_type != CL_ADAPTER_D3D9EX_KHR) && 
+ (adapter_type != CL_ADAPTER_DXVA_KHR)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return clMemObj; + } + + if (!surface_info) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } + + cl_surf_info = (cl_dx9_surface_info_khr*)surface_info; + IDirect3DSurface9* pD3D9Resource = cl_surf_info->resource; + HANDLE shared_handle = cl_surf_info->shared_handle; + + if (!pD3D9Resource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"surface_info\" is a NULL pointer"); + return clMemObj; + } + + D3DSURFACE_DESC Desc; + pD3D9Resource->GetDesc(&Desc); + + if ((Desc.Format != D3DFMT_NV_12) && + (Desc.Format != D3DFMT_P010) && + (Desc.Format != D3DFMT_YV_12) && (plane != 0)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("The plane has to be Zero if the surface format is non-planar !"); + return clMemObj; + } + + // Check for image support + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } + // Verify the resource is a 2D image + return amd::clCreateImage2DFromD3D9ResourceAMD(*as_amd(context), flags, adapter_type, + cl_surf_info, plane, errcode_ret); +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clEnqueueAcquireDX9MediaSurfacesKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR); +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clEnqueueReleaseDX9MediaSurfacesKHR, + 
(cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR); +} +RUNTIME_EXIT + +// +// +// namespace amd +// +// +namespace amd { +/*! @} + * \addtogroup CL-D3D9 interop helper functions + * @{ + */ +// +// Class D3D9Object implementation +// +std::vector> D3D9Object::resources_; +Monitor D3D9Object::resLock_; + +// +// clCreateImage2DFromD3D9ResourceAMD +// +cl_mem clCreateImage2DFromD3D9ResourceAMD(Context& amdContext, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + cl_dx9_surface_info_khr* surface_info, cl_uint plane, + int* errcode_ret) { + cl_dx9_surface_info_khr* cl_surf_info = reinterpret_cast(surface_info); + IDirect3DSurface9* pD3D9Resource = cl_surf_info->resource; + HANDLE shared_handle = cl_surf_info->shared_handle; + + D3D9Object obj; + cl_int errcode = D3D9Object::initD3D9Object(amdContext, adapter_type, surface_info, plane, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } + + Image2DD3D9* pImage2DD3D9 = new (amdContext) Image2DD3D9(amdContext, flags, obj); + if (!pImage2DD3D9) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage2DD3D9->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage2DD3D9->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage2DD3D9); +} + +// +// Helper function SyncD3D9Objects +// +void SyncD3D9Objects(std::vector& memObjects) { + Memory*& mem = memObjects.front(); + if (!mem) { + LogWarning("\nNULL memory object\n"); + return; + } + InteropObject* interop = mem->getInteropObj(); + if (!interop) { + LogWarning("\nNULL interop object\n"); + return; + } + D3D9Object* 
d3d9Obj = interop->asD3D9Object(); + if (!d3d9Obj) { + LogWarning("\nNULL D3D9 object\n"); + return; + } + IDirect3DQuery9* query = d3d9Obj->getQuery(); + if (!query) { + LogWarning("\nNULL IDirect3DQuery9\n"); + return; + } + ScopedLock sl(d3d9Obj->getResLock()); + query->Issue(D3DISSUE_END); + BOOL data = FALSE; + while (S_OK != query->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH)) { + } +} + +// +// Class D3D10Object implementation +// +size_t D3D9Object::getElementBytes(D3DFORMAT d3d9Format, cl_uint plane) { + size_t bytesPerPixel; + + switch (d3d9Format) { + case D3DFMT_UNKNOWN: + case D3DFMT_UYVY: + case D3DFMT_DXT1: + case D3DFMT_DXT2: + case D3DFMT_DXT3: + case D3DFMT_DXT4: + case D3DFMT_DXT5: + case D3DFMT_VERTEXDATA: + case D3DFMT_D32: + case D3DFMT_D15S1: + case D3DFMT_D24S8: + case D3DFMT_D24X8: + case D3DFMT_D24X4S4: + case D3DFMT_D16: + case D3DFMT_INDEX16: + case D3DFMT_INDEX32: + case D3DFMT_MULTI2_ARGB8: + case D3DFMT_CxV8U8: + // Less than 1 byte per pixel - needs special consideration + bytesPerPixel = 0; + break; + + case D3DFMT_R3G3B2: + case D3DFMT_P8: + case D3DFMT_A8: + case D3DFMT_L8: + case D3DFMT_A4L4: + bytesPerPixel = 1; + break; + + case D3DFMT_R16F: + case D3DFMT_R5G6B5: + case D3DFMT_X1R5G5B5: + case D3DFMT_A1R5G5B5: + case D3DFMT_A4R4G4B4: + case D3DFMT_A8R3G3B2: + case D3DFMT_X4R4G4B4: + case D3DFMT_A8P8: + case D3DFMT_A8L8: + case D3DFMT_V8U8: + case D3DFMT_L6V5U5: + case D3DFMT_D16_LOCKABLE: + case D3DFMT_L16: + bytesPerPixel = 2; + break; + + case D3DFMT_R8G8B8: + case D3DFMT_D24FS8: + bytesPerPixel = 3; + break; + + case D3DFMT_D32F_LOCKABLE: + case D3DFMT_A8R8G8B8: + case D3DFMT_R32F: + case D3DFMT_X8R8G8B8: + case D3DFMT_A2B10G10R10: + case D3DFMT_A8B8G8R8: + case D3DFMT_X8B8G8R8: + case D3DFMT_G16R16: + case D3DFMT_A2R10G10B10: + case D3DFMT_Q8W8V8U8: + case D3DFMT_X8L8V8U8: + case D3DFMT_V16U16: + case D3DFMT_A2W10V10U10: + case D3DFMT_R8G8_B8G8: + case D3DFMT_G8R8_G8B8: + case D3DFMT_G16R16F: + case D3DFMT_YUY2: + 
bytesPerPixel = 4; + break; + + case D3DFMT_G32R32F: + case D3DFMT_A16B16G16R16: + case D3DFMT_A16B16G16R16F: + case D3DFMT_Q16W16V16U16: + bytesPerPixel = 8; + break; + case D3DFMT_A32B32G32R32F: + bytesPerPixel = 16; + break; + //#if !defined(D3D_DISABLE_9EX) + // case D3DFMT_D32_LOCKABLE: + // case D3DFMT_S8_LOCKABLE: + //#endif // !D3D_DISABLE_9EX + case D3DFMT_NV_12: + if (plane == 0) { + bytesPerPixel = 1; + } else if (plane == 1) { + bytesPerPixel = 2; + } // plane != 0 or != 1 shouldn't happen here + break; + case D3DFMT_P010: + if (plane == 0) { + bytesPerPixel = 2; + } else if (plane == 1) { + bytesPerPixel = 4; + } // plane != 0 or != 1 shouldn't happen here + break; + case D3DFMT_YV_12: + bytesPerPixel = 1; + break; + + default: + bytesPerPixel = 0; + _ASSERT(FALSE); + break; + } + return bytesPerPixel; +} + +void setObjDesc(amd::D3D9ObjDesc_t& objDesc, D3DSURFACE_DESC& resDesc, cl_uint plane) { + objDesc.d3dPool_ = resDesc.Pool; + objDesc.resType_ = resDesc.Type; + objDesc.usage_ = resDesc.Usage; + objDesc.d3dFormat_ = resDesc.Format; + switch (resDesc.Format) { + case D3DFMT_NV_12: + case D3DFMT_P010: + objDesc.surfRect_.left = 0; + objDesc.surfRect_.top = 0; + if (plane == 0) { + objDesc.objSize_.Height = resDesc.Height; + objDesc.objSize_.Width = resDesc.Width; + objDesc.surfRect_.right = resDesc.Width; // resDesc.Width/2-1; + objDesc.surfRect_.bottom = 3 * resDesc.Height / 2; + ; // 3*resDesc.Height/2-1; + } else if (plane == 1) { + objDesc.objSize_.Height = resDesc.Height / 2; + objDesc.objSize_.Width = resDesc.Width / 2; + objDesc.surfRect_.right = resDesc.Width; // resDesc.Width/2-1; + objDesc.surfRect_.bottom = 3 * resDesc.Height / 2; + ; // 3*resDesc.Height/2-1; + } // plane != 0 or != 1 shouldn't happen here + break; + case D3DFMT_YV_12: + objDesc.surfRect_.left = 0; + if (plane == 0) { + objDesc.objSize_.Height = resDesc.Height; + objDesc.objSize_.Width = resDesc.Width; + objDesc.surfRect_.top = 0; + objDesc.surfRect_.right = resDesc.Width - 
/*! \brief Validates a D3D9 surface for CL interop and fills \a obj with the
 *  state needed to share one plane of it.
 *
 * \param amdContext   context used only for the image-format support query.
 * \param adapter_type media adapter type requested by the caller;
 *                     CL_ADAPTER_D3D9_KHR and CL_ADAPTER_DXVA_KHR are rejected.
 * \param cl_surf_info surface pointer plus optional shared handle supplied by
 *                     the application (dereferenced without a NULL check).
 * \param plane        plane index of the surface to expose.
 * \param obj          object that receives the surface/query pointers and the
 *                     surface descriptors.
 *
 * \return CL_SUCCESS, or one of CL_INVALID_DX9_MEDIA_ADAPTER_KHR,
 *         CL_INVALID_D3D9_RESOURCE_KHR, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR.
 */
int D3D9Object::initD3D9Object(const Context& amdContext,
                               cl_dx9_media_adapter_type_khr adapter_type,
                               cl_dx9_surface_info_khr* cl_surf_info, cl_uint plane,
                               D3D9Object& obj) {
  // resources_ is read and appended to below, so hold resLock_ for the whole call.
  ScopedLock sl(resLock_);

  IDirect3DDevice9Ex* pDev9Ex = NULL;
  cl_int errcode = CL_SUCCESS;

  // Check if this resource has already been used for interop
  IDirect3DSurface9* pD3D9res = cl_surf_info->resource;
  HANDLE shared_handle = cl_surf_info->shared_handle;

  if ((adapter_type == CL_ADAPTER_D3D9_KHR) || (adapter_type == CL_ADAPTER_DXVA_KHR)) {
    return CL_INVALID_DX9_MEDIA_ADAPTER_KHR;  // Not supported yet
  }

  // The same (surface, plane) pair may be registered only once.
  for (const auto& it : resources_) {
    if (it.first.surfInfo.resource == cl_surf_info->resource && it.first.surfPlane == plane) {
      return CL_INVALID_D3D9_RESOURCE_KHR;
    }
  }

  HRESULT hr;
  D3DQUERYTYPE desc = D3DQUERYTYPE_EVENT;

  D3DSURFACE_DESC resDesc;
  if (D3D_OK != pD3D9res->GetDesc(&resDesc)) {
    return CL_INVALID_D3D9_RESOURCE_KHR;
  }

  // The surface must belong to a D3D9Ex device; the device pointer obtained here
  // is released further down, after the shared surface has been set up.
  hr = pD3D9res->GetContainer(IID_IDirect3DDevice9Ex, (void**)&pDev9Ex);
  if (hr == D3D_OK) {
    // NOTE(review): the HRESULT of CreateQuery is not checked, so obj.pQuery_
    // is only as valid as this call happened to leave it.
    pDev9Ex->CreateQuery(desc, &(obj.pQuery_));
  } else {
    return CL_INVALID_D3D9_RESOURCE_KHR;  // d3d9ex should be supported
  }

  obj.handleShared_ = shared_handle;
  obj.surfPlane_ = plane;
  obj.surfInfo_ = *cl_surf_info;
  obj.adapterType_ = adapter_type;

  // Init defaults
  setObjDesc(obj.objDescOrig_, resDesc, plane);
  obj.objDesc_ = obj.objDescOrig_;

  // shared handle cases if the shared_handle is NULL
  // first check if the format is NV12 or YV12, which we need special handling
  if (NULL == shared_handle) {
    // Reuse the shared surface already recorded for another plane of the same
    // original surface, if there is one.
    bool found = false;
    for (const auto& it : resources_) {
      if (it.first.surfInfo.resource == cl_surf_info->resource &&
          it.first.surfPlane != plane) {
        obj.handleShared_ = it.second.surfInfo.shared_handle;
        obj.pD3D9Res_ = it.second.surfInfo.resource;
        obj.pD3D9Res_->AddRef();
        obj.objDesc_ = obj.objDescOrig_;
        found = true;
        break;
      }
    }
    if (!found) {
      // No sibling plane registered yet: create a new off-screen surface with
      // the same dimensions/format/pool and let D3D fill in its shared handle.
      obj.handleShared_ = 0;
      hr = pDev9Ex->CreateOffscreenPlainSurface(resDesc.Width, resDesc.Height, resDesc.Format,
                                                resDesc.Pool, &obj.pD3D9Res_, &obj.handleShared_);

      if (D3D_OK != hr) {
        // Only recorded here; execution continues and the failure is returned
        // at the end (unless the format check below returns first).
        errcode = CL_INVALID_D3D9_RESOURCE_KHR;
      }
    }

    // put the original info into the obj
    obj.pD3D9ResOrig_ = pD3D9res;
    obj.pD3D9ResOrig_->AddRef();  // addRef in case lost the resource
  } else {
    // Share the original resource
    obj.pD3D9ResOrig_ = NULL;
    obj.pD3D9Res_ = pD3D9res;
    obj.pD3D9Res_->AddRef();
  }

  // Release the Ex interface
  if (pDev9Ex) pDev9Ex->Release();

  // Check for CL format compatibility
  if (obj.objDesc_.resType_ == D3DRTYPE_SURFACE) {
    cl_image_format clFmt = obj.getCLFormatFromD3D9(obj.objDesc_.d3dFormat_, plane);
    amd::Image::Format imageFormat(clFmt);
    if (!imageFormat.isSupported(amdContext)) {
      // NOTE(review): this return bypasses errcode and leaves the references
      // taken above owned by obj - presumably released by obj's teardown; confirm.
      return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    }
  }

  // Pair of (original surface info, shared surface info) for this plane.
  TD3D9RESINFO d3d9ObjOri = {*cl_surf_info, plane};
  TD3D9RESINFO d3d9ObjShared = {{obj.pD3D9Res_, obj.handleShared_}, plane};

  // Register the pair only when everything above succeeded.
  if (errcode == CL_SUCCESS) {
    resources_.push_back({d3d9ObjOri, d3d9ObjShared});
  }

  return errcode;
}
break; + + case D3DFMT_A16B16G16R16: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; + + case D3DFMT_A8B8G8R8: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case D3DFMT_X8B8G8R8: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case D3DFMT_A8R8G8B8: + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + + case D3DFMT_X8R8G8B8: + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + case D3DFMT_NV_12: + fmt.image_channel_data_type = CL_UNORM_INT8; + if (plane == 0) { + fmt.image_channel_order = CL_R; + } else if (plane == 1) { + fmt.image_channel_order = CL_RG; + } + break; + case D3DFMT_P010: + fmt.image_channel_data_type = CL_UNORM_INT16; + if (plane == 0) { + fmt.image_channel_order = CL_R; + } else if (plane == 1) { + fmt.image_channel_order = CL_RG; + } + break; + case D3DFMT_YV_12: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; + case D3DFMT_YUY2: + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + case D3DFMT_UNKNOWN: + case D3DFMT_R8G8B8: + case D3DFMT_R5G6B5: + case D3DFMT_X1R5G5B5: + case D3DFMT_A1R5G5B5: + case D3DFMT_A4R4G4B4: + case D3DFMT_R3G3B2: + case D3DFMT_A8R3G3B2: + case D3DFMT_X4R4G4B4: + case D3DFMT_A2B10G10R10: + case D3DFMT_A2R10G10B10: + case D3DFMT_A8P8: + case D3DFMT_P8: + case D3DFMT_A4L4: + case D3DFMT_V8U8: + case D3DFMT_L6V5U5: + case D3DFMT_X8L8V8U8: + case D3DFMT_Q8W8V8U8: + case D3DFMT_V16U16: + case D3DFMT_A2W10V10U10: + case D3DFMT_UYVY: + case D3DFMT_R8G8_B8G8: + case D3DFMT_G8R8_G8B8: + case D3DFMT_DXT1: + case D3DFMT_DXT2: + case D3DFMT_DXT3: + case D3DFMT_DXT4: + case D3DFMT_DXT5: + case D3DFMT_D16_LOCKABLE: + case D3DFMT_D32: + case D3DFMT_D15S1: + case D3DFMT_D24S8: + case D3DFMT_D24X8: + case D3DFMT_D24X4S4: + case 
D3DFMT_D16: + case D3DFMT_D32F_LOCKABLE: + case D3DFMT_D24FS8: + //#if !defined(D3D_DISABLE_9EX) + case D3DFMT_D32_LOCKABLE: + case D3DFMT_S8_LOCKABLE: + //#endif // !D3D_DISABLE_9EX + case D3DFMT_VERTEXDATA: + case D3DFMT_INDEX16: + case D3DFMT_INDEX32: + case D3DFMT_Q16W16V16U16: + case D3DFMT_MULTI2_ARGB8: + case D3DFMT_CxV8U8: + //#if !defined(D3D_DISABLE_9EX) + case D3DFMT_A1: + case D3DFMT_A2B10G10R10_XR_BIAS: + case D3DFMT_BINARYBUFFER: + _ASSERT(FALSE); // NOT SURPPORTED + break; + //#endif // !D3D_DISABLE_9EX + default: + _ASSERT(FALSE); + break; + } + + return fmt; +} + +bool D3D9Object::copyOrigToShared() { + // Don't copy if there is no orig + if (NULL == getD3D9ResOrig()) return true; + + IDirect3DDevice9Ex* d3dDev; + HRESULT hr; + ScopedLock sl(getResLock()); + + IDirect3DSurface9* srcSurf = getD3D9ResOrig(); + IDirect3DSurface9* dstSurf = getD3D9Resource(); + + hr = getD3D9Resource()->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev); + if (hr != D3D_OK || !d3dDev) { + LogError("\nCannot get D3D9 device from D3D9 surface\n"); + return false; + } + + hr = d3dDev->StretchRect(srcSurf, NULL, dstSurf, NULL, D3DTEXF_NONE); + if (hr != D3D_OK) { + LogError("\ncopy original surface to shared surface failed\n"); + return false; + } + // Flush D3D queues and make sure D3D stuff is finished + pQuery_->Issue(D3DISSUE_END); + BOOL data; + while ((D3D_OK != pQuery_->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH)) && (data != TRUE)) { + } + + if (d3dDev) d3dDev->Release(); + return true; +} + +bool D3D9Object::copySharedToOrig() { + // Don't copy if there is no orig + if (NULL == getD3D9ResOrig()) return true; + + IDirect3DDevice9Ex* d3dDev; + HRESULT hr; + ScopedLock sl(getResLock()); + + hr = getD3D9Resource()->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev); + if (hr != D3D_OK || !d3dDev) { + LogError("\nCannot get D3D9 device from D3D9 surface\n"); + return false; + } + + hr = d3dDev->StretchRect(getD3D9Resource(), NULL, getD3D9ResOrig(), NULL, 
D3DTEXF_NONE); + if (hr != D3D_OK) { + LogError("\ncopy shared surface to original surface failed\n"); + return false; + } + + if (d3dDev) d3dDev->Release(); + return true; +} + +void Image2DD3D9::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image2DD3D9)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +} // namespace amd + +#endif //_WIN32 diff --git a/projects/clr/hipamd/src/cl_gl.cpp b/projects/clr/hipamd/src/cl_gl.cpp new file mode 100644 index 0000000000..241e3f20b1 --- /dev/null +++ b/projects/clr/hipamd/src/cl_gl.cpp @@ -0,0 +1,2472 @@ +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include "top.hpp" + +#ifdef _WIN32 +#include +#include +#include +// This is necessary since there are common GL/D3D10 functions +#include "cl_d3d9_amd.hpp" +#include "cl_d3d10_amd.hpp" +#include "cl_d3d11_amd.hpp" +#endif //_WIN32 + +#include +#include + +#include +#include +#include + +#include "cl_common.hpp" +#include "cl_gl_amd.hpp" + +#include "device/device.hpp" + +/* The pixel internal format for DOPP texture defined in gl_enum.h */ +#define GL_BGR8_ATI 0x8083 +#define GL_BGRA8_ATI 0x8088 + +#include +#include + + +/*! \addtogroup API + * @{ + * + * \addtogroup CL_GL_Interops + * + * This section discusses OpenCL functions that allow applications to + * use OpenGL buffer/texture/render-buffer objects as OpenCL memory + * objects. This allows efficient sharing of data between OpenCL + * and OpenGL. The OpenCL API can be used to execute kernels that read + * and/or write memory objects that are also an OpenGL buffer object + * or a texture. An OpenCL image object can be created from an OpenGL + * texture or renderbuffer object. An OpenCL buffer object can be + * created from an OpenGL buffer object. An OpenCL memory object can + * be created from an OpenGL texture/buffer/render-buffer object or + * the default system provided framebuffer if and only if the OpenCL + * clContext has been created from a GL clContext. OpenGL contexts are + * created using platform specific APIs (EGL, CGL, WGL, GLX are some + * of the platform specific APIs that allow applications to create GL + * contexts). The appropriate platform API (such as EGL, CGL, WGL, + * GLX) will be extended to allow a CL clContext to be created from a + * GL clContext. Creating an OpenCL memory object from the default + * system provided framebuffer will also require an appropriate + * extension to the platform API.
Refer to the appropriate platform + * API documentation to understand how to create a CL clContext from a + * GL clContext and creating a CL memory object from the default + * system provided framebuffer. + * + * @{ + * + * \addtogroup clCreateFromGLBuffer + * + * @{ + */ + +/*! \brief Creates an OpenCL buffer object from an OpenGL buffer object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. Only + * CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE can be used. + * + * \param glBufferName is a GL buffer object name. The GL buffer + * object must have a data store created though it does not need to + * be initialized. The size of the data store will be used to + * determine the size of the CL buffer object. + * + * \param pCpuMem is a pointer to the buffer data that may already be + * allocated by the application. The size of the buffer that pCpuMem points + * to must be >= \a size bytes. Passing in a pointer to an already allocated + * buffer on the host and using it as a buffer object allows applications to + * share data efficiently with kernels and the host. + * + * \param errcode_ret will return an appropriate error code. If errcode_ret + * is NULL, no error code is returned. + * + * \return valid non-zero OpenCL buffer object and errcode_ret is set + * to CL_SUCCESS if the buffer object is created successfully. It + * returns a NULL value with one of the following error values + * returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_GL_OBJECT if glBufferName is not a GL buffer object or is a + * GL buffer object but does not have a data store created. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. 
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLBuffer, + (cl_context context, cl_mem_flags flags, GLuint bufobj, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + return (amd::clCreateFromGLBufferAMD(*as_amd(context), flags, bufobj, errcode_ret)); +} +RUNTIME_EXIT + +/*! \brief creates the following: + * - an OpenCL 2D image object from an OpenGL 2D texture object + * or a single face of an OpenGL cubemap texture object, + * - an OpenCL 2D image array object from an OpenGL 2D texture array object, + * - an OpenCL 1D image object from an OpenGL 1D texture object, + * - an OpenCL 1D image buffer object from an OpenGL texture buffer object, + * - an OpenCL 1D image array object from an OpenGL 1D texture array object, + * - an OpenCL 3D image object from an OpenGL 3D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param texture_target must be GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, + * GL_TEXTURE_BUFFER, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, + * GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X, + * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB. + * + * \param miplevel is the mipmap level to be used. 
If \a texture_target + * is GL_TEXTURE_BUFFER, \a miplevel must be 0. + * + * \param texture is a GL 1D, 2D, 3D, 1D array, 2D array, cubemap, + * rectangle or buffer texture object. + * The texture object must be a complete texture as per + * OpenGL rules on texture completeness. The texture format and dimensions + * defined by OpenGL for the specified miplevel of the texture will be + * used to create the OpenCL image memory object. Only GL texture formats + * that map to appropriate image channel order and data type can be used + * to create the OpenCL image memory object. + * + * \param errcode_ret will return an appropriate error code. If \a + * errcode_ret is NULL, no error code is returned. + * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture, + * cubemap or texture rectangle. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime.
+ * + * \version 1.2r07 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture, + (cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, texture_target, miplevel, texture, + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLTexture2D + * @{ + */ + +/*! \brief Create an OpenCL 2D image object from an OpenGL 2D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param target must be GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X, + * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB. + * + * \param miplevel is the mipmap level to be used. + * + * \param texture is a GL 2D texture, cubemap or texture rectangle + * object name. 
The texture object must be a complete texture as per + * OpenGL rules on texture completeness. The \a texture format and + * dimensions specified using appropriate glTexImage2D call for \a + * miplevel will be used to create the 2D image object. Only GL + * texture formats that map to appropriate image channel order and + * data type can be used to create the 2D image object. + * + * \param errcode_ret will return an appropriate error code. If \a + * errcode_ret is NULL, no error code is returned. + * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture, + * cubemap or texture rectangle. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. 
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture2D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLTexture3D + * @{ + */ + +/*! \brief Create an OpenCL 3D image object from an OpenGL 3D texture object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param target must be GL_TEXTURE_3D. + * + * \param miplevel is the mipmap level to be used. + * + * \param texture is a GL 3D texture object [name]. + * The texture object must be a complete texture as per OpenGL rules on texture + * completeness. The \a texture format and dimensions specified using appropriate + * glTexImage3D call for \a miplevel will be used to create the 3D image object. 
+ * Only GL texture formats that map to appropriate image channel order and + * data type can be used to create the 3D image object. + * + * \param errcode_ret will return an appropriate error code. If \a errcode_ret + * is NULL, no error code is returned. + * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set to + * CL_SUCCESS if the image object is created successfully. It returns a NULL value + * with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture. + * - CL_INVALID_GL_OBJECT if \a texture is not a GL 3D texture. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not + * map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime.
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture3D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (const auto& it : devices) { + if (it->info().imageSupport_) { + supportPass = true; + } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } + + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, + errcode_ret); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clCreateFromGLRenderbuffer + * @{ + */ + +/*! \brief Create an OpenCL 2D image object from an OpenGL renderbuffer object. + * + * \param clContext is a valid OpenCL clContext created from an OpenGL clContext. + * + * \param clFlags is a bit-field that is used to specify usage information. + * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values + * can be used. + * + * \param renderbuffer is a GL renderbuffer object name. The renderbuffer + * storage must be specified before the image object can be created. Only + * GL renderbuffer formats that map to appropriate image channel order and + * data type can be used to create the 2D image object. + * + * \param errcode_ret will return an appropriate error code. If \a errcode_ret + * is NULL, no error code is returned. 
+ * + * \return A valid non-zero OpenCL image object and \a errcode_ret is set + * to CL_SUCCESS if the image object is created successfully. It returns a + * NULL value with one of the following error values returned in \a errcode_ret: + * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not + * created from a GL clContext. + * - CL_INVALID_VALUE if values specified in \a clFlags are not valid. + * - CL_INVALID_GL_OBJECT if \a renderbuffer is not an GL renderbuffer object. + * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL renderbuffer format + * does not map to an appropriate OpenCL image format. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * by the runtime. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLRenderbuffer, (cl_context context, cl_mem_flags flags, + GLuint renderbuffer, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; + + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + return (amd::clCreateFromGLRenderbufferAMD(*as_amd(context), flags, renderbuffer, errcode_ret)); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLObjectInfo + * @{ + */ + +/*! \brief Query GL object type from a CL memory object. + * + * \param memobj [is a valid cl_mem object created from a GL object]. + * + * \param gl_object_type returns the type of GL object attached to memobj + * and can be CL_GL_OBJECT_BUFFER, CL_GL_OBJECT_TEXTURE2D, + * CL_GL_OBJECT_TEXTURE_RECTANGLE, CL_GL_OBJECT_TEXTURE3D, or + * CL_GL_OBJECT_RENDERBUFFER. If \a gl_object_type is NULL, it is ignored. 
+ * + * \param gl_object_name returns the GL object name used to create memobj. + * If \a gl_object_name is NULL, it is ignored. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the call was executed successfully. + * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object. + * - CL_INVALID_GL_OBJECT if there is no GL object associated with \a memobj. + * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clGetGLObjectInfo, + (cl_mem memobj, cl_gl_object_type* gl_object_type, GLuint* gl_object_name)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } + + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + + amd::GLObject* glObject = interop->asGLObject(); + if (NULL == glObject) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + + cl_int result; + + cl_gl_object_type clGLType = glObject->getCLGLObjectType(); + result = amd::clGetInfo(clGLType, sizeof(cl_gl_object_type), gl_object_type, NULL); + + GLuint glName = glObject->getGLName(); + result |= amd::clGetInfo(glName, sizeof(GLuint), gl_object_name, NULL); + + return result; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLTextureInfo + * @{ + */ + +/*! \brief Query additional information about the GL texture object associated + * with \a memobj. + * + * \param memobj [is a valid cl_mem object created from a GL object]. + * + * \param param_name specifies what additional information about the GL + * texture object associated with \a memobj to query: + * - CL_GL_TEXTURE_TARGET (GLenum) to query the \a target argument specified + * in clCreateGLTexture2D or clCreateGLTexture3D calls. 
+ * - CL_GL_MIPMAP_LEVEL (GLint) to query the \a miplevel argument specified + * in clCreateFromGLTexture2D or clCreateFromGLTexture3D calls. + * + * \param param_value is a pointer to memory where the appropriate result + * being queried is returned. If \a param_value is NULL, it is ignored. + * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type as + * described for \a param_name argument (GLenum or GLint). + * \a param_value_size_ret returns the actual size in bytes of data copied to + * \a param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object. + * - CL_INVALID_GL_OBJECT if there is no GL texture object (2D or 3D texture) + * associated with \a memobj. + * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes + * specified by \a param_value_size is < size of return type required by + * \a param_name and \a param_value is not NULL, or if \a param_value and + * \a param_value_size_ret are NULL.
+ * + * \version 1.0r29 + */ +RUNTIME_ENTRY(cl_int, clGetGLTextureInfo, + (cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + amd::GLObject* glObject = interop->asGLObject(); + if ((NULL == glObject) || (NULL != glObject->asBufferGL())) { + LogWarning("CL object \"memobj\" is not created from GL texture"); + return CL_INVALID_GL_OBJECT; + } + + switch (param_name) { + case CL_GL_TEXTURE_TARGET: { + GLenum glTarget = glObject->getGLTarget(); + if (glTarget == GL_TEXTURE_CUBE_MAP) { + glTarget = glObject->getCubemapFace(); + } + return amd::clGetInfo(glTarget, param_value_size, param_value, param_value_size_ret); + } + case CL_GL_MIPMAP_LEVEL: { + GLint mipLevel = glObject->getGLMipLevel(); + return amd::clGetInfo(mipLevel, param_value_size, param_value, param_value_size_ret); + } + case CL_GL_NUM_SAMPLES: { + GLsizei numSamples = glObject->getNumSamples(); + return amd::clGetInfo(numSamples, param_value_size, param_value, param_value_size_ret); + } + default: + LogWarning("Unknown param_name in clGetGLTextureInfoAMD"); + break; + } + + return CL_INVALID_VALUE; +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clEnqueueAcquireExtObjects + * @{ + */ + +/*! \brief Acquire OpenCL memory objects that have been created from external + * objects (OpenGL, D3D). + * + * \param command_queue is a valid command-queue. + * + * \param num_objects is the number of memory objects to be acquired + * in \a mem_objects. + * + * \param mem_objects is a pointer to a list of CL memory objects that refer + * to a GL object (buffer/texture/renderbuffer objects or the framebuffer). 
+ * + * \param event_wait_list specifies [is a pointer to] events that need to + * complete before this particular command can be executed. + * If \a event_wait_list is NULL, then this particular command does not wait + * on any event to complete. If \a event_wait_list is NULL, + * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL, + * the list of events pointed to by \a event_wait_list must be valid and + * \a num_events_in_wait_list must be greater than 0. The events specified in + * \a event_wait_list act as synchronization points. + * + * \param num_events_in_wait_list specifies the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from GL object(s). 
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and + * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and + * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list + * are not valid events. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources + * required by the OpenCL implementation on the host. + * + * \version 1.0r29 + */ +/* Thin API entry point: forwards every argument unchanged to + * amd::clEnqueueAcquireExtObjectsAMD and tags the command as + * CL_COMMAND_ACQUIRE_GL_OBJECTS. The visible body performs no checks itself. + */ +RUNTIME_ENTRY(cl_int, clEnqueueAcquireGLObjects, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_GL_OBJECTS); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clEnqueueReleaseGLObjects + * @{ + */ + +/*! \brief Release OpenCL memory objects that have been created from OpenGL + * objects. + * + * \param command_queue is a valid command-queue [which is associated with the + * OpenCL clContext releasing the OpenGL objects]. + * + * \param num_objects is the number of memory objects to be released + * in \a mem_objects. + * + * \param mem_objects is a pointer to a list of CL memory objects that refer + * to a GL object (buffer/texture/renderbuffer objects or the framebuffer). + * + * \param event_wait_list specifies [is a pointer to] events that need to + * complete before this particular command can be executed. + * If \a event_wait_list is NULL, then this particular command does not wait + * on any event to complete. If \a event_wait_list is NULL, + * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL, + * the list of events pointed to by \a event_wait_list must be valid and + * \a num_events_in_wait_list must be greater than 0. The events specified in + * \a event_wait_list act as synchronization points. 
+ * + * \param num_events_in_wait_list specifies the number of events in + * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be + * greater than 0 if \a event_wait_list is not NULL. + * + * \param event returns an event object that identifies this particular + * command and can be used to query or queue a wait for this particular + * command to complete. \a event can be NULL in which case it will not be + * possible for the application to query the status of this command or queue a + * wait for this command to complete. + * + * \return One of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from GL object(s). + * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and + * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and + * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list + * are not valid events. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources + * required by the OpenCL implementation on the host. 
+ * + * \version 1.0r29 + */ +/* Thin API entry point: forwards every argument unchanged to + * amd::clEnqueueReleaseExtObjectsAMD and tags the command as + * CL_COMMAND_RELEASE_GL_OBJECTS. The visible body performs no checks itself. + */ +RUNTIME_ENTRY(cl_int, clEnqueueReleaseGLObjects, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_GL_OBJECTS); +} +RUNTIME_EXIT + +/*! @} +* \addtogroup clCreateEventFromGLsyncKHR +* @{ +*/ + +/*! \brief Creates an event object linked to an OpenGL sync object. +* Completion of such an event object is equivalent to waiting for completion +* of the fence command associated with the linked GL sync object. +* +* \param context is a valid OpenCL context created from an OpenGL context +* or share group, using the cl_khr_gl_sharing extension. +* +* \param clGLsync is the 'name' of a sync object in the GL share group associated +* with context. +* +* \param errcode_ret Returns an appropriate error code as described below. +* If errcode_ret is NULL, no error code is returned. +* +* \return a valid OpenCL event object and errcode_ret is set to CL_SUCCESS +* if the event object is created successfully. Otherwise, it returns a NULL +* value with one of the following error values returned in errcode_ret: +* - CL_INVALID_CONTEXT if context is not a valid context or was not created +* from a GL context. +* - CL_INVALID_GL_OBJECT if sync is not the name of a sync object in the +* GL share group associated with context. 
+* +* \version 1.1 +*/ +/* NOTE(review): the visible body never writes \a errcode_ret and dereferences + * as_amd(context) and glenv() without checking them, although the contract + * above promises CL_INVALID_CONTEXT / CL_INVALID_GL_OBJECT for bad input. + * Confirm whether RUNTIME_ENTRY_RET or the callers cover these cases. + */ + +RUNTIME_ENTRY_RET(cl_event, clCreateEventFromGLsyncKHR, + (cl_context context, cl_GLsync clGLsync, cl_int* errcode_ret)) { + // create event of fence sync type + amd::ClGlEvent* clglEvent = new amd::ClGlEvent(*as_amd(context)); + clglEvent->context().glenv()->glFlush_(); + // initially mark the fence event as submitted (CL_SUBMITTED) + clglEvent->setStatus(CL_SUBMITTED); + // store GLsync id of the fence in event in order to associate them together + clglEvent->setData(clGLsync); + amd::Event* evt = dynamic_cast(clglEvent); + evt->retain(); + return as_cl(evt); +} +RUNTIME_EXIT + +/*! @} + * \addtogroup clGetGLContextInfoKHR + * @{ + */ + +/*! \brief This function is defined in CL extension cl_khr_gl_sharing and serves + * the purpose of querying current device and all devices that support + * CL-GL interoperability. + * + * \param properties points to an attribute list, which is an array of + * ordered (attribute name, value) pairs terminated with zero. If an + * attribute is not specified in the list, then its default value + * (listed in table 4.attr) is used (it is said to be specified + * implicitly). If the list is NULL or empty (points to a list + * whose first value is zero), all attributes take on their default + * values. + * + * \param param_name may accept one of the following enumerated values: + * - CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 + * - CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007. + * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type as + * described for \a param_name argument (GLenum or GLint). + * \a param_value_size_ret returns the actual size in bytes of data copied to + * \a param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \param param_value is a pointer to memory where the appropriate result + * being queried is returned. If \a param_value is NULL, it is ignored. 
+ * + * \param param_value_size is used to specify the size in bytes of memory + * pointed to by \a param_value. This size must be >= size of return type as + * described for \a param_name argument (GLenum or GLint). + * \a param_value_size_ret returns the actual size in bytes of data copied to + * \a param_value. If \a param_value_size_ret is NULL, it is ignored. + * + * \return one of the following values is returned: + * - CL_SUCCESS if the function is executed successfully. + * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the + * function does nothing. + * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a + * NULL value or if \a num_objects > 0 and \a mem_objects is NULL. + * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid + * OpenCL memory objects. + * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue. + * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not + * created from an OpenGL clContext. + * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been + * created from GL object(s). + * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and + * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and + * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list + * are not valid events. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources + * required by the OpenCL implementation on the host. 
+ * - CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if + * + * \version 1.0r47 + */ +/* NOTE(review): both clGetDeviceIDs calls in this body are commented out, so + * num_gpu_devices is never changed from 0. Once checkProperties succeeds and + * the GL flag is present, the function therefore returns + * CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR at the num_gpu_devices check, and the + * switch on \a param_name (with its alloca'd device arrays) is not reached as + * written. Confirm whether this stub behaviour is intended. + */ +RUNTIME_ENTRY(cl_int, clGetGLContextInfoKHR, + (const cl_context_properties* properties, cl_gl_context_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + cl_int errcode=0; + cl_device_id* gpu_devices; + cl_uint num_gpu_devices = 0; + amd::Context::Info info; + static const bool VALIDATE_ONLY = true; + + errcode = amd::Context::checkProperties(properties, &info); + if (CL_SUCCESS != errcode) { + return errcode; + } + + if (!(info.flags_ & amd::Context::GLDeviceKhr)) { + // No GL context is specified + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } + + // Get devices + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } + + if (!num_gpu_devices) { + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } + + switch (param_name) { + case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR: + // Return the CL device currently associated with the specified OpenGL context. + if (num_gpu_devices) { + gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id)); + + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; + } + + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + cl_device_id device = gpu_devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + return amd::clGetInfo(device, param_value_size, param_value, param_value_size_ret); + } + } + + *not_null(param_value_size_ret) = 0; + } + break; + + case CL_DEVICES_FOR_GL_CONTEXT_KHR: { + // List of all CL devices that can be associated with the specified OpenGL context. 
+ cl_uint total_devices = num_gpu_devices; + size_t size = total_devices * sizeof(cl_device_id); + + cl_device_id* devices = (cl_device_id*)alloca(size); + + //errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, total_devices, devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; + } + + std::vector compatible_devices; + + for (cl_uint i = 0; i < total_devices; ++i) { + cl_device_id device = devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + + size_t deviceCount = compatible_devices.size(); + size_t deviceCountSize = deviceCount * sizeof(cl_device_id); + + if (param_value != NULL && param_value_size < deviceCountSize) { + return CL_INVALID_VALUE; + } + + *not_null(param_value_size_ret) = deviceCountSize; + + if (param_value != NULL) { + cl_device_id* deviceList = (cl_device_id*)param_value; + for (const auto& it : compatible_devices) { + *deviceList++ = as_cl(it); + } + } + + return CL_SUCCESS; + } break; + + default: + LogWarning("\"param_name\" is not valid"); + return CL_INVALID_VALUE; + } + return CL_SUCCESS; +} +RUNTIME_EXIT + +// +// +// namespace amd +// +// +namespace amd { + +typedef struct { + GLenum glBinding; + GLenum glTarget; +} TargetBindings_t; + +/*! @} + * \addtogroup CL-GL interop helper functions + * @{ + */ + +//! 
Function clearGLErrors() to clear all GL error bits, if any +/* Reads glGetError_ until it reports GL_NO_ERROR or repeats the previous + * code, logging a warning for every error that is discarded. + */ +void clearGLErrors(const Context& amdContext) { + GLenum glErr, glLastErr = GL_NO_ERROR; + while (1) { + glErr = amdContext.glenv()->glGetError_(); + if (glErr == GL_NO_ERROR || glErr == glLastErr) { + break; + } + glLastErr = glErr; + LogWarning("GL error"); + } +} + +/*! Drains the GL error queue of \a amdContext, logging each entry, and returns + * the last error code read (GL_NO_ERROR if the queue was already empty). + */ +GLenum checkForGLError(const Context& amdContext) { + GLenum glRetErr = GL_NO_ERROR; + GLenum glErr; + while (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + glRetErr = glErr; // Just return the last GL error + LogWarning("Check GL error"); + } + return glRetErr; +} + +//! Function getCLFormatFromGL returns "true" if GL format +//! is compatible with CL format, "false" otherwise. +//! On a recognized format \a pclImageFormat and \a piBytesPerPixel receive the +//! mapped CL format and pixel size; the result is also "false" when the mapped +//! format is not supported by \a amdContext for \a flags. +bool getCLFormatFromGL(const Context& amdContext, GLint gliInternalFormat, + cl_image_format* pclImageFormat, int* piBytesPerPixel, cl_mem_flags flags) { + bool bRetVal = false; + + /* + Available values for "image_channel_order" + ========================================== + CL_R + CL_A + CL_INTENSITY + CL_LUMINANCE + CL_RG + CL_RA + CL_RGB + CL_RGBA + CL_ARGB + CL_BGRA + + Available values for "image_channel_data_type" + ============================================== + CL_SNORM_INT8 + CL_SNORM_INT16 + CL_UNORM_INT8 + CL_UNORM_INT16 + CL_UNORM_SHORT_565 + CL_UNORM_SHORT_555 + CL_UNORM_INT_101010 + CL_SIGNED_INT8 + CL_SIGNED_INT16 + CL_SIGNED_INT32 + CL_UNSIGNED_INT8 + CL_UNSIGNED_INT16 + CL_UNSIGNED_INT32 + CL_HALF_FLOAT + CL_FLOAT + */ + /* NOTE(review): below, GL_RGB10_EXT maps to CL_RGBA and GL_RGB10_A2 to CL_RGB, + * which looks inverted given which GL format carries alpha, and + * GL_DEPTH32F_STENCIL8 reports 5 bytes per pixel. Confirm both against the + * device layer before relying on them. + */ + + switch (gliInternalFormat) { + case GL_RGB10_EXT: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGB10_A2: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_BGR8_ATI: + case GL_BGRA8_ATI: + pclImageFormat->image_channel_order = CL_BGRA; + 
pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_ALPHA8: + pclImageFormat->image_channel_order = CL_A; + pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_R8: + case GL_R8UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_R8I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; + + case GL_RG8: + case GL_RG8UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RG8I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RGB8: + case GL_RGB8UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; + + case GL_RGB8I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; + + case GL_RGBA: + case GL_RGBA8: + case GL_RGBA8UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA8UI) ? 
CL_UNSIGNED_INT8 : CL_UNORM_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGBA8I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R16: + case GL_R16UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + bRetVal = true; + *piBytesPerPixel = 2; + break; + + case GL_R16I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_R16F: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 2; + bRetVal = true; + break; + + case GL_RG16: + case GL_RG16UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG16I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG16F: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RGB16: + case GL_RGB16UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB16) ? 
CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGB16I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGB16F: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 6; + bRetVal = true; + break; + + case GL_RGBA16: + case GL_RGBA16UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGBA16I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGBA16F: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_R32I: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R32UI: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_R32F: + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + + case GL_RG32I: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RG32UI: + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RG32F: + 
pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; + + case GL_RGB32I: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGB32UI: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGB32F: + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 12; + bRetVal = true; + break; + + case GL_RGBA32I: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; + + case GL_RGBA32UI: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; + + case GL_RGBA32F: + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 16; + bRetVal = true; + break; + case GL_DEPTH_COMPONENT32F: + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; + case GL_DEPTH_COMPONENT16: + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_UNORM_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; + case GL_DEPTH24_STENCIL8: + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_UNORM_INT24; + *piBytesPerPixel = 4; + bRetVal = true; + break; + case GL_DEPTH32F_STENCIL8: + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 5; + bRetVal = true; + break; + 
default: + LogWarning("unsupported GL internal format"); + break; + } + amd::Image::Format imageFormat(*pclImageFormat); + if (bRetVal && !imageFormat.isSupported(amdContext, 0, flags)) { + bRetVal = false; + } + return bRetVal; +} + +/*! Points deviceMemories_ at the bytes immediately following this BufferGL + * object and zero-fills one DeviceMemory slot per device of the context. + * NOTE(review): relies on the allocation having reserved that trailing + * space; confirm against the class's allocation routine. + */ +void BufferGL::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +/*! Maps a CL channel data type to a GL pixel data type. CL_HALF_FLOAT, + * CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 and any unlisted value hit + * guarantee(false) and return 0. + */ +static GLenum clChannelDataTypeToGlType(cl_channel_type channel_type) { + // Pick + // GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_INT, + // GL_UNSIGNED_INT, GL_FLOAT, GL_2_BYTES, GL_3_BYTES, GL_4_BYTES + // or GL_DOUBLE + switch (channel_type) { + case CL_SNORM_INT8: + return GL_BYTE; + case CL_SNORM_INT16: + return GL_SHORT; + case CL_UNORM_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNORM_INT16: + return GL_UNSIGNED_SHORT; + case CL_SIGNED_INT8: + return GL_BYTE; + case CL_SIGNED_INT16: + return GL_SHORT; + case CL_SIGNED_INT32: + return GL_INT; + case CL_UNSIGNED_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNSIGNED_INT16: + return GL_UNSIGNED_SHORT; + case CL_UNSIGNED_INT32: + return GL_UNSIGNED_INT; + case CL_FLOAT: + return GL_FLOAT; + case CL_UNORM_INT_101010: + return GL_UNSIGNED_INT_10_10_10_2; + case CL_HALF_FLOAT: + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + default: + guarantee(false, "Unexpected CL type."); + return 0; + } +} + +/*! Maps an RGBA-family GL internal format to a GL pixel format: GL_RGBA and + * GL_BGRA are returned as-is, the listed normalized/float sized formats give + * GL_RGBA, the listed integer sized formats give GL_RGBA_INTEGER, and anything + * else hits guarantee(false) and returns 0. + */ +static GLenum glInternalFormatToGlFormat(GLenum internalFormat) { + switch (internalFormat) { + // Base internal formats + case GL_RGBA: + case GL_BGRA: + return internalFormat; + // Sized internal formats + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA16F: + case GL_RGBA32F: + return GL_RGBA; + case GL_RGBA8I: + case GL_RGBA8UI: + case GL_RGBA16I: + case GL_RGBA16UI: + case GL_RGBA32I: + case GL_RGBA32UI: + return GL_RGBA_INTEGER; + + default: + guarantee(false, "Unexpected GL internal format."); + return 0; + } +} + +void ImageGL::initDeviceMemory() { + 
deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(ImageGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); +} + +//******************************************************************* +// +// Internal implementation of CL API functions +// +//******************************************************************* + +// +// clCreateFromGLBufferAMD +// +/* Wraps GL buffer object \a bufobj in a new BufferGL. + * Checks that \a amdContext has an associated GL environment, that \a bufobj + * is a GL buffer and that its GL_BUFFER_SIZE can be read and is non-zero; + * then allocates and create()s a BufferGL of that size and asks the device + * memory of the context's first device to process GLDecompressResource. + * Returns the new cl_mem, or 0 with the error code stored through + * not_null(errcode_ret). + */ +cl_mem clCreateFromGLBufferAMD(Context& amdContext, cl_mem_flags flags, GLuint bufobj, + cl_int* errcode_ret) { + BufferGL* pBufferGL = NULL; + GLenum glErr; + GLenum glTarget = GL_ARRAY_BUFFER; + GLint gliSize = 0; + GLint gliMapped = 0; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return (cl_mem)0; + } + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL buffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsBuffer_(bufobj)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"bufobj\" is not a GL buffer object"); + return (cl_mem)0; + } + + // It seems that CL spec is not concerned with GL_BUFFER_USAGE, so skip it + + // Check if size is available - data store is created + + amdContext.glenv()->glBindBuffer_(glTarget, bufobj); + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &gliSize); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot get the GL buffer size"); + return (cl_mem)0; + } + if (gliSize == 
0) { + //@todo - check why sometime the size is zero + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("the GL buffer's data store is not created"); + return (cl_mem)0; + } + + // Mapping will be done at acquire time (sync point) + + } // Release scoped lock + + // Now create BufferGL object + pBufferGL = new (amdContext) BufferGL(amdContext, flags, gliSize, 0, bufobj); + + if (!pBufferGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("cannot create object of class BufferGL"); + return (cl_mem)0; + } + + if (!pBufferGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pBufferGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + + // Create interop object + /* NOTE(review): unlike the create() failure above, this path returns + * without pBufferGL->release(), which looks like a leak; confirm ownership. */ + if (pBufferGL->getInteropObj() == NULL) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot create object of class BufferGL"); + return (cl_mem)0; + } + + // Fixme: If more than one device is present in the context, we choose the first device. + // We should come up with a more elegant solution to handle this. 
+ assert(amdContext.devices().size() == 1); + + const auto it = amdContext.devices().cbegin(); + const amd::Device& dev = *(*it); + + device::Memory* mem = pBufferGL->getDeviceMemory(dev); + /* NOTE(review): pBufferGL is not released on the failure path below either; + * confirm whether that is intentional. */ + if (NULL == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", pBufferGL->getSize()); + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + return (cl_mem)0; + } + mem->processGLResource(device::Memory::GLDecompressResource); + + return as_cl(pBufferGL); +} + +cl_mem clCreateFromGLTextureAMD(Context& amdContext, cl_mem_flags clFlags, GLenum target, + GLint miplevel, GLuint texture, int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; + GLenum glTarget = 0; + GLenum glInternalFormat; + cl_image_format clImageFormat; + uint dim = 1; + cl_mem_object_type clType; + cl_gl_object_type clGLType; + GLsizei numSamples = 1; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return static_cast(0); + } + + GLint gliTexWidth = 1; + GLint gliTexHeight = 1; + GLint gliTexDepth = 1; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL texture object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsTexture_(texture)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"texture\" is not a GL texture object"); + return static_cast(0); + } + + bool image = true; + + // Check target value validity + switch (target) { + case GL_TEXTURE_BUFFER: + glTarget = GL_TEXTURE_BUFFER; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D_BUFFER; + clGLType = 
CL_GL_OBJECT_TEXTURE_BUFFER; + image = false; + break; + + case GL_TEXTURE_1D: + glTarget = GL_TEXTURE_1D; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D; + clGLType = CL_GL_OBJECT_TEXTURE1D; + break; + + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + glTarget = GL_TEXTURE_CUBE_MAP; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_1D_ARRAY: + glTarget = GL_TEXTURE_1D_ARRAY; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE1D_ARRAY; + break; + + case GL_TEXTURE_2D: + glTarget = GL_TEXTURE_2D; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_MULTISAMPLE: + glTarget = GL_TEXTURE_2D_MULTISAMPLE; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_RECTANGLE_ARB: + glTarget = GL_TEXTURE_RECTANGLE_ARB; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_ARRAY: + glTarget = GL_TEXTURE_2D_ARRAY; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE2D_ARRAY; + break; + + case GL_TEXTURE_3D: + glTarget = GL_TEXTURE_3D; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE3D; + clGLType = CL_GL_OBJECT_TEXTURE3D; + break; + + default: + // wrong value + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + break; + } + + amdContext.glenv()->glBindTexture_(glTarget, texture); + + // Check if size is available - data store is created + if (image) { + // Check mipmap level for "texture" name + GLint gliTexBaseLevel; + GLint gliTexMaxLevel; + + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_BASE_LEVEL, 
&gliTexBaseLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get base mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_MAX_LEVEL, &gliTexMaxLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get max mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + if ((gliTexBaseLevel > miplevel) || (miplevel > gliTexMaxLevel)) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("\"miplevel\" is not a valid mipmap level of the GL \"texture\" object"); + return static_cast(0); + } + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_SAMPLES, + (GLint*)&numSamples); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get numbers of samples of GL \"texture\" object"); + return static_cast(0); + } + if (numSamples > 1) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("MSAA \"texture\" object is not suppoerted for the device"); + return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + 
clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + switch (dim) { + case 3: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_DEPTH, + &gliTexDepth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the depth of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + // Fall trough to process other dimensions... + case 2: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_HEIGHT, + &gliTexHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + // Fall trough to process other dimensions... 
+ case 1: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_WIDTH, + &gliTexWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + break; + default: + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + } + } else { + GLint size; + + // In case target is GL_TEXTURE_BUFFER + GLint backingBuffer; + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_( + glTarget, 0, GL_TEXTURE_BUFFER_DATA_STORE_BINDING, &backingBuffer); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get backing buffer for GL \"texture buffer\" object"); + return static_cast(0); + } + amdContext.glenv()->glBindBuffer_(glTarget, backingBuffer); + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetIntegerv_(GL_TEXTURE_BUFFER_FORMAT_EXT, + reinterpret_cast(&glInternalFormat)); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &size); + if (GL_NO_ERROR != (glErr = 
amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + gliTexWidth = size / iBytesPerPixel; + } + size_t imageSize = (clType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? static_cast(gliTexHeight) + : static_cast(gliTexDepth); + + if (!amd::Image::validateDimensions( + amdContext.devices(), clType, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), imageSize)) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"texture\" data store is not created or out of supported dimensions"); + return static_cast(0); + } + + // PBO and mapping will be done at "acquire" time (sync point) + + } // Release scoped lock + + target = (glTarget == GL_TEXTURE_CUBE_MAP) ? target : 0; + + pImageGL = new (amdContext) + ImageGL(amdContext, clType, clFlags, clImageFormat, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), glTarget, + texture, miplevel, glInternalFormat, clGLType, numSamples, target); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL - out of memory?"); + return static_cast(0); + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return static_cast(0); + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); +} + +// +// clCreateFromGLRenderbufferDAMD +// +cl_mem clCreateFromGLRenderbufferAMD(Context& amdContext, cl_mem_flags clFlags, GLuint renderbuffer, + int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; + + GLenum glTarget = GL_RENDERBUFFER; + GLenum glInternalFormat; + cl_image_format clImageFormat; + + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is 
not created from GL context or share list"); + return (cl_mem)0; + } + + GLint gliRbWidth; + GLint gliRbHeight; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); + } + + // Verify GL renderbuffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsRenderbufferEXT_(renderbuffer)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"renderbuffer\" is not a GL texture object"); + return (cl_mem)0; + } + + amdContext.glenv()->glBindRenderbuffer_(glTarget, renderbuffer); + + // Get GL RB format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of GL \"renderbuffer\" object"); + return (cl_mem)0; + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"renderbuffer\" format does not map to an appropriate CL image format"); + return (cl_mem)0; + } + + // Check if size is available - data store is created + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_WIDTH, + &gliRbWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of GL \"renderbuffer\""); + return 
(cl_mem)0; + } + if (gliRbWidth == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_HEIGHT, + &gliRbHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of GL \"renderbuffer\""); + return (cl_mem)0; + } + if (gliRbHeight == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + + // PBO and mapping will be done at "acquire" time (sync point) + + } // Release scoped lock + + pImageGL = + new (amdContext) ImageGL(amdContext, CL_MEM_OBJECT_IMAGE2D, clFlags, clImageFormat, + (size_t)gliRbWidth, (size_t)gliRbHeight, 1, glTarget, renderbuffer, + 0, glInternalFormat, CL_GL_OBJECT_RENDERBUFFER, 0); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL from renderbuffer - out of memory?"); + return (cl_mem)0; + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); +} + +// +// clEnqueueAcquireExtObjectsAMD +// + +static cl_int clSetInteropObjects(cl_uint num_objects, const cl_mem* mem_objects, + std::vector& interopObjects) { + if ((num_objects == 0 && mem_objects != NULL) || (num_objects != 0 && mem_objects == NULL)) { + return CL_INVALID_VALUE; + } + + while (num_objects-- > 0) { + cl_mem obj = *mem_objects++; + if (!is_valid(obj)) { + return CL_INVALID_MEM_OBJECT; + } + + amd::Memory* mem = as_amd(obj); + if (mem->getInteropObj() == NULL) { + return CL_INVALID_GL_OBJECT; + } + + interopObjects.push_back(mem); + } + return CL_SUCCESS; +} + +cl_int 
clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (cmd_type == CL_COMMAND_ACQUIRE_GL_OBJECTS) { + // Verify context init'ed for interop + if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) { + LogWarning("\"amdContext\" is not created from GL context or share list"); + return CL_INVALID_CONTEXT; + } + } + + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != CL_SUCCESS) { + return err; + } + + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + +#ifdef _WIN32 + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! Make sure D3D10 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) { + SyncD3D10Objects(memObjects); + } + //! Make sure D3D11 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR) { + SyncD3D11Objects(memObjects); + } + //! Make sure D3D9 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) { + SyncD3D9Objects(memObjects); + } + } +#endif //_WIN32 + + //! 
Now create command and enqueue + amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; +} + + +// +// clEnqueueReleaseExtObjectsAMD +// +cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != CL_SUCCESS) { + return err; + } + + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + //! Now create command and enqueue + amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + +#ifdef _WIN32 + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! 
Make sure CL command queue is flushed and all commands are finished + //! before D3D10 side would access interop resources + if (cmd_type == CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR) { + command->awaitCompletion(); + } + } +#endif //_WIN32 + + *not_null(event) = as_cl(&command->event()); + + if (event == NULL) { + command->release(); + } + + return CL_SUCCESS; +} + +// Placed here as opposed to command.cpp, as glext.h and cl_gl_amd.hpp will have +// to be included because of the GL calls. +// Waits on the GL fence stored in the command's data and, once it has signaled, marks the event +// CL_COMPLETE. Returns false if there is no fence, the GL environment cannot be initialized, or +// the wait does not report GL_ALREADY_SIGNALED / GL_CONDITION_SATISFIED. +bool ClGlEvent::waitForFence() { + GLenum ret; + // get fence id associated with fence event + GLsync gs = reinterpret_cast(command().data()); + if (!gs) return false; + +// Try to use DC and GLRC of current thread, if it doesn't exist +// create a new GL context on this thread, which is shared with the original context + +#ifdef _WIN32 + HDC tempDC_ = wglGetCurrentDC(); + HGLRC tempGLRC_ = wglGetCurrentContext(); + // Set DC and GLRC + if (tempDC_ && tempGLRC_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + tempDC_ = context().glenv()->getDC(); + tempGLRC_ = context().glenv()->getIntGLRC(); + if (!context().glenv()->init(reinterpret_cast(tempDC_), + reinterpret_cast(tempGLRC_))) + return false; + + // Make the newly created GL context current to this thread + context().glenv()->setIntEnv(); + // If fence has not yet executed, wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + // Since we're done making GL calls, restore whatever context was previously current to this + // thread. This must happen before the result check so that a failed wait does not leave the + // internal context current on the calling thread. + context().glenv()->restoreEnv(); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } +#else // Lnx + Display* tempDpy_ =
context().glenv()->glXGetCurrentDisplay_(); + GLXDrawable tempDrawable_ = context().glenv()->glXGetCurrentDrawable_(); + GLXContext tempCtx_ = context().glenv()->glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ && tempCtx_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + if (!context().glenv()->init(reinterpret_cast(context().glenv()->getIntDpy()), + reinterpret_cast(context().glenv()->getIntCtx()))) + return false; + + // Make the newly created GL context current to this thread + context().glenv()->setIntEnv(); + // If fence has not yet executed, wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + // Since we're done making GL calls, restore whatever context was previously current to this + // thread. This must happen before the result check so that a failed wait does not leave the + // internal context current on the calling thread. + context().glenv()->restoreEnv(); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } +#endif + // If we reach this point, fence should have completed + setStatus(CL_COMPLETE); + return true; +} + +// +// GLFunctions implementation +// + +#ifdef _WIN32 +#define CONVERT_CHAR_GLUBYTE +#else //!_WIN32 +#define CONVERT_CHAR_GLUBYTE (GLubyte*) +#endif //!_WIN32 + +#define GLPREFIX(rtype, fcn, dclargs) \ + if (!(fcn##_ = (PFN_##fcn)GETPROCADDRESS(libHandle_, #fcn))) { \ + if (!(fcn##_ = (PFN_##fcn)GetProcAddress_(reinterpret_cast(#fcn)))) ++missed_; \ + } + +GLFunctions::SetIntEnv::SetIntEnv(GLFunctions* env) : env_(env) { + env_->getLock().lock(); + + // Set environment (DC and GLRC) + isValid_ = env_->setIntEnv(); +} + +GLFunctions::SetIntEnv::~SetIntEnv() { + // Restore environment (CL DC and CL GLRC) + env_->restoreEnv(); + + env_->getLock().unlock(); +} + +GLFunctions::GLFunctions(HMODULE h, bool isEGL) + : libHandle_(h), + missed_(0), + eglDisplay_(EGL_NO_DISPLAY), +
eglOriginalContext_(EGL_NO_CONTEXT), + eglInternalContext_(EGL_NO_CONTEXT), + eglTempContext_(EGL_NO_CONTEXT), + isEGL_(isEGL), +#ifdef _WIN32 + hOrigGLRC_(0), + hDC_(0), + hIntGLRC_(0) +#else //!_WIN32 + Dpy_(0), + Drawable_(0), + origCtx_(0), + intDpy_(0), + intDrawable_(0), + intCtx_(0), + XOpenDisplay_(NULL), + XCloseDisplay_(NULL), + glXGetCurrentDrawable_(NULL), + glXGetCurrentDisplay_(NULL), + glXGetCurrentContext_(NULL), + glXChooseVisual_(NULL), + glXCreateContext_(NULL), + glXDestroyContext_(NULL), + glXMakeCurrent_(NULL) +#endif //!_WIN32 +{ +#define VERIFY_POINTER(p) \ + if (NULL == p) { \ + missed_++; \ + } + + if (isEGL_) { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, "eglGetProcAddress"); + } else { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, API_GETPROCADDR); + } +#ifndef _WIN32 + // Initialize pointers to X11/GLX functions + // We can not link with these functions on compile time since we need to support + // console mode. In console mode X server and X server components may be absent. 
+ // Hence linking with X11 or libGL will fail module image loading in console mode.-tzachi cohen + + if (!isEGL_) { + glXGetCurrentDrawable_ = (PFNglXGetCurrentDrawable)GETPROCADDRESS(h, "glXGetCurrentDrawable"); + VERIFY_POINTER(glXGetCurrentDrawable_) + glXGetCurrentDisplay_ = (PFNglXGetCurrentDisplay)GETPROCADDRESS(h, "glXGetCurrentDisplay"); + VERIFY_POINTER(glXGetCurrentDisplay_) + glXGetCurrentContext_ = (PFNglXGetCurrentContext)GETPROCADDRESS(h, "glXGetCurrentContext"); + VERIFY_POINTER(glXGetCurrentContext_) + glXChooseVisual_ = (PFNglXChooseVisual)GETPROCADDRESS(h, "glXChooseVisual"); + VERIFY_POINTER(glXChooseVisual_) + glXCreateContext_ = (PFNglXCreateContext)GETPROCADDRESS(h, "glXCreateContext"); + VERIFY_POINTER(glXCreateContext_) + glXDestroyContext_ = (PFNglXDestroyContext)GETPROCADDRESS(h, "glXDestroyContext"); + VERIFY_POINTER(glXDestroyContext_) + glXMakeCurrent_ = (PFNglXMakeCurrent)GETPROCADDRESS(h, "glXMakeCurrent"); + VERIFY_POINTER(glXMakeCurrent_) + + HMODULE hXModule = (HMODULE)Os::loadLibrary("libX11.so.6"); + if (NULL != hXModule) { + XOpenDisplay_ = (PFNXOpenDisplay)GETPROCADDRESS(hXModule, "XOpenDisplay"); + VERIFY_POINTER(XOpenDisplay_) + XCloseDisplay_ = (PFNXCloseDisplay)GETPROCADDRESS(hXModule, "XCloseDisplay"); + VERIFY_POINTER(XCloseDisplay_) + } else { + missed_ += 2; + } + } +// Initialize pointers to GL functions +#include "gl_functions.hpp" +#else + if (!isEGL_) { + wglCreateContext_ = (PFN_wglCreateContext)GETPROCADDRESS(h, "wglCreateContext"); + VERIFY_POINTER(wglCreateContext_) + wglGetCurrentContext_ = (PFN_wglGetCurrentContext)GETPROCADDRESS(h, "wglGetCurrentContext"); + VERIFY_POINTER(wglGetCurrentContext_) + wglGetCurrentDC_ = (PFN_wglGetCurrentDC)GETPROCADDRESS(h, "wglGetCurrentDC"); + VERIFY_POINTER(wglGetCurrentDC_) + wglDeleteContext_ = (PFN_wglDeleteContext)GETPROCADDRESS(h, "wglDeleteContext"); + VERIFY_POINTER(wglDeleteContext_) + wglMakeCurrent_ = (PFN_wglMakeCurrent)GETPROCADDRESS(h, "wglMakeCurrent"); + 
VERIFY_POINTER(wglMakeCurrent_) + wglShareLists_ = (PFN_wglShareLists)GETPROCADDRESS(h, "wglShareLists"); + VERIFY_POINTER(wglShareLists_) + } +#endif +} + +GLFunctions::~GLFunctions() { +#ifdef _WIN32 + if (hIntGLRC_) { + if (!wglDeleteContext_(hIntGLRC_)) { + DWORD dwErr = GetLastError(); + LogWarning("Cannot delete GLRC"); + } + } +#else //!_WIN32 + if (intDpy_) { + if (intCtx_) { + glXDestroyContext_(intDpy_, intCtx_); + intCtx_ = NULL; + } + XCloseDisplay_(intDpy_); + intDpy_ = NULL; + } +#endif //!_WIN32 +} +// In case of HIP GL interop we want to make sure we have the updated context: if hglrc differs +// from the stored original context, the internal context is destroyed and re-created so that it +// shares with hglrc. Returns false if the new internal context cannot be created. +bool GLFunctions::update(intptr_t hglrc) { +#ifdef _WIN32 + DWORD err; + if (hOrigGLRC_ == (HGLRC)hglrc) { + return true; + } + hOrigGLRC_ = (HGLRC)hglrc; + if (hIntGLRC_ != nullptr) { + wglDeleteContext_(hIntGLRC_); + } + if (!(hIntGLRC_ = wglCreateContext_(wglGetCurrentDC_()))) { + err = GetLastError(); + return false; + } + if (!wglShareLists_(hOrigGLRC_, hIntGLRC_)) { + err = GetLastError(); + return false; + } +#else //!_WIN32 + Dpy_ = glXGetCurrentDisplay_(); + Drawable_ = glXGetCurrentDrawable_(); + if (origCtx_ == (GLXContext)hglrc) { + return true; + } + + origCtx_ = (GLXContext)hglrc; + if (intCtx_ != nullptr) { + // The internal context is created on intDpy_ (see glXCreateContext_ below and in init()), + // so it has to be destroyed on that same display connection rather than on Dpy_. + glXDestroyContext_(intDpy_, intCtx_); + // Drop the stale handle so a failure below cannot lead to a second destroy in the destructor. + intCtx_ = nullptr; + } + + int attribList[] = {GLX_RGBA, None}; + XVisualInfo* vis; + int defaultScreen = DefaultScreen(intDpy_); + if (!(vis = glXChooseVisual_(intDpy_, defaultScreen, attribList))) { + return false; + } + if (!(intCtx_ = glXCreateContext_(intDpy_, vis, origCtx_, true))) { + return false; + } +#endif + return true; +} + +bool GLFunctions::init(intptr_t hdc, intptr_t hglrc) { + if (isEGL_) { + eglDisplay_ = (EGLDisplay)hdc; + eglOriginalContext_ = (EGLContext)hglrc; + return true; + } + +#ifdef _WIN32 + DWORD err; + + if (missed_) { + return false; + } + + if (!hdc) { + hDC_ = wglGetCurrentDC_(); + } else { + hDC_ = (HDC)hdc; + } + hOrigGLRC_ = (HGLRC)hglrc; + if (!(hIntGLRC_ = wglCreateContext_(hDC_))) { + err =
GetLastError(); + return false; + } + if (!wglShareLists_(hOrigGLRC_, hIntGLRC_)) { + err = GetLastError(); + return false; + } + + bool makeCurrentNull = false; + + if (wglGetCurrentContext_() == NULL) { + wglMakeCurrent_(hDC_, hIntGLRC_); + + makeCurrentNull = true; + } + +// Initialize pointers to GL functions +#include "gl_functions.hpp" + + if (makeCurrentNull) { + wglMakeCurrent_(NULL, NULL); + } + + if (missed_ == 0) { + return true; + } +#else //!_WIN32 + if (!missed_) { + if (!hdc) { + Dpy_ = glXGetCurrentDisplay_(); + } else { + Dpy_ = (Display*)hdc; + } + Drawable_ = glXGetCurrentDrawable_(); + origCtx_ = (GLXContext)hglrc; + + int attribList[] = {GLX_RGBA, None}; + if (!(intDpy_ = XOpenDisplay_(DisplayString(Dpy_)))) { +#if defined(ATI_ARCH_X86) + asm("int $3"); +#endif + // No internal display connection could be opened: fail here instead of passing a null + // Display to DefaultRootWindow/DefaultScreen below. + return false; + } + intDrawable_ = DefaultRootWindow(intDpy_); + + XVisualInfo* vis; + int defaultScreen = DefaultScreen(intDpy_); + if (!(vis = glXChooseVisual_(intDpy_, defaultScreen, attribList))) { + return false; + } + if (!(intCtx_ = glXCreateContext_(intDpy_, vis, origCtx_, true))) { + return false; + } + return true; + } +#endif //!_WIN32 + return false; +} + +// Saves what is current on this thread (DC/GLRC on Windows, display/drawable/context on GLX) and +// makes the internal context current if it is not already. Returns true immediately for EGL and +// false if the switch fails. +bool GLFunctions::setIntEnv() { + if (isEGL_) { + return true; + } +#ifdef _WIN32 + // Save current DC and GLRC + tempDC_ = wglGetCurrentDC_(); + tempGLRC_ = wglGetCurrentContext_(); + // Set internal DC and GLRC + if (tempDC_ != getDC() || tempGLRC_ != getIntGLRC()) { + if (!wglMakeCurrent_(getDC(), getIntGLRC())) { + DWORD err = GetLastError(); + LogWarning("cannot set internal GL environment"); + return false; + } + } +#else //!_WIN32 + tempDpy_ = glXGetCurrentDisplay_(); + tempDrawable_ = glXGetCurrentDrawable_(); + tempCtx_ = glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ != getDpy() || tempCtx_ != getIntCtx()) { + if (!glXMakeCurrent_(getIntDpy(), getIntDrawable(), getIntCtx())) { + LogWarning("cannot set internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} +
+bool GLFunctions::restoreEnv() { + if (isEGL_) { + // eglMakeCurrent( ); + return true; + } +#ifdef _WIN32 + // Restore original DC and GLRC + if (!wglMakeCurrent_(tempDC_, tempGLRC_)) { + DWORD err = GetLastError(); + LogWarning("cannot restore original GL environment"); + return false; + } +#else //!_WIN32 + // Restore Display and GLXContext + if (tempDpy_) { + if (!glXMakeCurrent_(tempDpy_, tempDrawable_, tempCtx_)) { + LogWarning("cannot restore original GL environment"); + return false; + } + } else { + // Just release internal context + if (!glXMakeCurrent_(getIntDpy(), None, NULL)) { + LogWarning("cannot reelase internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} + +} // namespace amd diff --git a/projects/clr/hipamd/src/cl_gl_amd.hpp b/projects/clr/hipamd/src/cl_gl_amd.hpp new file mode 100644 index 0000000000..625082c8f9 --- /dev/null +++ b/projects/clr/hipamd/src/cl_gl_amd.hpp @@ -0,0 +1,398 @@ +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef CL_GL_AMD_HPP_ +#define CL_GL_AMD_HPP_ + +#ifdef _WIN32 +#include +#else //!_WIN32 +#include +#endif //!_WIN32 + +#include +#include +#include "CL/cl_gl.h" +#ifndef _WIN32 +#include +#endif //!_WIN32 + +#include +#include +#include + +#include "platform/context.hpp" +#include "platform/command.hpp" + +namespace amd +{ + +//! Class GLObject keeps all the info about the GL object +//! from which the CL object is created +class GLObject : public InteropObject +{ +protected: + cl_gl_object_type clGLType_; //!< CL GL object type + GLenum glTarget_; + GLuint gluiName_; + GLint gliMipLevel_; + GLenum glInternalFormat_; + GLint gliWidth_; + GLint gliHeight_; + GLint gliDepth_; + GLenum glCubemapFace_; + GLsizei glNumSamples_; + +public: +//! GLObject constructor initializes member variables + GLObject( + GLenum glTarget, + GLuint gluiName, + GLint gliMipLevel, + GLenum glInternalFormat, + GLint gliWidth, + GLint gliHeight, + GLint gliDepth, + cl_gl_object_type clGLType, + GLenum glCubemapFace, + GLsizei glNumSamples + ): // Initialization of member variables + clGLType_(clGLType), + glTarget_(glTarget), + gluiName_(gluiName), + gliMipLevel_(gliMipLevel), + glInternalFormat_(glInternalFormat), + gliWidth_(gliWidth), + gliHeight_(gliHeight), + gliDepth_(gliDepth), + glCubemapFace_(glCubemapFace), + glNumSamples_(glNumSamples) + { + } + + virtual ~GLObject() {} + virtual GLObject* asGLObject() {return this;} + +//! 
GLObject query functions to get GL info from member variables + GLenum getGLTarget() const {return glTarget_;} + GLuint getGLName() const {return gluiName_;} + GLint getGLMipLevel() const {return gliMipLevel_;} + GLenum getGLInternalFormat() const {return glInternalFormat_;} + GLint getGLSize() const {return gliWidth_;} + GLint getGLWidth() const {return gliWidth_;} + GLint getGLHeight() const {return gliHeight_;} + GLint getGLDepth() const {return gliDepth_;} + cl_gl_object_type getCLGLObjectType() const { return clGLType_; } + GLenum getCubemapFace() const {return glCubemapFace_;} + GLsizei getNumSamples() const { return glNumSamples_;} +}; + + +//! Class BufferGL is derived from classes Buffer and GLObject +//! where the former keeps all data for CL object and +//! the latter keeps all data for GL object +class BufferGL : public Buffer, public GLObject +{ +protected: + //! Initializes the device memory array which is nested + // after the 'BufferGL' object in memory layout. + virtual void initDeviceMemory(); +public: +//! BufferGL constructor just calls constructors of base classes +//! to pass down the parameters + BufferGL( + Context& amdContext, + cl_mem_flags clFlags, + size_t uiSizeInBytes, + GLenum glTarget, + GLuint gluiName) + : // Call base classes constructors + Buffer( + amdContext, + clFlags, + uiSizeInBytes + ), + GLObject( + glTarget, + gluiName, + 0, // Mipmap level default + GL_ARRAY_BUFFER, // Just init to some value + (GLint) uiSizeInBytes, + 1, + 1, + CL_GL_OBJECT_BUFFER, + 0, + 0 + ) + { + setInteropObj(this); + } + virtual ~BufferGL() {} + + virtual BufferGL* asBufferGL() { return this; } +}; + + +//! Class ImageGL is derived from classes Image and GLObject +//! where the former keeps all data for CL object and +//! the latter keeps all data for GL object +class ImageGL : public Image, public GLObject +{ +public: + //! ImageGL constructor just calls constructors of base classes + //!
to pass down the parameters + ImageGL( + Context& amdContext, + cl_mem_object_type clType, + cl_mem_flags clFlags, + const Format& format, + size_t width, + size_t height, + size_t depth, + GLenum glTarget, + GLuint gluiName, + GLint gliMipLevel, + GLenum glInternalFormat, + cl_gl_object_type clGLType, + GLsizei numSamples, + GLenum glCubemapFace = 0) + : Image(amdContext, clType, clFlags, format, width, height, depth, + Format(format).getElementSize() * width, + Format(format).getElementSize() * width * depth) + , GLObject(glTarget, gluiName, gliMipLevel, glInternalFormat, + static_cast(width), static_cast(height), + static_cast(depth), clGLType, glCubemapFace,numSamples) + { + setInteropObj(this); + } + + virtual ~ImageGL() {} + +protected: + //! Initializes the device memory array which is nested + // after'BufferGL' object in memory layout. + virtual void initDeviceMemory(); +}; + + typedef EGLContext (*PFN_eglGetCurrentContext) (); +#ifdef _WIN32 +#define APICALL WINAPI +#define GETPROCADDRESS GetProcAddress +#define API_GETPROCADDR "wglGetProcAddress" +#define FCN_STR_TYPE LPCSTR + typedef PROC (WINAPI* PFN_xxxGetProcAddress) (LPCSTR fcnName); + typedef HGLRC (APICALL* PFN_wglCreateContext) (HDC hdc); + typedef HGLRC (APICALL* PFN_wglGetCurrentContext) (void); + typedef HDC (APICALL* PFN_wglGetCurrentDC) (void); + typedef BOOL (APICALL* PFN_wglDeleteContext) (HGLRC hglrc); + typedef BOOL (APICALL* PFN_wglMakeCurrent) (HDC hdc, HGLRC hglrc); + typedef BOOL (APICALL* PFN_wglShareLists) (HGLRC hglrc1, HGLRC hglrc2); +#else //!_WIN32 +#define APICALL // __stdcall //??? 
todo odintsov +#define API_GETPROCADDR "glXGetProcAddress" +#define GETPROCADDRESS dlsym +#define FCN_STR_TYPE const GLubyte* +#define WINAPI +#define PROC void* + typedef void* (*PFN_xxxGetProcAddress) (const GLubyte* procName); + // X11 typedef + typedef Display* (*PFNXOpenDisplay)(_Xconst char* display_name ); + typedef int (*PFNXCloseDisplay)(Display* display ); + + //glx typedefs + typedef GLXDrawable (*PFNglXGetCurrentDrawable)(); + typedef Display* (*PFNglXGetCurrentDisplay)(); + typedef GLXContext (*PFNglXGetCurrentContext)( void ); + typedef XVisualInfo* (*PFNglXChooseVisual)(Display *dpy, int screen, int *attribList); + typedef GLXContext(*PFNglXCreateContext)(Display* dpy,XVisualInfo* vis,GLXContext shareList,Bool direct); + typedef void(*PFNglXDestroyContext)(Display* dpy, GLXContext ctx); + typedef Bool(*PFNglXMakeCurrent)( Display* dpy, GLXDrawable drawable, GLXContext ctx); + typedef void* HMODULE; +#endif //!_WIN32 + +#define GLPREFIX(rtype, fcn, dclargs) \ + typedef rtype (APICALL* PFN_##fcn) dclargs; + +// Declare prototypes for GL functions +#include "gl_functions.hpp" + +class GLFunctions +{ +public: + //! Locks any access to the virtual GPUs + class SetIntEnv : public amd::StackObject { + public: + //! Default constructor + SetIntEnv(GLFunctions* env); + + //! Destructor + ~SetIntEnv(); + + //! 
Checks if the environment setup was successful + bool isValid() const { return isValid_; } + + private: + GLFunctions* env_; //!< GL environment + bool isValid_; //!< If TRUE, then it's a valid setup + }; + +private: + HMODULE libHandle_; + int missed_; // Indicates how many GL functions not init'ed, if any + + amd::Monitor lock_; + + EGLDisplay eglDisplay_; + EGLContext eglOriginalContext_; + EGLContext eglInternalContext_; + EGLContext eglTempContext_; + bool isEGL_; + PFN_eglGetCurrentContext eglGetCurrentContext_; + +#ifdef _WIN32 + HGLRC hOrigGLRC_; + HDC hDC_; + HGLRC hIntGLRC_; // handle for internal GLRC to access shared context + HDC tempDC_; + HGLRC tempGLRC_; + +public: + PFN_wglCreateContext wglCreateContext_; + PFN_wglGetCurrentContext wglGetCurrentContext_; + PFN_wglGetCurrentDC wglGetCurrentDC_; + PFN_wglDeleteContext wglDeleteContext_; + PFN_wglMakeCurrent wglMakeCurrent_; + PFN_wglShareLists wglShareLists_; +#else +public: + Display* Dpy_; + GLXDrawable Drawable_; + GLXContext origCtx_; + Display* intDpy_; + Window intDrawable_; + GLXContext intCtx_; + Display* tempDpy_; + GLXDrawable tempDrawable_; + GLXContext tempCtx_; + + //pointers to X11 functions + PFNXOpenDisplay XOpenDisplay_; + PFNXCloseDisplay XCloseDisplay_; + + //pointers to GLX functions + PFNglXGetCurrentDrawable glXGetCurrentDrawable_; + PFNglXGetCurrentDisplay glXGetCurrentDisplay_; + PFNglXGetCurrentContext glXGetCurrentContext_; + PFNglXChooseVisual glXChooseVisual_; + PFNglXCreateContext glXCreateContext_; + PFNglXDestroyContext glXDestroyContext_; + PFNglXMakeCurrent glXMakeCurrent_; +#endif +public: + + GLFunctions(HMODULE h, bool isEGL); + ~GLFunctions(); + + bool update(intptr_t hglrc); + bool IsCurrentGlContext(const amd::Context::Info& info) const { + if (isEGL_) { + return ((info.hCtx_ != nullptr) && (eglGetCurrentContext_ != nullptr) && + (info.hCtx_ == eglGetCurrentContext_())); + } else { +#ifdef _WIN32 + return ((info.hCtx_ != nullptr) && (info.hCtx_ == 
wglGetCurrentContext_())); +#else + return ((info.hCtx_ != nullptr) && (info.hCtx_ == glXGetCurrentContext_())); +#endif // _WIN32 + } + } + + void WaitCurrentGlContext(const amd::Context::Info& info) const; + + // Query CL-GL context association + bool isAssociated() const + { + if (isEGL_ && eglDisplay_ && eglOriginalContext_) return true; +#ifdef _WIN32 + if(hDC_ && hOrigGLRC_) return true; +#else //!_WIN32 + if(Dpy_ && origCtx_) return true; +#endif //!_WIN32 + return false; + } + bool isEGL() const + { + return isEGL_; + } + // Accessor methods +#ifdef _WIN32 + HGLRC getOrigGLRC() const {return hOrigGLRC_;} + HDC getDC() const {return hDC_;} + HGLRC getIntGLRC() const {return hIntGLRC_;} +#else //!_WIN32 + Display* getDpy() const {return Dpy_;} + GLXDrawable getDrawable() const {return Drawable_;} + GLXContext getOrigCtx() const {return origCtx_;} + + Display* getIntDpy() const {return intDpy_;} + GLXDrawable getIntDrawable() const {return intDrawable_;} + GLXContext getIntCtx() const {return intCtx_;} + + EGLDisplay getEglDpy() const { return eglDisplay_; } + EGLContext getEglOrigCtx() const { return eglOriginalContext_; } +#endif //!_WIN32 + + // Initialize GL dynamic library and function pointers + bool init(intptr_t hdc, intptr_t hglrc); + + // Return true if successful, false - if error occurred + bool setIntEnv(); + bool restoreEnv(); + + amd::Monitor& getLock() { return lock_; } + + PFN_xxxGetProcAddress GetProcAddress_; + +#define GLPREFIX(rtype, fcn, dclargs) \ + PFN_##fcn fcn##_; +// Declare pointers to GL functions +#include "gl_functions.hpp" +}; + +//! 
Functions for executing the GL related stuff +cl_mem clCreateFromGLBufferAMD(Context& amdContext, cl_mem_flags flags, + GLuint bufobj, cl_int* errcode_ret); +cl_mem clCreateFromGLTextureAMD(Context& amdContext, cl_mem_flags flags, + GLenum target, GLint miplevel, GLuint texture, int* errcode_ret); +cl_mem clCreateFromGLRenderbufferAMD(Context& amdContext, cl_mem_flags flags, + GLuint renderbuffer, int* errcode_ret); + +bool +getCLFormatFromGL( + const Context& amdContext, + GLint gliInternalFormat, + cl_image_format* pclImageFormat, + int* piBytesPerPixel, + cl_mem_flags flags +); + +} //namespace amd + +#endif //CL_GL_AMD_HPP_ diff --git a/projects/clr/hipamd/src/cmake/FindROCclr.cmake b/projects/clr/hipamd/src/cmake/FindROCclr.cmake new file mode 100644 index 0000000000..b9ff2b1c78 --- /dev/null +++ b/projects/clr/hipamd/src/cmake/FindROCclr.cmake @@ -0,0 +1,51 @@ +# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +if(ROCCLR_FOUND) + return() +endif() + +find_path(ROCCLR_INCLUDE_DIR top.hpp + HINTS + ${ROCCLR_PATH} + PATHS + # gerrit repo name + ${CMAKE_SOURCE_DIR}/vdi + ${CMAKE_SOURCE_DIR}/../vdi + ${CMAKE_SOURCE_DIR}/../../vdi + # github repo name + ${CMAKE_SOURCE_DIR}/ROCclr + ${CMAKE_SOURCE_DIR}/../ROCclr + ${CMAKE_SOURCE_DIR}/../../ROCclr + # jenkins repo name + ${CMAKE_SOURCE_DIR}/rocclr + ${CMAKE_SOURCE_DIR}/../rocclr + ${CMAKE_SOURCE_DIR}/../../rocclr + PATH_SUFFIXES + include) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ROCclr + "\nROCclr not found" + ROCCLR_INCLUDE_DIR) +mark_as_advanced(ROCCLR_INCLUDE_DIR) + +list(APPEND CMAKE_MODULE_PATH "${ROCCLR_INCLUDE_DIR}/../cmake") +include(ROCclr) diff --git a/projects/clr/hipamd/src/fixme.cpp b/projects/clr/hipamd/src/fixme.cpp new file mode 100644 index 0000000000..3560eff8c1 --- /dev/null +++ b/projects/clr/hipamd/src/fixme.cpp @@ -0,0 +1,40 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "vdi_common.hpp" +#ifdef _WIN32 +#include +#include +#include +#include +#include +#include +#endif +#include + +cl_icd_dispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = {0}; +amd::PlatformIDS amd::PlatformID::Platform = {amd::ICDDispatchedObject::icdVendorDispatch_}; + +RUNTIME_ENTRY(cl_int, clGetDeviceIDs, + (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + return CL_SUCCESS; +} +RUNTIME_EXIT diff --git a/projects/clr/hipamd/src/hip_activity.cpp b/projects/clr/hipamd/src/hip_activity.cpp new file mode 100644 index 0000000000..a13a33126b --- /dev/null +++ b/projects/clr/hipamd/src/hip_activity.cpp @@ -0,0 +1,26 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "platform/activity.hpp" +#include + +extern "C" const char* hipGetCmdName(unsigned op) { + return getOclCommandKindString(static_cast(op)); +} \ No newline at end of file diff --git a/projects/clr/hipamd/src/hip_code_object.cpp b/projects/clr/hipamd/src/hip_code_object.cpp new file mode 100644 index 0000000000..ae697775b5 --- /dev/null +++ b/projects/clr/hipamd/src/hip_code_object.cpp @@ -0,0 +1,910 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip_code_object.hpp" +#include "amd_hsa_elf.hpp" + +#include + +#include +#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include + +hipError_t ihipFree(void* ptr); +// forward declaration of methods required for managed variables +hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); +namespace { +size_t constexpr strLiteralLength(char const* str) { + return *str ? 1 + strLiteralLength(str + 1) : 0; +} +constexpr char const* CLANG_OFFLOAD_BUNDLER_MAGIC_STR = "__CLANG_OFFLOAD_BUNDLE__"; +constexpr char const* OFFLOAD_KIND_HIP = "hip"; +constexpr char const* OFFLOAD_KIND_HIPV4 = "hipv4"; +constexpr char const* OFFLOAD_KIND_HCC = "hcc"; +constexpr char const* AMDGCN_TARGET_TRIPLE = "amdgcn-amd-amdhsa-"; + +// ClangOFFLOADBundle info. +static constexpr size_t bundle_magic_string_size = + strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC_STR); + +// Clang Offload bundler description & Header. +struct __ClangOffloadBundleInfo { + uint64_t offset; + uint64_t size; + uint64_t bundleEntryIdSize; + const char bundleEntryId[1]; +}; + +struct __ClangOffloadBundleHeader { + const char magic[bundle_magic_string_size - 1]; + uint64_t numOfCodeObjects; + __ClangOffloadBundleInfo desc[1]; +}; +} // namespace + +namespace hip { + +bool CodeObject::IsClangOffloadMagicBundle(const void* data) { + std::string magic(reinterpret_cast(data), bundle_magic_string_size); + return magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR) ? 
false : true; +} + +uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } + +static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported, + bool& sramEccSupported) { + switch (EFlags & EF_AMDGPU_MACH) { + case EF_AMDGPU_MACH_AMDGCN_GFX700: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx700"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX701: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx701"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX702: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx702"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX703: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx703"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX704: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx704"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX705: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx705"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX801: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx801"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX802: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx802"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX803: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx803"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX805: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx805"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX810: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx810"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX900: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx900"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX902: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx902"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX904: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx904"; + break; + case 
EF_AMDGPU_MACH_AMDGCN_GFX906: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx906"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX908: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx908"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX909: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx909"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX90A: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx90a"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX90C: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx90c"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX940: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx940"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1010: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1010"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1011: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1011"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1012: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1012"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1013: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1013"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1030: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1030"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1031: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1031"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1032: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1032"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1033: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1033"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1034: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1034"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1035: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1035"; + break; + case 
EF_AMDGPU_MACH_AMDGCN_GFX1036: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1036"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1100: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1100"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1101: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1101"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1102: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1102"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1103: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1103"; + break; + default: + return false; + } + return true; +} + +static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string& target_id) { + if (!code_object) return false; + const Elf64_Ehdr* ehdr = reinterpret_cast(code_object); + if (ehdr->e_machine != EM_AMDGPU) return false; + if (ehdr->e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return false; + + bool isXnackSupported{false}, isSramEccSupported{false}; + + std::string proc_name; + if (!getProcName(ehdr->e_flags, proc_name, isXnackSupported, isSramEccSupported)) return false; + target_id = std::string(AMDGCN_TARGET_TRIPLE) + '-' + proc_name; + + switch (ehdr->e_ident[EI_ABIVERSION]) { + case ELFABIVERSION_AMDGPU_HSA_V2: { + LogPrintfInfo("[Code Object V2, target id:%s]", target_id.c_str()); + return false; + } + + case ELFABIVERSION_AMDGPU_HSA_V3: { + LogPrintfInfo("[Code Object V3, target id:%s]", target_id.c_str()); + if (isSramEccSupported) { + if (ehdr->e_flags & EF_AMDGPU_FEATURE_SRAMECC_V3) + target_id += ":sramecc+"; + else + target_id += ":sramecc-"; + } + if (isXnackSupported) { + if (ehdr->e_flags & EF_AMDGPU_FEATURE_XNACK_V3) + target_id += ":xnack+"; + else + target_id += ":xnack-"; + } + break; + } + + case ELFABIVERSION_AMDGPU_HSA_V4: + case ELFABIVERSION_AMDGPU_HSA_V5: { + if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V4) { + LogPrintfInfo("[Code Object V4, target 
id:%s]", target_id.c_str()); + } else { + LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str()); + } + unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4; + if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4) + target_id += ":sramecc-"; + else if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_ON_V4) + target_id += ":sramecc+"; + + unsigned co_xnack_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_XNACK_V4; + if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_OFF_V4) + target_id += ":xnack-"; + else if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_ON_V4) + target_id += ":xnack+"; + break; + } + + default: { + return false; + } + } + return true; +} + +// Consumes the string 'consume_' from the starting of the given input +// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa-- +// input will become gfx908. +static bool consume(std::string& input, std::string consume_) { + if (input.substr(0, consume_.size()) != consume_) { + return false; + } + input = input.substr(consume_.size()); + return true; +} + +// Trim String till character, will be used to get gpuname +// example: input is gfx908:sram-ecc+ and trim char is : +// input will become sram-ecc+. 
+static std::string trimName(std::string& input, char trim) { + auto pos_ = input.find(trim); + auto res = input; + if (pos_ == std::string::npos) { + input = ""; + } else { + res = input.substr(0, pos_); + input = input.substr(pos_); + } + return res; +} + +static char getFeatureValue(std::string& input, std::string feature) { + char res = ' '; + if (consume(input, std::move(feature))) { + res = input[0]; + input = input.substr(1); + } + return res; +} + +static bool getTargetIDValue(std::string& input, std::string& processor, char& sramecc_value, + char& xnack_value) { + processor = trimName(input, ':'); + sramecc_value = getFeatureValue(input, std::string(":sramecc")); + if (sramecc_value != ' ' && sramecc_value != '+' && sramecc_value != '-') return false; + xnack_value = getFeatureValue(input, std::string(":xnack")); + if (xnack_value != ' ' && xnack_value != '+' && xnack_value != '-') return false; + return true; +} + +static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object, + std::string& co_triple_target_id) { + std::string offload_kind = trimName(bundled_co_entry_id, '-'); + if (offload_kind != OFFLOAD_KIND_HIPV4 && offload_kind != OFFLOAD_KIND_HIP && + offload_kind != OFFLOAD_KIND_HCC) + return false; + + if (offload_kind != OFFLOAD_KIND_HIPV4) + return getTripleTargetIDFromCodeObject(code_object, co_triple_target_id); + + // For code object V4 onwards the bundled code object entry ID correctly + // specifies the target triple. 
+ co_triple_target_id = bundled_co_entry_id.substr(1); + return true; +} + +static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, + std::string agent_triple_target_id) { + // Primitive Check + if (co_triple_target_id == agent_triple_target_id) return true; + + // Parse code object triple target id + if (!consume(co_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { + return false; + } + + std::string co_processor; + char co_sram_ecc, co_xnack; + if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) { + return false; + } + + if (!co_triple_target_id.empty()) return false; + + // Parse agent isa triple target id + if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { + return false; + } + + std::string agent_isa_processor; + char isa_sram_ecc, isa_xnack; + if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) { + return false; + } + + if (!agent_triple_target_id.empty()) return false; + + // Check for compatibility + if (agent_isa_processor != co_processor) return false; + if (co_sram_ecc != ' ') { + if (co_sram_ecc != isa_sram_ecc) return false; + } + if (co_xnack != ' ') { + if (co_xnack != isa_xnack) return false; + } + + return true; +} + +// This will be moved to COMGR eventually +hipError_t CodeObject::ExtractCodeObjectFromFile( + amd::Os::FileDesc fdesc, size_t fsize, const void** image, + const std::vector& device_names, + std::vector>& code_objs) { + hipError_t hip_error = hipSuccess; + + if (fdesc < 0) { + return hipErrorFileNotFound; + } + + // Map the file to memory, with offset 0. 
+ // file will be unmapped in ModuleUnload + // const void* image = nullptr; + if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) { + return hipErrorInvalidValue; + } + + // retrieve code_objs{binary_image, binary_size} for devices + hip_error = extractCodeObjectFromFatBinary(*image, device_names, code_objs); + + return hip_error; +} + +// This will be moved to COMGR eventually +hipError_t CodeObject::ExtractCodeObjectFromMemory( + const void* data, const std::vector& device_names, + std::vector>& code_objs, std::string& uri) { + // Get the URI from memory + if (!amd::Os::GetURIFromMemory(data, 0, uri)) { + return hipErrorInvalidValue; + } + + return extractCodeObjectFromFatBinary(data, device_names, code_objs); +} + +// This will be moved to COMGR eventually +hipError_t CodeObject::extractCodeObjectFromFatBinary( + const void* data, const std::vector& agent_triple_target_ids, + std::vector>& code_objs) { + std::string magic((const char*)data, bundle_magic_string_size); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + return hipErrorInvalidKernelFile; + } + + // Initialize Code objects + code_objs.reserve(agent_triple_target_ids.size()); + for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { + code_objs.push_back(std::make_pair(nullptr, 0)); + } + + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + size_t num_code_objs = code_objs.size(); + for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + + desc->bundleEntryIdSize)) { + const void* image = + reinterpret_cast(reinterpret_cast(obheader) + desc->offset); + const size_t image_size = desc->size; + + if (num_code_objs == 0) break; + std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + + std::string co_triple_target_id; + if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue; + + for (size_t dev = 0; dev < 
agent_triple_target_ids.size(); ++dev) { + if (code_objs[dev].first) continue; + if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) { + code_objs[dev] = std::make_pair(image, image_size); + --num_code_objs; + } + } + } + if (num_code_objs == 0) { + return hipSuccess; + } else { + LogPrintfError("%s", + "hipErrorNoBinaryForGpu: Unable to find code object for all current devices!"); + LogPrintfError("%s", " Devices:"); + for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { + LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(), + ((code_objs[i].first) ? "Found" : "Not Found")); + } + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + LogPrintfError("%s", " Bundled Code Objects:"); + for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + + desc->bundleEntryIdSize)) { + std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + const void* image = + reinterpret_cast(reinterpret_cast(obheader) + desc->offset); + + std::string co_triple_target_id; + bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id); + + if (valid_co) { + LogPrintfError(" %s - [code object targetID is %s]", bundleEntryId.c_str(), + co_triple_target_id.c_str()); + } else { + LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str()); + } + } + + LogPrintfError("hipErrorNoBinaryForGpu: Unable to find code object for all current devices! 
- %d",hipErrorNoBinaryForGpu); + return hipErrorNoBinaryForGpu; + } +} + +hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { + amd::ScopedLock lock(dclock_); + + // Number of devices = 1 in dynamic code object + fb_info_ = new FatBinaryInfo(fname, image); + std::vector devices = {g_devices[ihipGetDevice()]}; + IHIP_RETURN_ONFAIL(fb_info_->ExtractFatBinary(devices)); + + // No Lazy loading for DynCO + IHIP_RETURN_ONFAIL(fb_info_->BuildProgram(ihipGetDevice())); + + // Define Global variables + IHIP_RETURN_ONFAIL(populateDynGlobalVars()); + + // Define Global functions + IHIP_RETURN_ONFAIL(populateDynGlobalFuncs()); + + return hipSuccess; +} + +// Dynamic Code Object +DynCO::~DynCO() { + amd::ScopedLock lock(dclock_); + + for (auto& elem : vars_) { + if (elem.second->getVarKind() == Var::DVK_Managed) { + hipError_t err = ihipFree(elem.second->getManagedVarPtr()); + assert(err == hipSuccess); + } + delete elem.second; + } + vars_.clear(); + + for (auto& elem : functions_) { + delete elem.second; + } + functions_.clear(); + + delete fb_info_; +} + +hipError_t DynCO::getDeviceVar(DeviceVar** dvar, std::string var_name) { + amd::ScopedLock lock(dclock_); + + CheckDeviceIdMatch(); + + auto it = vars_.find(var_name); + if (it == vars_.end()) { + LogPrintfError("Cannot find the Var: %s ", var_name.c_str()); + return hipErrorNotFound; + } + + hipError_t err = it->second->getDeviceVar(dvar, device_id_, module()); + return err; +} + +hipError_t DynCO::getDynFunc(hipFunction_t* hfunc, std::string func_name) { + amd::ScopedLock lock(dclock_); + + CheckDeviceIdMatch(); + + if (hfunc == nullptr) { + return hipErrorInvalidValue; + } + + auto it = functions_.find(func_name); + if (it == functions_.end()) { + LogPrintfError("Cannot find the function: %s ", func_name.c_str()); + return hipErrorNotFound; + } + + /* See if this could be solved */ + return it->second->getDynFunc(hfunc, module()); +} + +hipError_t DynCO::initDynManagedVars(const std::string& 
managedVar) { + amd::ScopedLock lock(dclock_); + DeviceVar* dvar; + void* pointer = nullptr; + hipError_t status = hipSuccess; + // To get size of the managed variable + status = getDeviceVar(&dvar, managedVar + ".managed"); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to get .managed device variable:%s", + status, managedVar.c_str()); + return status; + } + // Allocate managed memory for these symbols + status = ihipMallocManaged(&pointer, dvar->size()); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to allocate managed memory", status); + guarantee(false, "Error during allocation of managed memory!"); + } + // update as manager variable and set managed memory pointer and size + auto it = vars_.find(managedVar); + it->second->setManagedVarInfo(pointer, dvar->size()); + + // copy initial value to the managed variable to the managed memory allocated + hip::Stream* stream = hip::getNullStream(); + if (stream != nullptr) { + status = ihipMemcpy(pointer, reinterpret_cast
(dvar->device_ptr()), dvar->size(), + hipMemcpyDeviceToDevice, *stream); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status, + managedVar.c_str()); + return status; + } + } else { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); + return hipErrorInvalidResourceHandle; + } + + // Get deivce ptr to initialize with managed memory pointer + status = getDeviceVar(&dvar, managedVar); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to get managed device variable:%s", + status, managedVar.c_str()); + return status; + } + // copy managed memory pointer to the managed device variable + status = ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), &pointer, dvar->size(), + hipMemcpyHostToDevice, *stream); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status, + managedVar.c_str()); + return status; + } + return status; +} + +hipError_t DynCO::populateDynGlobalVars() { + amd::ScopedLock lock(dclock_); + hipError_t err = hipSuccess; + std::vector var_names; + std::string managedVarExt = ".managed"; + // For Dynamic Modules there is only one hipFatBinaryDevInfo_ + device::Program* dev_program = fb_info_->GetProgram(ihipGetDevice()) + ->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + LogPrintfError("Could not get Global vars from Code Obj for Module: 0x%x \n", module()); + return hipErrorSharedObjectSymbolNotFound; + } + + for (auto& elem : var_names) { + vars_.insert( + std::make_pair(elem, new Var(elem, Var::DeviceVarKind::DVK_Variable, 0, 0, 0, nullptr))); + } + + for (auto& elem : var_names) { + if (elem.find(managedVarExt) != std::string::npos) { + std::string managedVar = elem; + managedVar.erase(managedVar.length() - managedVarExt.length(), managedVarExt.length()); + err = initDynManagedVars(managedVar); + } + } + return err; +} + +hipError_t DynCO::populateDynGlobalFuncs() { + amd::ScopedLock lock(dclock_); + + std::vector func_names; + device::Program* dev_program = fb_info_->GetProgram(ihipGetDevice()) + ->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + // Get all the global func names from COMGR + if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { + LogPrintfError("Could not get Global Funcs from Code Obj for Module: 0x%x \n", module()); + return hipErrorSharedObjectSymbolNotFound; + } + + for (auto& elem : func_names) { + functions_.insert(std::make_pair(elem, new Function(elem))); + } + + return hipSuccess; +} + +// Static Code Object +StatCO::StatCO() {} + +StatCO::~StatCO() { + amd::ScopedLock lock(sclock_); + + for 
(auto& elem : functions_) { + delete elem.second; + } + functions_.clear(); + + for (auto& elem : vars_) { + delete elem.second; + } + vars_.clear(); +} + +hipError_t StatCO::digestFatBinary(const void* data, FatBinaryInfo*& programs) { + amd::ScopedLock lock(sclock_); + + if (programs != nullptr) { + return hipSuccess; + } + + // Create a new fat binary object and extract the fat binary for all devices. + programs = new FatBinaryInfo(nullptr, data); + IHIP_RETURN_ONFAIL(programs->ExtractFatBinary(g_devices)); + + return hipSuccess; +} + +FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized) { + amd::ScopedLock lock(sclock_); + + if (initialized) { + hipError_t err = digestFatBinary(data, modules_[data]); + assert(err == hipSuccess); + } + return &modules_[data]; +} + +hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) { + amd::ScopedLock lock(sclock_); + + auto vit = vars_.begin(); + while (vit != vars_.end()) { + if (vit->second->moduleInfo() == module) { + delete vit->second; + vit = vars_.erase(vit); + } else { + ++vit; + } + } + + auto it = managedVars_.begin(); + while (it != managedVars_.end()) { + if ((*it)->moduleInfo() == module) { + for (auto dev : g_devices) { + DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL((*it)->getStatDeviceVar(&dvar, dev->deviceId())); + // free also deletes the device ptr + hipError_t err = ihipFree(dvar->device_ptr()); + assert(err == hipSuccess); + } + it = managedVars_.erase(it); + } else { + ++it; + } + } + + auto fit = functions_.begin(); + while (fit != functions_.end()) { + if (fit->second->moduleInfo() == module) { + delete fit->second; + fit = functions_.erase(fit); + } else { + ++fit; + } + } + + auto mit = modules_.begin(); + while (mit != modules_.end()) { + if (&mit->second == module) { + delete mit->second; + mit = modules_.erase(mit); + } else { + ++mit; + } + } + + return hipSuccess; +} + +hipError_t StatCO::registerStatFunction(const void* hostFunction, Function* func) { + 
amd::ScopedLock lock(sclock_); + + if (functions_.find(hostFunction) != functions_.end()) { + DevLogPrintfError("hostFunctionPtr: 0x%x already exists", hostFunction); + } + functions_.insert(std::make_pair(hostFunction, func)); + + return hipSuccess; +} + +const char* StatCO::getStatFuncName(const void* hostFunction) { + amd::ScopedLock lock(sclock_); + + const auto it = functions_.find(hostFunction); + if (it == functions_.end()) { + return nullptr; + } + return it->second->name().c_str(); +} + +hipError_t StatCO::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { + amd::ScopedLock lock(sclock_); + + const auto it = functions_.find(hostFunction); + if (it == functions_.end()) { + return hipErrorInvalidSymbol; + } + + return it->second->getStatFunc(hfunc, deviceId); +} + +hipError_t StatCO::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, + int deviceId) { + amd::ScopedLock lock(sclock_); + + const auto it = functions_.find(hostFunction); + if (it == functions_.end()) { + return hipErrorInvalidSymbol; + } + + return it->second->getStatFuncAttr(func_attr, deviceId); +} + +hipError_t StatCO::registerStatGlobalVar(const void* hostVar, Var* var) { + amd::ScopedLock lock(sclock_); + + if (vars_.find(hostVar) != vars_.end()) { + return hipErrorInvalidSymbol; + } + + vars_.insert(std::make_pair(hostVar, var)); + return hipSuccess; +} + +hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr) { + amd::ScopedLock lock(sclock_); + + const auto it = vars_.find(hostVar); + if (it == vars_.end()) { + return hipErrorInvalidSymbol; + } + + DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(it->second->getStatDeviceVar(&dvar, deviceId)); + + *dev_ptr = dvar->device_ptr(); + *size_ptr = dvar->size(); + return hipSuccess; +} + +hipError_t StatCO::registerStatManagedVar(Var* var) { + managedVars_.emplace_back(var); + return hipSuccess; +} + +hipError_t 
StatCO::initStatManagedVarDevicePtr(int deviceId) { + amd::ScopedLock lock(sclock_); + hipError_t err = hipSuccess; + if (managedVarsDevicePtrInitalized_.find(deviceId) == managedVarsDevicePtrInitalized_.end() || + !managedVarsDevicePtrInitalized_[deviceId]) { + for (auto var : managedVars_) { + DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId)); + + hip::Stream* stream = g_devices.at(deviceId)->NullStream(); + if (stream != nullptr) { + err = ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), var->getManagedVarPtr(), + dvar->size(), hipMemcpyHostToDevice, *stream); + } else { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); + return hipErrorInvalidResourceHandle; + } + } + managedVarsDevicePtrInitalized_[deviceId] = true; + } + return err; +} +}; // namespace hip diff --git a/projects/clr/hipamd/src/hip_code_object.hpp b/projects/clr/hipamd/src/hip_code_object.hpp new file mode 100644 index 0000000000..db1225af39 --- /dev/null +++ b/projects/clr/hipamd/src/hip_code_object.hpp @@ -0,0 +1,168 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_CODE_OBJECT_HPP +#define HIP_CODE_OBJECT_HPP + +#include "hip_global.hpp" + +#include +#include + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "hip_internal.hpp" +#include "device/device.hpp" +#include "platform/program.hpp" + +//Forward Declaration for friend usage +class PlatformState; + +namespace hip { + +//Code Object base class +class CodeObject { + public: + virtual ~CodeObject() {} + + // Functions to add_dev_prog and build + static hipError_t add_program(int deviceId, hipModule_t hmod, const void* binary_ptr, + size_t binary_size); + static hipError_t build_module(hipModule_t hmod, const std::vector& devices); + + // Given an file desc and file size, extracts to code object for corresponding devices, + // return code_objs{binary_ptr, binary_size}, which could be used to determine foffset + static hipError_t ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize, + const void ** image, const std::vector& device_names, + std::vector>& code_objs); + + // Given an ptr to memory, extracts to code object for corresponding devices, + // returns code_objs{binary_ptr, binary_size} and uniform resource indicator + static hipError_t ExtractCodeObjectFromMemory(const void* data, + const std::vector& device_names, + std::vector>& code_objs, + std::string& uri); + + static uint64_t ElfSize(const void* emi); + + static bool IsClangOffloadMagicBundle(const void* data); + +protected: + //Given an ptr to image or file, extracts to code object + //for corresponding devices + static hipError_t extractCodeObjectFromFatBinary(const void*, + const std::vector&, + std::vector>&); + + CodeObject() {} +private: + friend const std::vector& modules(); +}; + +//Dynamic Code Object +class DynCO : public CodeObject { + amd::Monitor dclock_{"Guards Dynamic Code object", true}; + +public: + DynCO() : device_id_(ihipGetDevice()), fb_info_(nullptr) {} + virtual ~DynCO(); + + //LoadsCodeObject and its data + hipError_t 
loadCodeObject(const char* fname, const void* image=nullptr); + hipModule_t module() const { return fb_info_->Module(ihipGetDevice()); }; + + //Gets GlobalVar/Functions from a dynamically loaded code object + hipError_t getDynFunc(hipFunction_t* hfunc, std::string func_name); + hipError_t getDeviceVar(DeviceVar** dvar, std::string var_name); + + hipError_t getManagedVarPointer(std::string name, void** pointer, size_t* size_ptr) const { + auto it = vars_.find(name); + if (it != vars_.end() && it->second->getVarKind() == Var::DVK_Managed) { + *pointer = it->second->getManagedVarPtr(); + *size_ptr = it->second->getSize(); + } + return hipSuccess; + } + // Device ID Check to check if module is launched in the same device it was loaded. + inline void CheckDeviceIdMatch() const { + if (device_id_ != ihipGetDevice()) { + guarantee(false, "Device mismatch from where this module is loaded"); + } + } + +private: + int device_id_; + FatBinaryInfo* fb_info_; + + //Maps for vars/funcs, could be keyed in with std::string name + std::unordered_map functions_; + std::unordered_map vars_; + + //Populate Global Vars/Funcs from an code object(@ module_load) + hipError_t populateDynGlobalFuncs(); + hipError_t populateDynGlobalVars(); + hipError_t initDynManagedVars(const std::string& managedVar); +}; + +//Static Code Object +class StatCO: public CodeObject { + amd::Monitor sclock_{"Guards Static Code object", true}; +public: + StatCO(); + virtual ~StatCO(); + + //Add/Remove/Digest Fat Binaries passed to us from "__hipRegisterFatBinary" + FatBinaryInfo** addFatBinary(const void* data, bool initialized); + hipError_t removeFatBinary(FatBinaryInfo** module); + hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs); + + //Register vars/funcs given to use from __hipRegister[Var/Func/ManagedVar] + hipError_t registerStatFunction(const void* hostFunction, Function* func); + hipError_t registerStatGlobalVar(const void* hostVar, Var* var); + hipError_t 
registerStatManagedVar(Var *var); + + //Retrive Vars/Funcs for a given hostSidePtr(const void*), unless stated otherwise. + const char* getStatFuncName(const void* hostFunction); + hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId); + hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + + //Managed variable is a defined symbol in code object + //pointer to the alocated managed memory has to be copied to the address of symbol + hipError_t initStatManagedVarDevicePtr(int deviceId); +private: + friend class ::PlatformState; + //Populated during __hipRegisterFatBinary + std::unordered_map modules_; + //Populated during __hipRegisterFuncs + std::unordered_map functions_; + //Populated during __hipRegisterVars + std::unordered_map vars_; + //Populated during __hipRegisterManagedVar + std::vector managedVars_; + std::unordered_map managedVarsDevicePtrInitalized_; +}; + +}; // namespace hip + +#endif /* HIP_CODE_OBJECT_HPP */ diff --git a/projects/clr/hipamd/src/hip_context.cpp b/projects/clr/hipamd/src/hip_context.cpp new file mode 100644 index 0000000000..f639d4ff66 --- /dev/null +++ b/projects/clr/hipamd/src/hip_context.cpp @@ -0,0 +1,402 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include "hip_internal.hpp" +#include "hip_platform.hpp" +#include "platform/runtime.hpp" +#include "utils/flags.hpp" +#include "utils/versions.hpp" + +std::vector g_devices; + +namespace hip { +thread_local TlsAggregator tls; +amd::Context* host_context = nullptr; + +//init() is only to be called from the HIP_INIT macro only once +bool init() { + amd::IS_HIP = true; + GPU_NUM_MEM_DEPENDENCY = 0; +#if DISABLE_DIRECT_DISPATCH + constexpr bool kDirectDispatch = false; +#else + constexpr bool kDirectDispatch = IS_LINUX; +#endif + AMD_DIRECT_DISPATCH = flagIsDefault(AMD_DIRECT_DISPATCH) ? 
kDirectDispatch : AMD_DIRECT_DISPATCH; + if (!amd::Runtime::init()) { + return false; + } + ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Direct Dispatch: %d", AMD_DIRECT_DISPATCH); + + + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); + + for (unsigned int i=0; i device(1, devices[i]); + amd::Context* context = new amd::Context(device, amd::Context::Info()); + if (!context) return false; + + // Enable active wait on the device by default + devices[i]->SetActiveWait(true); + + if (context && CL_SUCCESS != context->create(nullptr)) { + context->release(); + } else { + auto device = new Device(context, i); + if ((device == nullptr) || !device->Create()) { + return false; + } + g_devices.push_back(device); + } + } + + amd::Context* hContext = new amd::Context(devices, amd::Context::Info()); + if (!hContext) return false; + + if (CL_SUCCESS != hContext->create(nullptr)) { + hContext->release(); + } + host_context = hContext; + + PlatformState::instance().init(); + return true; +} + +Device* getCurrentDevice() { + return tls.device_; +} + +void setCurrentDevice(unsigned int index) { + assert(indexdevices()[0]->getPreferredNumaNode(); + amd::Os::setPreferredNumaNode(preferredNumaNode); +} + +hip::Stream* getStream(hipStream_t stream) { + if (stream == nullptr) { + return getNullStream(); + } else { + hip::Stream* hip_stream = reinterpret_cast(stream); + if (!(hip_stream->Flags() & hipStreamNonBlocking)) { + constexpr bool WaitNullStreamOnly = true; + iHipWaitActiveStreams(hip_stream, WaitNullStreamOnly); + } + return hip_stream; + } +} + +// ================================================================================================ +hip::Stream* getNullStream(amd::Context& ctx) { + for (auto& it : g_devices) { + if (it->asContext() == &ctx) { + return it->NullStream(); + } + } + // If it's a pure SVM allocation with system memory access, then it shouldn't matter which device + // runtime selects by default + if (hip::host_context == &ctx) { 
+ // Return current... + return getNullStream(); + } + return nullptr; +} + +// ================================================================================================ +int getDeviceID(amd::Context& ctx) { + for (auto& it : g_devices) { + if (it->asContext() == &ctx) { + return it->deviceId(); + } + } + return -1; +} + +// ================================================================================================ +hip::Stream* getNullStream() { + Device* device = getCurrentDevice(); + return device ? device->NullStream() : nullptr; +} + +}; + +using namespace hip; + +hipError_t hipInit(unsigned int flags) { + HIP_INIT_API(hipInit, flags); + + if (flags != 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) { + HIP_INIT_API(hipCtxCreate, ctx, flags, device); + + if (static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidValue); + } + + *ctx = reinterpret_cast(g_devices[device]); + + // Increment ref count for device primary context + g_devices[device]->retain(); + tls.ctxt_stack_.push(g_devices[device]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxSetCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxSetCurrent, ctx); + + if (ctx == nullptr) { + if(!tls.ctxt_stack_.empty()) { + tls.ctxt_stack_.pop(); + } + } else { + hip::tls.device_ = reinterpret_cast(ctx); + if(!tls.ctxt_stack_.empty()) { + tls.ctxt_stack_.pop(); + } + tls.ctxt_stack_.push(hip::getCurrentDevice()); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxGetCurrent, ctx); + + *ctx = reinterpret_cast(hip::getCurrentDevice()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { + HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig); + + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipRuntimeGetVersion(int *runtimeVersion) { + 
HIP_INIT_API_NO_RETURN(hipRuntimeGetVersion, runtimeVersion); + + if (!runtimeVersion) { + HIP_RETURN(hipErrorInvalidValue); + } + + // HIP_VERSION = HIP_VERSION_MAJOR*100 + HIP_MINOR_VERSION + *runtimeVersion = HIP_VERSION; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxDestroy(hipCtx_t ctx) { + HIP_INIT_API(hipCtxDestroy, ctx); + + hip::Device* dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Need to remove the ctx of calling thread if its the top one + if (!tls.ctxt_stack_.empty() && tls.ctxt_stack_.top() == dev) { + tls.ctxt_stack_.pop(); + } + + // Remove context from global context list + for (unsigned int i = 0; i < g_devices.size(); i++) { + if (g_devices[i] == dev) { + // Decrement ref count for device primary context + dev->release(); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { + HIP_INIT_API(hipCtxPopCurrent, ctx); + + hip::Device** dev = reinterpret_cast(ctx); + if (!tls.ctxt_stack_.empty()) { + if (dev != nullptr) { + *dev = tls.ctxt_stack_.top(); + } + tls.ctxt_stack_.pop(); + } else { + DevLogError("Context Stack empty \n"); + HIP_RETURN(hipErrorInvalidContext); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxPushCurrent(hipCtx_t ctx) { + HIP_INIT_API(hipCtxPushCurrent, ctx); + + hip::Device* dev = reinterpret_cast(ctx); + if (dev == nullptr) { + HIP_RETURN(hipErrorInvalidContext); + } + + hip::tls.device_ = dev; + tls.ctxt_stack_.push(hip::getCurrentDevice()); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDriverGetVersion(int* driverVersion) { + HIP_INIT_API_NO_RETURN(hipDriverGetVersion, driverVersion); + + if (!driverVersion) { + HIP_RETURN(hipErrorInvalidValue); + } + + // HIP_VERSION = HIP_VERSION_MAJOR*100 + HIP_MINOR_VERSION + *driverVersion = HIP_VERSION; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxGetDevice(hipDevice_t* device) { + HIP_INIT_API(hipCtxGetDevice, device); + + if (device != nullptr) { + *device = 
hip::getCurrentDevice()->deviceId(); + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipErrorInvalidContext); +} + +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { + HIP_INIT_API(hipCtxGetApiVersion, apiVersion); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) { + HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) { + HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { + HIP_INIT_API(hipCtxSetSharedMemConfig, config); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxSynchronize(void) { + HIP_INIT_API(hipCtxSynchronize, 1); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipCtxGetFlags(unsigned int* flags) { + HIP_INIT_API(hipCtxGetFlags, flags); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) { + HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (flags != nullptr) { + *flags = 0; + } + + if (active != nullptr) { + *active = g_devices[dev]->GetActiveStatus() ? 
1 : 0; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRelease, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (pctx == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pctx = reinterpret_cast(g_devices[dev]); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { + HIP_INIT_API(hipDevicePrimaryCtxReset, dev); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { + HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags); + + if (static_cast(dev) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } else { + HIP_RETURN(hipErrorContextAlreadyInUse); + } +} diff --git a/projects/clr/hipamd/src/hip_conversions.hpp b/projects/clr/hipamd/src/hip_conversions.hpp new file mode 100644 index 0000000000..ef928225eb --- /dev/null +++ b/projects/clr/hipamd/src/hip_conversions.hpp @@ -0,0 +1,944 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +namespace hip +{ +inline +cl_channel_type getCLChannelType(const hipArray_Format hipFormat, + const hipTextureReadMode hipReadMode) { + if (hipReadMode == hipReadModeElementType) { + switch (hipFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + return CL_UNSIGNED_INT8; + case HIP_AD_FORMAT_SIGNED_INT8: + return CL_SIGNED_INT8; + case HIP_AD_FORMAT_UNSIGNED_INT16: + return CL_UNSIGNED_INT16; + case HIP_AD_FORMAT_SIGNED_INT16: + return CL_SIGNED_INT16; + case HIP_AD_FORMAT_UNSIGNED_INT32: + return CL_UNSIGNED_INT32; + case HIP_AD_FORMAT_SIGNED_INT32: + return CL_SIGNED_INT32; + case HIP_AD_FORMAT_HALF: + return CL_HALF_FLOAT; + case HIP_AD_FORMAT_FLOAT: + return CL_FLOAT; + } + } else if (hipReadMode == hipReadModeNormalizedFloat) { + switch (hipFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + return CL_UNORM_INT8; + case HIP_AD_FORMAT_SIGNED_INT8: + return CL_SNORM_INT8; + case HIP_AD_FORMAT_UNSIGNED_INT16: + return CL_UNORM_INT16; + case HIP_AD_FORMAT_SIGNED_INT16: + return CL_SNORM_INT16; + case HIP_AD_FORMAT_UNSIGNED_INT32: + return CL_UNSIGNED_INT32; + case HIP_AD_FORMAT_SIGNED_INT32: + return CL_SIGNED_INT32; + case HIP_AD_FORMAT_HALF: + return CL_HALF_FLOAT; + case HIP_AD_FORMAT_FLOAT: + return CL_FLOAT; + } + } + + //error scenario + return {}; +} + +inline +cl_channel_order getCLChannelOrder(const unsigned int hipNumChannels, + const int sRGB) { + switch (hipNumChannels) { + case 1: + return CL_R; + case 2: + return CL_RG; + case 4: + return (sRGB == 1) ? 
CL_sRGBA : CL_RGBA; + default: + break; + } + + //error scenario + return {}; +} + +inline +cl_mem_object_type getCLMemObjectType(const unsigned int hipWidth, + const unsigned int hipHeight, + const unsigned int hipDepth, + const unsigned int flags) { + if (flags == hipArrayDefault) { + if ((hipWidth != 0) && (hipHeight == 0) && (hipDepth == 0)) { + return CL_MEM_OBJECT_IMAGE1D; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth == 0)) { + return CL_MEM_OBJECT_IMAGE2D; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE3D; + } + } else if (flags == hipArrayLayered) { + if ((hipWidth != 0) && (hipHeight == 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE1D_ARRAY; + } else if ((hipWidth != 0) && (hipHeight != 0) && (hipDepth != 0)) { + return CL_MEM_OBJECT_IMAGE2D_ARRAY; + } + } + // error scenario. ShouldNotReachHere() + return CL_MEM_OBJECT_ALLOCATION_FAILURE; +} + +inline +cl_addressing_mode getCLAddressingMode(const hipTextureAddressMode hipAddressMode) { + switch (hipAddressMode) { + case hipAddressModeWrap: + return CL_ADDRESS_REPEAT; + case hipAddressModeClamp: + return CL_ADDRESS_CLAMP_TO_EDGE; + case hipAddressModeMirror: + return CL_ADDRESS_MIRRORED_REPEAT; + case hipAddressModeBorder: + return CL_ADDRESS_CLAMP; + } + + //error scenario + return {}; +} + +inline +cl_filter_mode getCLFilterMode(const hipTextureFilterMode hipFilterMode) { + switch (hipFilterMode) { + case hipFilterModePoint: + return CL_FILTER_NEAREST; + case hipFilterModeLinear: + return CL_FILTER_LINEAR; + } + + //error scenario + return {}; +} + +inline +cl_mem_object_type getCLMemObjectType(const hipResourceType hipResType) { + switch (hipResType) { + case hipResourceTypeLinear: + return CL_MEM_OBJECT_IMAGE1D_BUFFER; + case hipResourceTypePitch2D: + return CL_MEM_OBJECT_IMAGE2D; + default: + break; + } + + //error scenario + return {}; +} + +inline +hipArray_Format getCL2hipArrayFormat(const cl_channel_type type) { + switch 
(type) { + case CL_SNORM_INT8: + case CL_SIGNED_INT8: + return HIP_AD_FORMAT_SIGNED_INT8; + + case CL_UNSIGNED_INT16: + return HIP_AD_FORMAT_UNSIGNED_INT16; + + case CL_SIGNED_INT16: + return HIP_AD_FORMAT_SIGNED_INT16; + + case CL_SIGNED_INT32: + return HIP_AD_FORMAT_SIGNED_INT32; + + case CL_UNSIGNED_INT32: + return HIP_AD_FORMAT_UNSIGNED_INT32; + + case CL_FLOAT: + return HIP_AD_FORMAT_FLOAT; + + case CL_UNSIGNED_INT8: + case CL_UNORM_INT8: + case CL_UNORM_INT_101010: + default: + return HIP_AD_FORMAT_UNSIGNED_INT8; + } +} +inline +size_t getElementSize(const hipArray_const_t array) { + switch (array->Format) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + case HIP_AD_FORMAT_SIGNED_INT8: + return 1 * array->NumChannels; + case HIP_AD_FORMAT_UNSIGNED_INT16: + case HIP_AD_FORMAT_SIGNED_INT16: + case HIP_AD_FORMAT_HALF: + return 2 * array->NumChannels; + case HIP_AD_FORMAT_UNSIGNED_INT32: + case HIP_AD_FORMAT_SIGNED_INT32: + case HIP_AD_FORMAT_FLOAT: + return 4 * array->NumChannels; + } + + //error scenario + return {}; +} + +inline +hipChannelFormatDesc getChannelFormatDesc(int numChannels, + hipArray_Format arrayFormat) { + switch (arrayFormat) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + switch (numChannels) { + case 1: + return {8, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {8, 8, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {8, 8, 8, 8, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT8: + switch (numChannels) { + case 1: + return {8, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {8, 8, 0, 0, hipChannelFormatKindSigned}; + case 4: + return {8, 8, 8, 8, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_UNSIGNED_INT16: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT16: + switch (numChannels) { + case 1: + return 
{16, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindSigned}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_UNSIGNED_INT32: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindUnsigned}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindUnsigned}; + case 4: + return {32, 32, 32, 32, hipChannelFormatKindUnsigned}; + } + case HIP_AD_FORMAT_SIGNED_INT32: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindSigned}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindSigned}; + case 4: + return {32, 32, 32, 32, hipChannelFormatKindSigned}; + } + case HIP_AD_FORMAT_HALF: + switch (numChannels) { + case 1: + return {16, 0, 0, 0, hipChannelFormatKindFloat}; + case 2: + return {16, 16, 0, 0, hipChannelFormatKindFloat}; + case 4: + return {16, 16, 16, 16, hipChannelFormatKindFloat}; + } + case HIP_AD_FORMAT_FLOAT: + switch (numChannels) { + case 1: + return {32, 0, 0, 0, hipChannelFormatKindFloat}; + case 2: + return {32, 32, 0, 0, hipChannelFormatKindFloat}; + case 4: + return {32, 32, 32, 32, hipChannelFormatKindFloat}; + } + } + + //error scenario + return {}; +} + +inline +unsigned int getNumChannels(const hipChannelFormatDesc& desc) { + return ((desc.x != 0) + (desc.y != 0) + (desc.z != 0) + (desc.w != 0)); +} + +inline +bool CheckArrayFormat(const hipChannelFormatDesc& desc) { + if(desc.x == 0) { + return false; + } else { + if(desc.y != 0 && desc.y != desc.x) { + return false; + } + if(desc.z !=0 && desc.z != desc.x) { + return false; + } + if(desc.w !=0 && desc.w != desc.x) { + return false; + } + } + // The bit channel description should not allow any channels after a zero channel + if (desc.y == 0) { + return !(desc.z > 0 || desc.w > 0); + } + else if (desc.z == 0) { + return !(desc.w > 0); + } + + return true; +} + +inline +hipArray_Format getArrayFormat(const hipChannelFormatDesc& desc) { + switch (desc.f) { + case 
hipChannelFormatKindUnsigned: + switch (desc.x) { + case 8: + return HIP_AD_FORMAT_UNSIGNED_INT8; + case 16: + return HIP_AD_FORMAT_UNSIGNED_INT16; + case 32: + return HIP_AD_FORMAT_UNSIGNED_INT32; + } + case hipChannelFormatKindSigned: + switch (desc.x) { + case 8: + return HIP_AD_FORMAT_SIGNED_INT8; + case 16: + return HIP_AD_FORMAT_SIGNED_INT16; + case 32: + return HIP_AD_FORMAT_SIGNED_INT32; + } + case hipChannelFormatKindFloat: + switch (desc.x) { + case 16: + return HIP_AD_FORMAT_HALF; + case 32: + return HIP_AD_FORMAT_FLOAT; + } + default: + break; + } + + //error scenario + return {}; +} + +inline +int getNumChannels(const hipResourceViewFormat hipFormat) { + switch (hipFormat) { + case hipResViewFormatUnsignedChar1: + case hipResViewFormatSignedChar1: + case hipResViewFormatUnsignedShort1: + case hipResViewFormatSignedShort1: + case hipResViewFormatUnsignedInt1: + case hipResViewFormatSignedInt1: + case hipResViewFormatHalf1: + case hipResViewFormatFloat1: + return 1; + case hipResViewFormatUnsignedChar2: + case hipResViewFormatSignedChar2: + case hipResViewFormatUnsignedShort2: + case hipResViewFormatSignedShort2: + case hipResViewFormatUnsignedInt2: + case hipResViewFormatSignedInt2: + case hipResViewFormatHalf2: + case hipResViewFormatFloat2: + return 2; + case hipResViewFormatUnsignedChar4: + case hipResViewFormatSignedChar4: + case hipResViewFormatUnsignedShort4: + case hipResViewFormatSignedShort4: + case hipResViewFormatUnsignedInt4: + case hipResViewFormatSignedInt4: + case hipResViewFormatHalf4: + case hipResViewFormatFloat4: + return 4; + default: + break; + } + + //error scenario + return {}; +} + +inline +hipArray_Format getArrayFormat(const hipResourceViewFormat hipFormat) { + switch (hipFormat) { + case hipResViewFormatUnsignedChar1: + case hipResViewFormatUnsignedChar2: + case hipResViewFormatUnsignedChar4: + return HIP_AD_FORMAT_UNSIGNED_INT8; + case hipResViewFormatSignedChar1: + case hipResViewFormatSignedChar2: + case 
hipResViewFormatSignedChar4: + return HIP_AD_FORMAT_SIGNED_INT8; + case hipResViewFormatUnsignedShort1: + case hipResViewFormatUnsignedShort2: + case hipResViewFormatUnsignedShort4: + return HIP_AD_FORMAT_UNSIGNED_INT16; + case hipResViewFormatSignedShort1: + case hipResViewFormatSignedShort2: + case hipResViewFormatSignedShort4: + return HIP_AD_FORMAT_SIGNED_INT16; + case hipResViewFormatUnsignedInt1: + case hipResViewFormatUnsignedInt2: + case hipResViewFormatUnsignedInt4: + return HIP_AD_FORMAT_UNSIGNED_INT32; + case hipResViewFormatSignedInt1: + case hipResViewFormatSignedInt2: + case hipResViewFormatSignedInt4: + return HIP_AD_FORMAT_SIGNED_INT32; + case hipResViewFormatHalf1: + case hipResViewFormatHalf2: + case hipResViewFormatHalf4: + return HIP_AD_FORMAT_HALF; + case hipResViewFormatFloat1: + case hipResViewFormatFloat2: + case hipResViewFormatFloat4: + return HIP_AD_FORMAT_FLOAT; + default: + break; + } + + //error scenario + return {}; +} + +inline +hipResourceViewFormat getResourceViewFormat(const hipChannelFormatDesc& desc) { + switch (desc.f) { + case hipChannelFormatKindUnsigned: + switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar1; + case 16: + return hipResViewFormatUnsignedShort1; + case 32: + return hipResViewFormatUnsignedInt1; + } + case 2: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar2; + case 16: + return hipResViewFormatUnsignedShort2; + case 32: + return hipResViewFormatUnsignedInt2; + } + case 4: + switch (desc.x) { + case 8: + return hipResViewFormatUnsignedChar4; + case 16: + return hipResViewFormatUnsignedShort4; + case 32: + return hipResViewFormatUnsignedInt4; + } + } + case hipChannelFormatKindSigned: + switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar1; + case 16: + return hipResViewFormatSignedShort1; + case 32: + return hipResViewFormatSignedInt1; + } + case 2: + switch (desc.x) { + case 8: + 
return hipResViewFormatSignedChar2; + case 16: + return hipResViewFormatSignedShort2; + case 32: + return hipResViewFormatSignedInt2; + } + case 4: + switch (desc.x) { + case 8: + return hipResViewFormatSignedChar4; + case 16: + return hipResViewFormatSignedShort4; + case 32: + return hipResViewFormatSignedInt4; + } + } + case hipChannelFormatKindFloat: + switch (getNumChannels(desc)) { + case 1: + switch (desc.x) { + case 16: + return hipResViewFormatHalf1; + case 32: + return hipResViewFormatFloat1; + } + case 2: + switch (desc.x) { + case 16: + return hipResViewFormatHalf2; + case 32: + return hipResViewFormatFloat2; + } + case 4: + switch (desc.x) { + case 16: + return hipResViewFormatHalf4; + case 32: + return hipResViewFormatFloat4; + } + } + default: + break; + } + + //error scenario + return {}; +} + +inline +hipTextureDesc getTextureDesc(const textureReference* texRef) { + hipTextureDesc texDesc = {}; + std::memcpy(texDesc.addressMode, texRef->addressMode, sizeof(texDesc.addressMode)); + texDesc.filterMode = texRef->filterMode; + texDesc.readMode = texRef->readMode; + texDesc.sRGB = texRef->sRGB; + texDesc.normalizedCoords = texRef->normalized; + texDesc.maxAnisotropy = texRef->maxAnisotropy; + texDesc.mipmapFilterMode = texRef->mipmapFilterMode; + texDesc.mipmapLevelBias = texRef->mipmapLevelBias; + texDesc.minMipmapLevelClamp = texRef->minMipmapLevelClamp; + texDesc.maxMipmapLevelClamp = texRef->maxMipmapLevelClamp; + + return texDesc; +} + +inline +hipResourceViewDesc getResourceViewDesc(hipArray_const_t array, + const hipResourceViewFormat format) { + hipResourceViewDesc resViewDesc = {}; + resViewDesc.format = format; + resViewDesc.width = array->width; + resViewDesc.height = array->height; + resViewDesc.depth = array->depth; + resViewDesc.firstMipmapLevel = 0; + resViewDesc.lastMipmapLevel = 0; + resViewDesc.firstLayer = 0; + resViewDesc.lastLayer = 0; /* TODO add hipArray::numLayers */ + + return resViewDesc; +} + +inline +hipResourceViewDesc 
getResourceViewDesc(hipMipmappedArray_const_t array, + const hipResourceViewFormat format) { + hipResourceViewDesc resViewDesc = {}; + resViewDesc.format = format; + resViewDesc.width = array->width; + resViewDesc.height = array->height; + resViewDesc.depth = array->depth; + resViewDesc.firstMipmapLevel = 0; + resViewDesc.lastMipmapLevel = 0; /* TODO add hipMipmappedArray::numMipLevels */ + resViewDesc.firstLayer = 0; + resViewDesc.lastLayer = 0; /* TODO add hipArray::numLayers */ + + return resViewDesc; +} + +inline +std::pair getMemoryType(const hipMemcpyKind kind) { + switch (kind) { + case hipMemcpyHostToHost: + return {hipMemoryTypeHost, hipMemoryTypeHost}; + case hipMemcpyHostToDevice: + return {hipMemoryTypeHost, hipMemoryTypeDevice}; + case hipMemcpyDeviceToHost: + return {hipMemoryTypeDevice, hipMemoryTypeHost}; + case hipMemcpyDeviceToDevice: + return {hipMemoryTypeDevice, hipMemoryTypeDevice}; + case hipMemcpyDefault: + return {hipMemoryTypeUnified, hipMemoryTypeUnified}; + } + + //error scenario + return {}; +} + +inline +HIP_MEMCPY3D getDrvMemcpy3DDesc(const hip_Memcpy2D& desc2D) { + HIP_MEMCPY3D desc3D = {}; + + desc3D.srcXInBytes = desc2D.srcXInBytes; + desc3D.srcY = desc2D.srcY; + desc3D.srcZ = 0; + desc3D.srcLOD = 0; + desc3D.srcMemoryType = desc2D.srcMemoryType; + desc3D.srcHost = desc2D.srcHost; + desc3D.srcDevice = desc2D.srcDevice; + desc3D.srcArray = desc2D.srcArray; + desc3D.srcPitch = desc2D.srcPitch; + desc3D.srcHeight = 0; + + desc3D.dstXInBytes = desc2D.dstXInBytes; + desc3D.dstY = desc2D.dstY; + desc3D.dstZ = 0; + desc3D.dstLOD = 0; + desc3D.dstMemoryType = desc2D.dstMemoryType; + desc3D.dstHost = desc2D.dstHost; + desc3D.dstDevice = desc2D.dstDevice; + desc3D.dstArray = desc2D.dstArray; + desc3D.dstPitch = desc2D.dstPitch; + desc3D.dstHeight = 0; + + desc3D.WidthInBytes = desc2D.WidthInBytes; + desc3D.Height = desc2D.Height; + desc3D.Depth = 1; + + return desc3D; +} + +inline +HIP_MEMCPY3D getDrvMemcpy3DDesc(const hipMemcpy3DParms& 
desc) { + HIP_MEMCPY3D descDrv = {}; + + descDrv.WidthInBytes = desc.extent.width; + descDrv.Height = desc.extent.height; + descDrv.Depth = desc.extent.depth; + + descDrv.srcXInBytes = desc.srcPos.x; + descDrv.srcY = desc.srcPos.y; + descDrv.srcZ = desc.srcPos.z; + descDrv.srcLOD = 0; + + descDrv.dstXInBytes = desc.dstPos.x; + descDrv.dstY = desc.dstPos.y; + descDrv.dstZ = desc.dstPos.z; + descDrv.dstLOD = 0; + + if (desc.srcArray != nullptr) { + descDrv.srcMemoryType = hipMemoryTypeArray; + descDrv.srcArray = desc.srcArray; + // When reffering to array memory, hipPos::x is in elements. + descDrv.srcXInBytes *= getElementSize(desc.srcArray); + } + + if (desc.srcPtr.ptr != nullptr) { + descDrv.srcMemoryType = std::get<0>(hip::getMemoryType(desc.kind)); + descDrv.srcHost = desc.srcPtr.ptr; + descDrv.srcDevice = desc.srcPtr.ptr; + descDrv.srcPitch = desc.srcPtr.pitch; + descDrv.srcHeight = desc.srcPtr.ysize; + } + + if (desc.dstArray != nullptr) { + descDrv.dstMemoryType = hipMemoryTypeArray; + descDrv.dstArray = desc.dstArray; + // When reffering to array memory, hipPos::x is in elements. + descDrv.dstXInBytes *= getElementSize(desc.dstArray); + } + + if (desc.dstPtr.ptr != nullptr) { + descDrv.dstMemoryType = std::get<1>(getMemoryType(desc.kind)); + descDrv.dstHost = desc.dstPtr.ptr; + descDrv.dstDevice = desc.dstPtr.ptr; + descDrv.dstPitch = desc.dstPtr.pitch; + descDrv.dstHeight = desc.dstPtr.ysize; + } + + // If a HIP array is participating in the copy, the extent is defined in terms of that array's elements. 
+ if ((desc.srcArray != nullptr) && (desc.dstArray == nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.srcArray); + } else if ((desc.srcArray == nullptr) && (desc.dstArray != nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.dstArray); + } else if ((desc.srcArray != nullptr) && (desc.dstArray != nullptr)) { + descDrv.WidthInBytes *= getElementSize(desc.dstArray); + } + + return descDrv; +} + +inline +hipResourceType getResourceType(const HIPresourcetype resType) { + // These two enums should be isomorphic. + return static_cast(resType); +} + +inline +HIPresourcetype getResourceType(const hipResourceType resType) { + // These two enums should be isomorphic. + return static_cast(resType); +} + +inline +hipResourceDesc getResourceDesc(const HIP_RESOURCE_DESC& resDesc) { + hipResourceDesc desc; + + desc.resType = getResourceType(resDesc.resType); + switch (desc.resType) { + case hipResourceTypeArray: + desc.res.array.array = resDesc.res.array.hArray; + break; + case hipResourceTypeMipmappedArray: + desc.res.mipmap.mipmap = resDesc.res.mipmap.hMipmappedArray; + break; + case hipResourceTypeLinear: + desc.res.linear.devPtr = resDesc.res.linear.devPtr; + desc.res.linear.desc = getChannelFormatDesc(resDesc.res.linear.numChannels, resDesc.res.linear.format); + desc.res.linear.sizeInBytes = resDesc.res.linear.sizeInBytes; + break; + case hipResourceTypePitch2D: + desc.res.pitch2D.devPtr = resDesc.res.pitch2D.devPtr; + desc.res.pitch2D.desc = getChannelFormatDesc(resDesc.res.pitch2D.numChannels, resDesc.res.pitch2D.format); + desc.res.pitch2D.width = resDesc.res.pitch2D.width; + desc.res.pitch2D.height = resDesc.res.pitch2D.height; + desc.res.pitch2D.pitchInBytes = resDesc.res.pitch2D.pitchInBytes; + break; + default: + break; + } + + return desc; +} + +inline +HIP_RESOURCE_DESC getResourceDesc(const hipResourceDesc& resDesc) { + HIP_RESOURCE_DESC desc; + + desc.resType = getResourceType(resDesc.resType); + switch (desc.resType) { + case 
HIP_RESOURCE_TYPE_ARRAY: + desc.res.array.hArray = resDesc.res.array.array; + break; + case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY: + desc.res.mipmap.hMipmappedArray = resDesc.res.mipmap.mipmap; + break; + case HIP_RESOURCE_TYPE_LINEAR: + desc.res.linear.devPtr = resDesc.res.linear.devPtr; + desc.res.linear.numChannels = getNumChannels(resDesc.res.linear.desc); + desc.res.linear.format = getArrayFormat(resDesc.res.linear.desc); + desc.res.linear.sizeInBytes = resDesc.res.linear.sizeInBytes; + break; + case HIP_RESOURCE_TYPE_PITCH2D: + desc.res.pitch2D.devPtr = resDesc.res.pitch2D.devPtr; + desc.res.pitch2D.numChannels = getNumChannels(resDesc.res.pitch2D.desc); + desc.res.pitch2D.format = getArrayFormat(resDesc.res.pitch2D.desc); + desc.res.pitch2D.width = resDesc.res.pitch2D.width; + desc.res.pitch2D.height = resDesc.res.pitch2D.height; + desc.res.pitch2D.pitchInBytes = resDesc.res.pitch2D.pitchInBytes; + break; + default: + break; + } + + return desc; +} + +inline +hipTextureAddressMode getAddressMode(const HIPaddress_mode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +HIPaddress_mode getAddressMode(const hipTextureAddressMode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +hipTextureFilterMode getFilterMode(const HIPfilter_mode mode) { + // These two enums should be isomorphic. + return static_cast(mode); +} + +inline +HIPfilter_mode getFilterMode(const hipTextureFilterMode mode) { + // These two enums should be isomorphic. 
+ return static_cast(mode); +} + +inline +hipTextureReadMode getReadMode(const unsigned int flags) { + if (flags & HIP_TRSF_READ_AS_INTEGER) { + return hipReadModeElementType; + } else { + return hipReadModeNormalizedFloat; + } +} + +inline +unsigned int getReadMode(const hipTextureReadMode mode) { + if (mode == hipReadModeElementType) { + return HIP_TRSF_READ_AS_INTEGER; + } else { + return 0; + } +} + +inline +int getsRGB(const unsigned int flags) { + if (flags & HIP_TRSF_SRGB) { + return 1; + } else { + return 0; + } +} + +inline +unsigned int getsRGB(const int sRGB) { + if (sRGB == 1) { + return HIP_TRSF_SRGB; + } else { + return 0; + } +} + +inline +int getNormalizedCoords(const unsigned int flags) { + if (flags & HIP_TRSF_NORMALIZED_COORDINATES) { + return 1; + } else { + return 0; + } +} + +inline +unsigned int getNormalizedCoords(const int normalizedCoords) { + if (normalizedCoords == 1) { + return HIP_TRSF_NORMALIZED_COORDINATES; + } else { + return 0; + } +} + +inline +hipTextureDesc getTextureDesc(const HIP_TEXTURE_DESC& texDesc) { + hipTextureDesc desc; + + desc.addressMode[0] = getAddressMode(texDesc.addressMode[0]); + desc.addressMode[1] = getAddressMode(texDesc.addressMode[1]); + desc.addressMode[2] = getAddressMode(texDesc.addressMode[2]); + desc.filterMode = getFilterMode(texDesc.filterMode); + desc.readMode = getReadMode(texDesc.flags); + desc.sRGB = getsRGB(texDesc.flags); + std::memcpy(desc.borderColor, texDesc.borderColor, sizeof(desc.borderColor)); + desc.normalizedCoords = getNormalizedCoords(texDesc.flags); + desc.maxAnisotropy = texDesc.maxAnisotropy; + desc.mipmapFilterMode = getFilterMode(texDesc.mipmapFilterMode); + desc.mipmapLevelBias = texDesc.mipmapLevelBias; + desc.minMipmapLevelClamp = texDesc.minMipmapLevelClamp; + desc.maxMipmapLevelClamp = texDesc.maxMipmapLevelClamp; + + return desc; +} + +inline +HIP_TEXTURE_DESC getTextureDesc(const hipTextureDesc& texDesc) { + HIP_TEXTURE_DESC desc; + + desc.addressMode[0] = 
getAddressMode(texDesc.addressMode[0]); + desc.addressMode[1] = getAddressMode(texDesc.addressMode[1]); + desc.addressMode[2] = getAddressMode(texDesc.addressMode[2]); + desc.filterMode = getFilterMode(texDesc.filterMode); + desc.flags = 0; + desc.flags |= getReadMode(texDesc.readMode); + desc.flags |= getsRGB(texDesc.sRGB); + desc.flags |= getNormalizedCoords(texDesc.normalizedCoords); + desc.maxAnisotropy = texDesc.maxAnisotropy; + desc.mipmapFilterMode = getFilterMode(texDesc.mipmapFilterMode); + desc.mipmapLevelBias = texDesc.mipmapLevelBias; + desc.minMipmapLevelClamp = texDesc.minMipmapLevelClamp; + desc.maxMipmapLevelClamp = texDesc.maxMipmapLevelClamp; + std::memcpy(desc.borderColor, texDesc.borderColor, sizeof(desc.borderColor)); + + return desc; +} + +inline +hipResourceViewFormat getResourceViewFormat(const HIPresourceViewFormat format) { + // These two enums should be isomorphic. + return static_cast(format); +} + +inline +HIPresourceViewFormat getResourceViewFormat(const hipResourceViewFormat format) { + // These two enums should be isomorphic. 
+ return static_cast(format); +} + +inline +hipResourceViewDesc getResourceViewDesc(const HIP_RESOURCE_VIEW_DESC& resViewDesc) { + hipResourceViewDesc desc; + + desc.format = getResourceViewFormat(resViewDesc.format); + desc.width = resViewDesc.width; + desc.height = resViewDesc.height; + desc.depth = resViewDesc.depth; + desc.firstMipmapLevel = resViewDesc.firstMipmapLevel; + desc.lastMipmapLevel = resViewDesc.lastMipmapLevel; + desc.firstLayer = resViewDesc.firstLayer; + desc.lastLayer = resViewDesc.lastLayer; + + return desc; +} + +inline +HIP_RESOURCE_VIEW_DESC getResourceViewDesc(const hipResourceViewDesc& resViewDesc) { + HIP_RESOURCE_VIEW_DESC desc; + + desc.format = getResourceViewFormat(resViewDesc.format); + desc.width = resViewDesc.width; + desc.height = resViewDesc.height; + desc.depth = resViewDesc.depth; + desc.firstMipmapLevel = resViewDesc.firstMipmapLevel; + desc.lastMipmapLevel = resViewDesc.lastMipmapLevel; + desc.firstLayer = resViewDesc.firstLayer; + desc.lastLayer = resViewDesc.lastLayer; + + return desc; +} + +inline +size_t getElementSize(const hipChannelFormatDesc &desc) { + return (desc.x / 8) * getNumChannels(desc); +} +}; diff --git a/projects/clr/hipamd/src/hip_device.cpp b/projects/clr/hipamd/src/hip_device.cpp new file mode 100644 index 0000000000..8782b6c35e --- /dev/null +++ b/projects/clr/hipamd/src/hip_device.cpp @@ -0,0 +1,382 @@ +/* Copyright (c) 2018 - 2022 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" +#include "hip_mempool_impl.hpp" + +namespace hip { + +// ================================================================================================ +hip::Stream* Device::NullStream(bool skip_alloc) { + if (null_stream_ == nullptr && !skip_alloc) { + null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true); + } + + if (null_stream_ == nullptr) { + return nullptr; + } + // Wait for all active streams before executing commands on the default + iHipWaitActiveStreams(null_stream_); + return null_stream_; +} + +// ================================================================================================ +bool Device::Create() { + // Create default memory pool + default_mem_pool_ = new MemoryPool(this); + if (default_mem_pool_ == nullptr) { + return false; + } + + // Create graph memory pool + graph_mem_pool_ = new MemoryPool(this); + if (graph_mem_pool_ == nullptr) { + return false; + } + + uint64_t max_size = std::numeric_limits::max(); + // Use maximum value to hold memory, because current implementation doesn't support VM + // Note: the call for the threshold is always successful + auto error = graph_mem_pool_->SetAttribute(hipMemPoolAttrReleaseThreshold, &max_size); + + // Current is default pool after device creation + current_mem_pool_ = default_mem_pool_; + return true; +} + +// ================================================================================================ +void Device::AddMemoryPool(MemoryPool* pool) { + amd::ScopedLock lock(lock_); + if (auto it = mem_pools_.find(pool); it == mem_pools_.end()) { + mem_pools_.insert(pool); + } +} + +// ================================================================================================ +void Device::RemoveMemoryPool(MemoryPool* pool) { + amd::ScopedLock lock(lock_); + if (auto it = mem_pools_.find(pool); it != mem_pools_.end()) { + mem_pools_.erase(it); + } +} + +// 
================================================================================================ +bool Device::FreeMemory(amd::Memory* memory, Stream* stream) { + amd::ScopedLock lock(lock_); + // Search for memory in the entire list of pools + for (auto it : mem_pools_) { + if (it->FreeMemory(memory, stream)) { + return true; + } + } + return false; +} + +// ================================================================================================ +void Device::ReleaseFreedMemory(Stream* stream) { + amd::ScopedLock lock(lock_); + // Search for memory in the entire list of pools + for (auto it : mem_pools_) { + it->ReleaseFreedMemory(stream); + } +} + +// ================================================================================================ +void Device::RemoveStreamFromPools(Stream* stream) { + amd::ScopedLock lock(lock_); + // Update all pools with the destroyed stream + for (auto it : mem_pools_) { + it->RemoveStream(stream); + } +} + +// ================================================================================================ +void Device::Reset() { + { + amd::ScopedLock lock(lock_); + auto it = mem_pools_.begin(); + while (it != mem_pools_.end()) { + auto current = it++; + (*current)->ReleaseAllMemory(); + delete *current; + } + mem_pools_.clear(); + } + flags_ = hipDeviceScheduleSpin; + hip::Stream::destroyAllStreams(deviceId_); + amd::MemObjMap::Purge(devices()[0]); + Create(); +} + +// ================================================================================================ +Device::~Device() { + if (default_mem_pool_ != nullptr) { + default_mem_pool_->release(); + } + + if (graph_mem_pool_ != nullptr) { + graph_mem_pool_->release(); + } + + if (null_stream_!= nullptr) { + null_stream_->release(); + } +} + +} + +void ihipDestroyDevice() { + for (auto deviceHandle : g_devices) { + delete deviceHandle; + } +} + +hipError_t ihipDeviceGet(hipDevice_t* device, int deviceId) { + if (device == nullptr) { + return hipErrorInvalidValue; 
+ } + + if (deviceId < 0 || static_cast(deviceId) >= g_devices.size()) { + return hipErrorInvalidDevice; + } + + *device = deviceId; + return hipSuccess; +} + +hipError_t hipDeviceGet(hipDevice_t* device, int deviceId) { + HIP_INIT_API(hipDeviceGet, device, deviceId); + + HIP_RETURN(ihipDeviceGet(device, deviceId)); +} + +hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { + + HIP_INIT_API(hipDeviceTotalMem, bytes, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (bytes == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + + *bytes = info.globalMemSize_; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { + + HIP_INIT_API(hipDeviceComputeCapability, major, minor, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (major == nullptr || minor == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& isa = deviceHandle->isa(); + *major = isa.versionMajor(); + *minor = isa.versionMinor(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetCount(int* count) { + HIP_INIT_API(hipDeviceGetCount, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +hipError_t ihipDeviceGetCount(int* count) { + if (count == nullptr) { + return hipErrorInvalidValue; + } + + // Get all available devices + *count = g_devices.size(); + + if (*count < 1) { + return hipErrorNoDevice; + } + + return hipSuccess; +} + +hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { + + HIP_INIT_API(hipDeviceGetName, (void*)name, len, device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (name == nullptr || len <= 0) { + 
HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + const auto nameLen = ::strlen(info.boardName_); + + // Only copy partial name if size of `dest` is smaller than size of `src` including + // trailing zero byte + auto memcpySize = (len <= (nameLen + 1) ? (len - 1) : nameLen); + ::memcpy(name, info.boardName_, memcpySize); + name[memcpySize] = '\0'; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device) { + HIP_INIT_API(hipDeviceGetUuid, reinterpret_cast(uuid), device); + + if (device < 0 || static_cast(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (uuid == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto* deviceHandle = g_devices[device]->devices()[0]; + const auto& info = deviceHandle->info(); + + ::strncpy(uuid->bytes, info.uuid_, 16); + + HIP_RETURN(hipSuccess); +} + +hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) { + if (props == nullptr) { + return hipErrorInvalidValue; + } + + if (unsigned(device) >= g_devices.size()) { + return hipErrorInvalidDevice; + } + auto* deviceHandle = g_devices[device]->devices()[0]; + + constexpr auto int32_max = static_cast(std::numeric_limits::max()); + hipDeviceProp_t deviceProps = {0}; + + const auto& info = deviceHandle->info(); + const auto& isa = deviceHandle->isa(); + ::strncpy(deviceProps.name, info.boardName_, 128); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.regsPerBlock = info.availableRegistersPerCU_; + deviceProps.warpSize = info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = int32_max; + 
deviceProps.maxGridSize[1] = int32_max; + deviceProps.maxGridSize[2] = int32_max; + deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000; + deviceProps.memoryBusWidth = info.globalMemChannels_; + deviceProps.totalConstMem = std::min(info.maxConstantBufferSize_, int32_max); + deviceProps.major = isa.versionMajor(); + deviceProps.minor = isa.versionMinor(); + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_; + deviceProps.computeMode = 0; + deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 1; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 1; + deviceProps.arch.hasWarpBallot = 1; + deviceProps.arch.hasWarpShuffle = 1; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.pciDomainID; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + deviceProps.canMapHostMemory = 1; + // FIXME: This should be removed, targets can have character names as well. 
+ deviceProps.gcnArch = isa.versionMajor() * 100 + isa.versionMinor() * 10 + isa.versionStepping(); + sprintf(deviceProps.gcnArchName, "%s", isa.targetId()); + deviceProps.cooperativeLaunch = info.cooperativeGroups_; + deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_; + + deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_; + + deviceProps.maxTexture1DLinear = std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes + deviceProps.maxTexture1D = std::min(info.image1DMaxWidth_, int32_max); + deviceProps.maxTexture2D[0] = std::min(info.image2DMaxWidth_, int32_max); + deviceProps.maxTexture2D[1] = std::min(info.image2DMaxHeight_, int32_max); + deviceProps.maxTexture3D[0] = std::min(info.image3DMaxWidth_, int32_max); + deviceProps.maxTexture3D[1] = std::min(info.image3DMaxHeight_, int32_max); + deviceProps.maxTexture3D[2] = std::min(info.image3DMaxDepth_, int32_max); + deviceProps.hdpMemFlushCntl = info.hdpMemFlushCntl; + deviceProps.hdpRegFlushCntl = info.hdpRegFlushCntl; + + deviceProps.memPitch = std::min(info.maxMemAllocSize_, int32_max); + deviceProps.textureAlignment = info.imageBaseAddressAlignment_; + deviceProps.texturePitchAlignment = info.imagePitchAlignment_; + deviceProps.kernelExecTimeoutEnabled = 0; + deviceProps.ECCEnabled = info.errorCorrectionSupport_ ? 1 : 0; + deviceProps.isLargeBar = info.largeBar_ ? 
1 : 0; + deviceProps.asicRevision = info.asicRevision_; + + // HMM capabilities + deviceProps.managedMemory = info.hmmSupported_; + deviceProps.concurrentManagedAccess = info.hmmSupported_; + deviceProps.directManagedMemAccessFromHost = info.hmmDirectHostAccess_; + deviceProps.pageableMemoryAccess = info.hmmCpuMemoryAccessible_; + deviceProps.pageableMemoryAccessUsesHostPageTables = info.hostUnifiedMemory_; + + *props = deviceProps; + return hipSuccess; +} + +hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) { + HIP_INIT_API(hipGetDeviceProperties, props, device); + + HIP_RETURN(ihipGetDeviceProperties(props, device)); +} diff --git a/projects/clr/hipamd/src/hip_device_runtime.cpp b/projects/clr/hipamd/src/hip_device_runtime.cpp new file mode 100644 index 0000000000..19bed5ef83 --- /dev/null +++ b/projects/clr/hipamd/src/hip_device_runtime.cpp @@ -0,0 +1,632 @@ +/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_internal.hpp" + +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { + + HIP_INIT_API(hipChooseDevice, device, properties); + + if (device == nullptr || properties == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *device = 0; + cl_uint maxMatchedCount = 0; + int count = 0; + HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count)); + + for (cl_int i = 0; i< count; ++i) { + hipDeviceProp_t currentProp = {0}; + cl_uint validPropCount = 0; + cl_uint matchedCount = 0; + hipError_t err = ihipGetDeviceProperties(¤tProp, i); + if (properties->major != 0) { + validPropCount++; + if(currentProp.major >= properties->major) { + matchedCount++; + } + } + if (properties->minor != 0) { + validPropCount++; + if(currentProp.minor >= properties->minor) { + matchedCount++; + } + } + if(properties->totalGlobalMem != 0) { + validPropCount++; + if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { + matchedCount++; + } + } + if(properties->sharedMemPerBlock != 0) { + validPropCount++; + if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { + matchedCount++; + } + } + if(properties->maxThreadsPerBlock != 0) { + validPropCount++; + if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { + matchedCount++; + } + } + if(properties->totalConstMem != 0) { + validPropCount++; + if(currentProp.totalConstMem >= properties->totalConstMem ) { + matchedCount++; + } + } + if(properties->multiProcessorCount != 0) { + validPropCount++; + if(currentProp.multiProcessorCount >= + properties->multiProcessorCount ) { + matchedCount++; + } + } + if(properties->maxThreadsPerMultiProcessor != 0) { + validPropCount++; + 
if(currentProp.maxThreadsPerMultiProcessor >= + properties->maxThreadsPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->memoryClockRate != 0) { + validPropCount++; + if(currentProp.memoryClockRate >= properties->memoryClockRate ) { + matchedCount++; + } + } + if(properties->memoryBusWidth != 0) { + validPropCount++; + if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { + matchedCount++; + } + } + if(properties->l2CacheSize != 0) { + validPropCount++; + if(currentProp.l2CacheSize >= properties->l2CacheSize ) { + matchedCount++; + } + } + if(properties->regsPerBlock != 0) { + validPropCount++; + if(currentProp.regsPerBlock >= properties->regsPerBlock ) { + matchedCount++; + } + } + if(properties->maxSharedMemoryPerMultiProcessor != 0) { + validPropCount++; + if(currentProp.maxSharedMemoryPerMultiProcessor >= + properties->maxSharedMemoryPerMultiProcessor ) { + matchedCount++; + } + } + if(properties->warpSize != 0) { + validPropCount++; + if(currentProp.warpSize >= properties->warpSize ) { + matchedCount++; + } + } + if(validPropCount == matchedCount) { + *device = matchedCount > maxMatchedCount ? i : *device; + maxMatchedCount = std::max(matchedCount, maxMatchedCount); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { + + HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device); + + if (pi == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int count = 0; + HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count)); + + if (device < 0 || device >= count) { + HIP_RETURN(hipErrorInvalidDevice); + } + + //FIXME: should we cache the props, or just select from deviceHandle->info_? 
+ hipDeviceProp_t prop = {0}; + HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device)); + + constexpr auto int32_max = static_cast(std::numeric_limits::max()); + + switch (attr) { + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop.maxThreadsPerBlock; + break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop.maxThreadsDim[0]; + break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop.maxThreadsDim[1]; + break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop.maxThreadsDim[2]; + break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop.maxGridSize[0]; + break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop.maxGridSize[1]; + break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop.maxGridSize[2]; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop.sharedMemPerBlock; + break; + case hipDeviceAttributeTotalConstantMemory: + // size_t to int casting + *pi = std::min(prop.totalConstMem, int32_max); + break; + case hipDeviceAttributeWarpSize: + *pi = prop.warpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop.regsPerBlock; + break; + case hipDeviceAttributeClockRate: + *pi = prop.clockRate; + break; + case hipDeviceAttributeWallClockRate: + *pi = g_devices[device]->devices()[0]->info().wallClockFrequency_; + break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop.memoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop.memoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop.multiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + *pi = prop.computeMode; + break; + case hipDeviceAttributeL2CacheSize: + *pi = prop.l2CacheSize; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop.maxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop.major; + break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop.minor; + break; + case hipDeviceAttributePciBusId: + *pi = 
prop.pciBusID; + break; + case hipDeviceAttributeConcurrentKernels: + *pi = prop.concurrentKernels; + break; + case hipDeviceAttributePciDeviceId: + *pi = prop.pciDeviceID; + break; + case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop.maxSharedMemoryPerMultiProcessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop.isMultiGpuBoard; + break; + case hipDeviceAttributeCooperativeLaunch: + *pi = prop.cooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + *pi = prop.cooperativeMultiDeviceLaunch; + break; + case hipDeviceAttributeIntegrated: + *pi = prop.integrated; + break; + case hipDeviceAttributeMaxTexture1DWidth: + *pi = prop.maxTexture1D; + break; + case hipDeviceAttributeMaxTexture2DWidth: + *pi = prop.maxTexture2D[0]; + break; + case hipDeviceAttributeMaxTexture2DHeight: + *pi = prop.maxTexture2D[1]; + break; + case hipDeviceAttributeMaxTexture3DWidth: + *pi = prop.maxTexture3D[0]; + break; + case hipDeviceAttributeMaxTexture3DHeight: + *pi = prop.maxTexture3D[1]; + break; + case hipDeviceAttributeMaxTexture3DDepth: + *pi = prop.maxTexture3D[2]; + break; + case hipDeviceAttributeHdpMemFlushCntl: + *reinterpret_cast(pi) = prop.hdpMemFlushCntl; + break; + case hipDeviceAttributeHdpRegFlushCntl: + *reinterpret_cast(pi) = prop.hdpRegFlushCntl; + break; + case hipDeviceAttributeMaxPitch: + // size_t to int casting + *pi = std::min(prop.memPitch, int32_max); + break; + case hipDeviceAttributeTextureAlignment: + *pi = prop.textureAlignment; + break; + case hipDeviceAttributeTexturePitchAlignment: + *pi = prop.texturePitchAlignment; + break; + case hipDeviceAttributeKernelExecTimeout: + *pi = prop.kernelExecTimeoutEnabled; + break; + case hipDeviceAttributeCanMapHostMemory: + *pi = prop.canMapHostMemory; + break; + case hipDeviceAttributeEccEnabled: + *pi = prop.ECCEnabled; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: + *pi = prop.cooperativeMultiDeviceUnmatchedFunc; + break; + 
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: + *pi = prop.cooperativeMultiDeviceUnmatchedGridDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: + *pi = prop.cooperativeMultiDeviceUnmatchedBlockDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: + *pi = prop.cooperativeMultiDeviceUnmatchedSharedMem; + break; + case hipDeviceAttributeAsicRevision: + *pi = prop.asicRevision; + break; + case hipDeviceAttributeManagedMemory: + *pi = prop.managedMemory; + break; + case hipDeviceAttributeDirectManagedMemAccessFromHost: + *pi = prop.directManagedMemAccessFromHost; + break; + case hipDeviceAttributeConcurrentManagedAccess: + *pi = prop.concurrentManagedAccess; + break; + case hipDeviceAttributePageableMemoryAccess: + *pi = prop.pageableMemoryAccess; + break; + case hipDeviceAttributePageableMemoryAccessUsesHostPageTables: + *pi = prop.pageableMemoryAccessUsesHostPageTables; + break; + case hipDeviceAttributeUnifiedAddressing: + // HIP runtime always uses SVM for host memory allocations. 
+ // Note: Host registered memory isn't covered by this feature + // and still requires hipMemHostGetDevicePointer() call + *pi = true; + break; + case hipDeviceAttributeCanUseStreamWaitValue: + // hipStreamWaitValue64() and hipStreamWaitValue32() support + *pi = g_devices[device]->devices()[0]->info().aqlBarrierValue_; + break; + case hipDeviceAttributeImageSupport: + *pi = static_cast(g_devices[device]->devices()[0]->info().imageSupport_); + break; + case hipDeviceAttributePhysicalMultiProcessorCount: + *pi = g_devices[device]->devices()[0]->info().maxPhysicalComputeUnits_; + break; + case hipDeviceAttributeFineGrainSupport: + *pi = static_cast(g_devices[device]->devices()[0]->isFineGrainSupported()); + break; + case hipDeviceAttributeMemoryPoolsSupported: + *pi = HIP_MEM_POOL_SUPPORT; + break; + case hipDeviceAttributeVirtualMemoryManagementSupported: + *pi = static_cast(g_devices[device]->devices()[0]->info().virtualMemoryManagement_); + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { + + HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr); + + if (device == nullptr || pciBusIdstr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + bool found = false; + if (sscanf (pciBusIdstr, "%04x:%02x:%02x", reinterpret_cast(&pciDomainID), + reinterpret_cast(&pciBusID), + reinterpret_cast(&pciDeviceID)) == 0x3) { + int count = 0; + HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count)); + for (cl_int i = 0; i < count; i++) { + hipDevice_t dev; + hipDeviceProp_t prop; + HIP_RETURN_ONFAIL(ihipDeviceGet(&dev, i)); + HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, dev)); + + if ((pciBusID == prop.pciBusID) && (pciDomainID == prop.pciDomainID) + && (pciDeviceID == prop.pciDeviceID)) { + *device = i; + found = true; + break; + } + } + } + if (!found) { + HIP_RETURN(hipErrorInvalidValue); + } + 
+ HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { + HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig); + + if(cacheConfig == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *cacheConfig = hipFuncCache_t(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { + + HIP_INIT_API(hipDeviceGetLimit, pValue, limit); + + if (pValue == nullptr || limit >= hipLimitRange) { + HIP_RETURN(hipErrorInvalidValue); + } + + switch (limit) { + case hipLimitMallocHeapSize: + hipDeviceProp_t prop; + HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, ihipGetDevice())); + *pValue = prop.totalGlobalMem; + break; + case hipLimitStackSize: + *pValue = hip::getCurrentDevice()->devices()[0]->StackSize(); + break; + default: + LogPrintfError("UnsupportedLimit = %d is passed", limit); + HIP_RETURN(hipErrorUnsupportedLimit); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { + + HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device); + + int count; + HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count)); + + if (device < 0 || device >= count) { + HIP_RETURN(hipErrorInvalidDevice); + } + + //pciBusId should be large enough to store 13 characters including the NULL-terminator. 
+ if (pciBusId == nullptr || len <= 12) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipDeviceProp_t prop; + HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device)); + snprintf (pciBusId, len, "%04x:%02x:%02x.0", + prop.pciDomainID, + prop.pciBusID, + prop.pciDeviceID); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { + HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig); + if (pConfig == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *pConfig = hipSharedMemBankSizeFourByte; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceReset ( void ) { + HIP_INIT_API(hipDeviceReset); + + hip::getCurrentDevice()->Reset(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { + HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig); + + // No way to set cache config yet. + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { + HIP_INIT_API(hipDeviceSetLimit, limit, value); + if (limit >= hipLimitRange) { + HIP_RETURN(hipErrorInvalidValue); + } + switch(limit) { + case hipLimitStackSize : + // need to query device size and take action + if (!hip::getCurrentDevice()->devices()[0]->UpdateStackSize(value)) { + HIP_RETURN(hipErrorInvalidValue); + } + break; + case hipLimitMallocHeapSize: + if (!hip::getCurrentDevice()->devices()[0]->UpdateInitialHeapSize(value)) { + HIP_RETURN(hipErrorInvalidValue); + } + break; + default: + LogPrintfError("UnsupportedLimit = %d is passed", limit); + HIP_RETURN(hipErrorUnsupportedLimit); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { + HIP_INIT_API(hipDeviceSetSharedMemConfig, config); + + // No way to set cache config yet. 
+ + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipDeviceSynchronize ( void ) { + HIP_INIT_API(hipDeviceSynchronize); + + hip::Stream* stream = hip::getNullStream(); + + if (!stream) { + HIP_RETURN(hipErrorOutOfMemory); + } + + if (hip::Stream::StreamCaptureOngoing() == true) { + HIP_RETURN(hipErrorStreamCaptureUnsupported); + } + + stream->finish(); + + hip::Stream::syncNonBlockingStreams(hip::getCurrentDevice()->deviceId()); + + HIP_RETURN(hipSuccess); +} + +int ihipGetDevice() { + hip::Device* device = hip::getCurrentDevice(); + if(device == nullptr){ + return -1; + } + return device->deviceId(); +} + +hipError_t hipGetDevice ( int* deviceId ) { + HIP_INIT_API(hipGetDevice, deviceId); + + if (deviceId != nullptr) { + int dev = ihipGetDevice(); + if (dev == -1) { + HIP_RETURN(hipErrorNoDevice); + } + *deviceId = dev; + HIP_RETURN(hipSuccess); + } else { + HIP_RETURN(hipErrorInvalidValue); + } +} + +hipError_t hipGetDeviceCount ( int* count ) { + HIP_INIT_API_NO_RETURN(hipGetDeviceCount, count); + + HIP_RETURN(ihipDeviceGetCount(count)); +} + +hipError_t hipGetDeviceFlags ( unsigned int* flags ) { + HIP_INIT_API(hipGetDeviceFlags, flags); + if (flags == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *flags = hip::getCurrentDevice()->getFlags(); + HIP_RETURN(hipSuccess); +} + +hipError_t hipSetDevice ( int device ) { + HIP_INIT_API(hipSetDevice, device); + + if (static_cast(device) < g_devices.size()) { + hip::setCurrentDevice(device); + + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorInvalidDevice); +} + +hipError_t hipSetDeviceFlags ( unsigned int flags ) { + HIP_INIT_API(hipSetDeviceFlags, flags); + + constexpr uint32_t supportedFlags = + hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax; + constexpr uint32_t mutualExclusiveFlags = + hipDeviceScheduleSpin | hipDeviceScheduleYield | hipDeviceScheduleBlockingSync; + // Only one scheduling flag allowed a time + uint32_t scheduleFlag = flags & hipDeviceScheduleMask; + + if 
(((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleSpin) && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleYield) + && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleBlockingSync) + && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleAuto)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (flags & ~supportedFlags) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + switch (scheduleFlag) { + case hipDeviceScheduleAuto: + // Current behavior is different from the spec, due to MT usage in runtime + if (hip::host_context->devices().size() >= std::thread::hardware_concurrency()) { + device->SetActiveWait(false); + break; + } + // Fall through for active wait... + case hipDeviceScheduleSpin: + case hipDeviceScheduleYield: + // The both options falls into yield, because MT usage in runtime + device->SetActiveWait(true); + break; + case hipDeviceScheduleBlockingSync: + device->SetActiveWait(false); + break; + default: + break; + } + hip::getCurrentDevice()->setFlags(flags & hipDeviceScheduleMask); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipSetValidDevices ( int* device_arr, int len ) { + HIP_INIT_API(hipSetValidDevices, device_arr, len); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/projects/clr/hipamd/src/hip_embed_pch.sh b/projects/clr/hipamd/src/hip_embed_pch.sh new file mode 100755 index 0000000000..0a1572b2d7 --- /dev/null +++ b/projects/clr/hipamd/src/hip_embed_pch.sh @@ -0,0 +1,209 @@ +#!/bin/bash +# Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +printUsage() { + echo + echo "Usage: $(basename "$0") HIP_BUILD_INC_DIR HIP_INC_DIR HIP_AMD_INC_DIR LLVM_DIR [option] [RTC_LIB_OUTPUT]" + echo + echo "Options:" + echo " -p, --generate_pch Generate pre-compiled header (default)" + echo " -r, --generate_rtc Generate preprocessor expansion (hiprtc_header.o)" + echo " -h, --help Prints this help" + echo + echo + return 0 +} + +if [ "$1" == "" ]; then + printUsage + exit 0 +fi + +HIP_BUILD_INC_DIR="$1" +HIP_INC_DIR="$2" +HIP_AMD_INC_DIR="$3" +LLVM_DIR="$4" +# By default, generate pch +TARGET="generatepch" + +while [ "$5" != "" ]; +do + case "$5" in + -h | --help ) + printUsage ; exit 0 ;; + -p | --generate_pch ) + TARGET="generatepch" ; break ;; + -r | --generate_rtc ) + TARGET="generatertc" ; break ;; + *) + echo " UNEXPECTED ERROR Parm : [$4] ">&2 ; exit 20 ;; + esac + shift 1 +done + +# Allow hiprtc lib name to be set by argument 7 +if [[ "$6" != "" ]]; then + rtc_shared_lib_out="$6" +else + if [[ "$OSTYPE" == cygwin ]]; then + rtc_shared_lib_out=hiprtc-builtins64.dll + else + rtc_shared_lib_out=libhiprtc-builtins.so + fi +fi + +if [[ "$OSTYPE" == cygwin || "$OSTYPE" == msys ]]; then + isWindows=1 + tmpdir=. +else + isWindows=0 + tmpdir=/tmp +fi + +# Expected first argument $1 to be output file name. +create_hip_macro_file() { +cat >$1 <$tmp/hip_pch.h <$tmp/hip_pch.mcin <$tmp/pch_wave32.cui && + + cat $tmp/hip_macros.h >> $tmp/pch_wave32.cui && + + $LLVM_DIR/bin/clang -cc1 -O3 -emit-pch -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -std=c++17 -fgnuc-version=4.2.1 -o $tmp/hip_wave32.pch -x hip-cpp-output - <$tmp/pch_wave32.cui && + + # For other devices + $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. 
-std=c++17 -nogpulib -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -x hip $tmp/hip_pch.h -E >$tmp/pch_wave64.cui && + + cat $tmp/hip_macros.h >> $tmp/pch_wave64.cui && + + $LLVM_DIR/bin/clang -cc1 -O3 -emit-pch -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -std=c++17 -fgnuc-version=4.2.1 -o $tmp/hip_wave64.pch -x hip-cpp-output - <$tmp/pch_wave64.cui && + + $LLVM_DIR/bin/llvm-mc -o hip_pch.o $tmp/hip_pch.mcin --filetype=obj && + + rm -rf $tmp +} + +generate_rtc_header() { + tmp=$tmpdir/hip_rtc.$$ + mkdir -p $tmp + local macroFile="$tmp/hip_macros.h" + local headerFile="$tmp/hipRTC_header.h" + local mcinFile="$tmp/hipRTC_header.mcin" + + create_hip_macro_file $macroFile + +cat >$headerFile < $mcinFile + if [[ $isWindows -eq 0 ]]; then + echo " .type __hipRTC_header,@object" >> $mcinFile + echo " .type __hipRTC_header_size,@object" >> $mcinFile + fi +cat >>$mcinFile <> $tmp/hiprtc && + $LLVM_DIR/bin/llvm-mc -o $tmp/hiprtc_header.o $tmp/hipRTC_header.mcin --filetype=obj && + $LLVM_DIR/bin/clang $tmp/hiprtc_header.o -o $rtc_shared_lib_out -shared && + $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. -std=c++14 -nogpulib -nogpuinc -emit-llvm -c -o $tmp/tmp.bc --cuda-device-only -D__HIPCC_RTC__ --offload-arch=gfx906 -x hip-cpp-output $tmp/hiprtc && + rm -rf $tmp +} + +case $TARGET in + (generatertc) generate_rtc_header ;; + (generatepch) generate_pch ;; + (*) die "Invalid target $TARGET" ;; +esac + diff --git a/projects/clr/hipamd/src/hip_error.cpp b/projects/clr/hipamd/src/hip_error.cpp new file mode 100644 index 0000000000..7ffc1ce987 --- /dev/null +++ b/projects/clr/hipamd/src/hip_error.cpp @@ -0,0 +1,382 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipGetLastError() +{ + HIP_INIT_API(hipGetLastError); + hipError_t err = hip::tls.last_error_; + hip::tls.last_error_ = hipSuccess; + return err; +} + +hipError_t hipPeekAtLastError() +{ + HIP_INIT_API(hipPeekAtLastError); + hipError_t err = hip::tls.last_error_; + HIP_RETURN(err); +} + +const char *ihipGetErrorName(hipError_t hip_error) +{ + switch (hip_error) { + case hipSuccess: + return "hipSuccess"; + case hipErrorInvalidValue: + return "hipErrorInvalidValue"; + case hipErrorOutOfMemory: + return "hipErrorOutOfMemory"; + case hipErrorNotInitialized: + return "hipErrorNotInitialized"; + case hipErrorDeinitialized: + return "hipErrorDeinitialized"; + case hipErrorProfilerDisabled: + return "hipErrorProfilerDisabled"; + case hipErrorProfilerNotInitialized: + return "hipErrorProfilerNotInitialized"; + case hipErrorProfilerAlreadyStarted: + return "hipErrorProfilerAlreadyStarted"; + case hipErrorProfilerAlreadyStopped: + return "hipErrorProfilerAlreadyStopped"; + case hipErrorInvalidConfiguration: + return "hipErrorInvalidConfiguration"; + case hipErrorInvalidSymbol: + return "hipErrorInvalidSymbol"; + case hipErrorInvalidDevicePointer: + return "hipErrorInvalidDevicePointer"; + case hipErrorInvalidMemcpyDirection: + return "hipErrorInvalidMemcpyDirection"; + case hipErrorInsufficientDriver: + return "hipErrorInsufficientDriver"; + case hipErrorMissingConfiguration: + return "hipErrorMissingConfiguration"; + case hipErrorPriorLaunchFailure: + return "hipErrorPriorLaunchFailure"; + case hipErrorInvalidDeviceFunction: + return "hipErrorInvalidDeviceFunction"; + case hipErrorNoDevice: + return "hipErrorNoDevice"; + case hipErrorInvalidDevice: + return "hipErrorInvalidDevice"; + case hipErrorInvalidPitchValue: + return "hipErrorInvalidPitchValue"; + case hipErrorInvalidImage: + return "hipErrorInvalidImage"; + case hipErrorInvalidContext: + return "hipErrorInvalidContext"; + case hipErrorContextAlreadyCurrent: 
+ return "hipErrorContextAlreadyCurrent"; + case hipErrorMapFailed: + return "hipErrorMapFailed"; + case hipErrorUnmapFailed: + return "hipErrorUnmapFailed"; + case hipErrorArrayIsMapped: + return "hipErrorArrayIsMapped"; + case hipErrorAlreadyMapped: + return "hipErrorAlreadyMapped"; + case hipErrorNoBinaryForGpu: + return "hipErrorNoBinaryForGpu"; + case hipErrorAlreadyAcquired: + return "hipErrorAlreadyAcquired"; + case hipErrorNotMapped: + return "hipErrorNotMapped"; + case hipErrorNotMappedAsArray: + return "hipErrorNotMappedAsArray"; + case hipErrorNotMappedAsPointer: + return "hipErrorNotMappedAsPointer"; + case hipErrorECCNotCorrectable: + return "hipErrorECCNotCorrectable"; + case hipErrorUnsupportedLimit: + return "hipErrorUnsupportedLimit"; + case hipErrorContextAlreadyInUse: + return "hipErrorContextAlreadyInUse"; + case hipErrorPeerAccessUnsupported: + return "hipErrorPeerAccessUnsupported"; + case hipErrorInvalidKernelFile: + return "hipErrorInvalidKernelFile"; + case hipErrorInvalidGraphicsContext: + return "hipErrorInvalidGraphicsContext"; + case hipErrorInvalidSource: + return "hipErrorInvalidSource"; + case hipErrorFileNotFound: + return "hipErrorFileNotFound"; + case hipErrorSharedObjectSymbolNotFound: + return "hipErrorSharedObjectSymbolNotFound"; + case hipErrorSharedObjectInitFailed: + return "hipErrorSharedObjectInitFailed"; + case hipErrorOperatingSystem: + return "hipErrorOperatingSystem"; + case hipErrorInvalidHandle: + return "hipErrorInvalidHandle"; + case hipErrorIllegalState: + return "hipErrorIllegalState"; + case hipErrorNotFound: + return "hipErrorNotFound"; + case hipErrorNotReady: + return "hipErrorNotReady"; + case hipErrorIllegalAddress: + return "hipErrorIllegalAddress"; + case hipErrorLaunchOutOfResources: + return "hipErrorLaunchOutOfResources"; + case hipErrorLaunchTimeOut: + return "hipErrorLaunchTimeOut"; + case hipErrorPeerAccessAlreadyEnabled: + return "hipErrorPeerAccessAlreadyEnabled"; + case 
hipErrorPeerAccessNotEnabled: + return "hipErrorPeerAccessNotEnabled"; + case hipErrorSetOnActiveProcess: + return "hipErrorSetOnActiveProcess"; + case hipErrorContextIsDestroyed: + return "hipErrorContextIsDestroyed"; + case hipErrorAssert: + return "hipErrorAssert"; + case hipErrorHostMemoryAlreadyRegistered: + return "hipErrorHostMemoryAlreadyRegistered"; + case hipErrorHostMemoryNotRegistered: + return "hipErrorHostMemoryNotRegistered"; + case hipErrorLaunchFailure: + return "hipErrorLaunchFailure"; + case hipErrorNotSupported: + return "hipErrorNotSupported"; + case hipErrorUnknown: + return "hipErrorUnknown"; + case hipErrorRuntimeMemory: + return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther: + return "hipErrorRuntimeOther"; + case hipErrorCooperativeLaunchTooLarge: + return "hipErrorCooperativeLaunchTooLarge"; + case hipErrorStreamCaptureUnsupported: + return "hipErrorStreamCaptureUnsupported"; + case hipErrorStreamCaptureInvalidated: + return "hipErrorStreamCaptureInvalidated"; + case hipErrorStreamCaptureMerge: + return "hipErrorStreamCaptureMerge"; + case hipErrorStreamCaptureUnmatched: + return "hipErrorStreamCaptureUnmatched"; + case hipErrorStreamCaptureUnjoined: + return "hipErrorStreamCaptureUnjoined"; + case hipErrorStreamCaptureIsolation: + return "hipErrorStreamCaptureIsolation"; + case hipErrorStreamCaptureImplicit: + return "hipErrorStreamCaptureImplicit"; + case hipErrorCapturedEvent: + return "hipErrorCapturedEvent"; + case hipErrorStreamCaptureWrongThread: + return "hipErrorStreamCaptureWrongThread"; + case hipErrorGraphExecUpdateFailure: + return "hipErrorGraphExecUpdateFailure"; + case hipErrorTbd: + return "hipErrorTbd"; + default: + return "hipErrorUnknown"; + }; +} + +const char *ihipGetErrorString(hipError_t hip_error) { + switch(hip_error) { + case hipSuccess: + return "no error"; + case hipErrorInvalidValue: + return "invalid argument"; + case hipErrorOutOfMemory: + return "out of memory"; + case hipErrorNotInitialized: + 
return "initialization error"; + case hipErrorDeinitialized: + return "driver shutting down"; + case hipErrorProfilerDisabled: + return "profiler disabled while using external profiling tool"; + case hipErrorProfilerNotInitialized: + return "profiler is not initialized"; + case hipErrorProfilerAlreadyStarted: + return "profiler already started"; + case hipErrorProfilerAlreadyStopped: + return "profiler already stopped"; + case hipErrorInvalidConfiguration: + return "invalid configuration argument"; + case hipErrorInvalidPitchValue: + return "invalid pitch argument"; + case hipErrorInvalidSymbol: + return "invalid device symbol"; + case hipErrorInvalidDevicePointer: + return "invalid device pointer"; + case hipErrorInvalidMemcpyDirection: + return "invalid copy direction for memcpy"; + case hipErrorInsufficientDriver: + return "driver version is insufficient for runtime version"; + case hipErrorMissingConfiguration: + return "__global__ function call is not configured"; + case hipErrorPriorLaunchFailure: + return "unspecified launch failure in prior launch"; + case hipErrorInvalidDeviceFunction: + return "invalid device function"; + case hipErrorNoDevice: + return "no ROCm-capable device is detected"; + case hipErrorInvalidDevice: + return "invalid device ordinal"; + case hipErrorInvalidImage: + return "device kernel image is invalid"; + case hipErrorInvalidContext: + return "invalid device context"; + case hipErrorContextAlreadyCurrent: + return "context is already current context"; + case hipErrorMapFailed: + return "mapping of buffer object failed"; + case hipErrorUnmapFailed: + return "unmapping of buffer object failed"; + case hipErrorArrayIsMapped: + return "array is mapped"; + case hipErrorAlreadyMapped: + return "resource already mapped"; + case hipErrorNoBinaryForGpu: + return "no kernel image is available for execution on the device"; + case hipErrorAlreadyAcquired: + return "resource already acquired"; + case hipErrorNotMapped: + return "resource not 
mapped"; + case hipErrorNotMappedAsArray: + return "resource not mapped as array"; + case hipErrorNotMappedAsPointer: + return "resource not mapped as pointer"; + case hipErrorECCNotCorrectable: + return "uncorrectable ECC error encountered"; + case hipErrorUnsupportedLimit: + return "limit is not supported on this architecture"; + case hipErrorContextAlreadyInUse: + return "exclusive-thread device already in use by a different thread"; + case hipErrorPeerAccessUnsupported: + return "peer access is not supported between these two devices"; + case hipErrorInvalidKernelFile: + return "invalid kernel file"; + case hipErrorInvalidGraphicsContext: + return "invalid OpenGL or DirectX context"; + case hipErrorInvalidSource: + return "device kernel image is invalid"; + case hipErrorFileNotFound: + return "file not found"; + case hipErrorSharedObjectSymbolNotFound: + return "shared object symbol not found"; + case hipErrorSharedObjectInitFailed: + return "shared object initialization failed"; + case hipErrorOperatingSystem: + return "OS call failed or operation not supported on this OS"; + case hipErrorInvalidHandle: + return "invalid resource handle"; + case hipErrorIllegalState: + return "the operation cannot be performed in the present state"; + case hipErrorNotFound: + return "named symbol not found"; + case hipErrorNotReady: + return "device not ready"; + case hipErrorIllegalAddress: + return "an illegal memory access was encountered"; + case hipErrorLaunchOutOfResources: + return "too many resources requested for launch"; + case hipErrorLaunchTimeOut: + return "the launch timed out and was terminated"; + case hipErrorPeerAccessAlreadyEnabled: + return "peer access is already enabled"; + case hipErrorPeerAccessNotEnabled: + return "peer access has not been enabled"; + case hipErrorSetOnActiveProcess: + return "cannot set while device is active in this process"; + case hipErrorContextIsDestroyed: + return "context is destroyed"; + case hipErrorAssert: + return 
"device-side assert triggered"; + case hipErrorHostMemoryAlreadyRegistered: + return "part or all of the requested memory range is already mapped"; + case hipErrorHostMemoryNotRegistered: + return "pointer does not correspond to a registered memory region"; + case hipErrorLaunchFailure: + return "unspecified launch failure"; + case hipErrorCooperativeLaunchTooLarge: + return "too many blocks in cooperative launch"; + case hipErrorNotSupported: + return "operation not supported"; + case hipErrorStreamCaptureUnsupported: + return "operation not permitted when stream is capturing"; + case hipErrorStreamCaptureInvalidated: + return "operation failed due to a previous error during capture"; + case hipErrorStreamCaptureMerge: + return "operation would result in a merge of separate capture sequences"; + case hipErrorStreamCaptureUnmatched: + return "capture was not ended in the same stream as it began"; + case hipErrorStreamCaptureUnjoined: + return "capturing stream has unjoined work"; + case hipErrorStreamCaptureIsolation: + return "dependency created on uncaptured work in another stream"; + case hipErrorStreamCaptureImplicit: + return "operation would make the legacy stream depend on a capturing blocking stream"; + case hipErrorCapturedEvent: + return "operation not permitted on an event last recorded in a capturing stream"; + case hipErrorStreamCaptureWrongThread: + return "attempt to terminate a thread-local capture sequence from another thread"; + case hipErrorGraphExecUpdateFailure: + return "the graph update was not performed because it included changes which violated constraints specific to instantiated graph update"; + case hipErrorRuntimeMemory: + return "runtime memory call returned error"; + case hipErrorRuntimeOther: + return "runtime call other than memory returned error"; + case hipErrorUnknown: + default: + return "unknown error"; + } +} + +const char* hipGetErrorName(hipError_t hip_error) +{ + return ihipGetErrorName(hip_error); +} + +const char 
*hipGetErrorString(hipError_t hip_error) +{ + return ihipGetErrorString(hip_error); +} + +hipError_t hipDrvGetErrorName(hipError_t hip_error, const char** errStr) +{ + if (errStr == nullptr) { + return hipErrorInvalidValue; + } + *errStr = ihipGetErrorName(hip_error); + if (hip_error == hipErrorUnknown || strcmp( *errStr, "hipErrorUnknown") != 0) { + return hipSuccess; + } else { + return hipErrorInvalidValue; + } +} + +hipError_t hipDrvGetErrorString(hipError_t hip_error, const char** errStr) +{ + if (errStr == nullptr) { + return hipErrorInvalidValue; + } + *errStr = ihipGetErrorString(hip_error); + if (hip_error == hipErrorUnknown || strcmp( *errStr, "unknown error") != 0) { + return hipSuccess; + } else { + return hipErrorInvalidValue; + } +} diff --git a/projects/clr/hipamd/src/hip_event.cpp b/projects/clr/hipamd/src/hip_event.cpp new file mode 100644 index 0000000000..d2d94602ec --- /dev/null +++ b/projects/clr/hipamd/src/hip_event.cpp @@ -0,0 +1,429 @@ +/* Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include + +#include "hip_event.hpp" +#if !defined(_MSC_VER) +#include +#endif + +namespace hip { + +static amd::Monitor eventSetLock{"Guards global event set"}; +static std::unordered_set eventSet; + +bool Event::ready(eventType type) { + if (event_->status() != CL_COMPLETE) { + event_->notifyCmdQueue(); + } + // Check HW status of the ROCclr event. Note: not all ROCclr modes support HW status + bool ready = CheckHwEvent(type); + if (!ready) { + ready = (event_->status() == CL_COMPLETE); + } + return ready; +} + +bool EventDD::ready(eventType type) { + // Check HW status of the ROCclr event. Note: not all ROCclr modes support HW status + bool ready = CheckHwEvent(type); + // FIXME: Remove status check entirely + if (!ready) { + ready = (event_->status() == CL_COMPLETE); + } + return ready; +} + +hipError_t Event::query() { + amd::ScopedLock lock(lock_); + + // If event is not recorded, event_ is null, hence return hipSuccess + if (event_ == nullptr) { + return hipSuccess; + } + + return ready(Query) ? hipSuccess : hipErrorNotReady; +} + +hipError_t Event::synchronize() { + amd::ScopedLock lock(lock_); + + // If event is not recorded, event_ is null, hence return hipSuccess + if (event_ == nullptr) { + return hipSuccess; + } + + // Check HW status of the ROCclr event.
Note: not all ROCclr modes support HW status + static constexpr bool kWaitCompletion = true; + if (!g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, kWaitCompletion)) { + if (event_->HwEvent() != nullptr) { + amd::Command* command = nullptr; + hipError_t status = recordCommand(command, event_->command().queue(), flags); + command->enqueue(); + g_devices[deviceId()]->devices()[0]->IsHwEventReady(command->event(), kWaitCompletion); + command->release(); + } else { + event_->awaitCompletion(); + } + } + + return hipSuccess; +} + +bool Event::awaitEventCompletion() { + return event_->awaitCompletion(); +} + +bool EventDD::awaitEventCompletion() { + return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true); +} + +hipError_t Event::elapsedTime(Event& eStop, float& ms) { + amd::ScopedLock startLock(lock_); + if (this == &eStop) { + ms = 0.f; + if (event_ == nullptr) { + return hipErrorInvalidHandle; + } + + if (flags & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready(ElapsedTime)) { + return hipErrorNotReady; + } + + return hipSuccess; + } + amd::ScopedLock stopLock(eStop.lock()); + + if (event_ == nullptr || eStop.event() == nullptr) { + return hipErrorInvalidHandle; + } + + if ((flags | eStop.flags) & hipEventDisableTiming) { + return hipErrorInvalidHandle; + } + + if (!ready(ElapsedTime) || !eStop.ready(ElapsedTime)) { + return hipErrorNotReady; + } + + if (event_ == eStop.event_) { + // Events are the same, which indicates the stream is empty and likely + // eventRecord is called on another stream. For such cases insert and measure a + // marker. + amd::Command* command = new amd::Marker(*event_->command().queue(), kMarkerDisableFlush); + command->enqueue(); + command->awaitCompletion(); + ms = static_cast(static_cast(command->event().profilingInfo().end_) - time(false)) / + 1000000.f; + command->release(); + } else { + // Note: with direct dispatch eStop.ready() relies on HW event, but CPU status can be delayed. 
+ // Hence for now make sure CPU status is updated by calling awaitCompletion(); + awaitEventCompletion(); + eStop.awaitEventCompletion(); + if (unrecorded_ && eStop.isUnRecorded()) { + // Both the events are not recorded, just need the end and start of stop event + ms = static_cast(eStop.time(false) - eStop.time(true)) / 1000000.f; + } else { + ms = static_cast(eStop.time(false) - time(false)) / 1000000.f; + } + } + return hipSuccess; +} + +int64_t Event::time(bool getStartTs) const { + assert(event_ != nullptr); + if (getStartTs) { + return static_cast(event_->profilingInfo().start_); + } else { + return static_cast(event_->profilingInfo().end_); + } +} + +int64_t EventDD::time(bool getStartTs) const { + uint64_t start = 0, end = 0; + assert(event_ != nullptr); + g_devices[deviceId()]->devices()[0]->getHwEventTime(*event_, &start, &end); + // FIXME: This is only needed if the command had to wait CL_COMPLETE status + if (start == 0 || end == 0) { + return Event::time(getStartTs); + } + if (getStartTs) { + return static_cast(start); + } else { + return static_cast(end); + } +} + +hipError_t Event::streamWaitCommand(amd::Command*& command, hip::Stream* stream) { + amd::Command::EventWaitList eventWaitList; + if (event_ != nullptr) { + eventWaitList.push_back(event_); + } + command = new amd::Marker(*stream, kMarkerDisableFlush, eventWaitList); + + if (command == NULL) { + return hipErrorOutOfMemory; + } + return hipSuccess; +} + +hipError_t Event::enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command) { + command->enqueue(); + return hipSuccess; +} + +hipError_t Event::streamWait(hipStream_t stream, uint flags) { + hip::Stream* hip_stream = hip::getStream(stream); + // Access to event_ object must be lock protected + amd::ScopedLock lock(lock_); + if ((event_ == nullptr) || (event_->command().queue() == hip_stream) || ready(StreamWait)) { + return hipSuccess; + } + if (!event_->notifyCmdQueue()) { + return hipErrorLaunchOutOfResources; + } + 
amd::Command* command; + hipError_t status = streamWaitCommand(command, hip_stream); + if (status != hipSuccess) { + return status; + } + status = enqueueStreamWaitCommand(stream, command); + if (status != hipSuccess) { + return status; + } + command->release(); + return hipSuccess; +} + +hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* stream, + uint32_t ext_flags ) { + if (command == nullptr) { + int32_t releaseFlags = ((ext_flags == 0) ? flags : ext_flags) & + (hipEventReleaseToDevice | hipEventReleaseToSystem | + hipEventDisableSystemFence); + if (releaseFlags & hipEventDisableSystemFence) { + releaseFlags = amd::Device::kCacheStateIgnore; + } else { + releaseFlags = amd::Device::kCacheStateInvalid; + } + // Always submit a EventMarker. + command = new hip::EventMarker(*stream, !kMarkerDisableFlush, true, releaseFlags); + } + return hipSuccess; +} + +hipError_t Event::enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record) { + command->enqueue(); + if (event_ == &command->event()) return hipSuccess; + if (event_ != nullptr) { + event_->release(); + } + event_ = &command->event(); + unrecorded_ = !record; + + return hipSuccess; +} + +hipError_t Event::addMarker(hipStream_t stream, amd::Command* command, bool record) { + hip::Stream* hip_stream = hip::getStream(stream); + // Keep the lock always at the beginning of this to avoid a race. 
SWDEV-277847 + amd::ScopedLock lock(lock_); + hipError_t status = recordCommand(command, hip_stream); + if (status != hipSuccess) { + // Propagate the failure rather than reporting a successful record + return status; + } + status = enqueueRecordCommand(stream, command, record); + return status; +} + +// ================================================================================================ +bool isValid(hipEvent_t event) { + // NULL event is always valid + if (event == nullptr) { + return true; + } + + amd::ScopedLock lock(eventSetLock); + if (eventSet.find(event) == eventSet.end()) { + return false; + } + + return true; +} + +} // namespace hip +// ================================================================================================ +hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | + hipEventReleaseToDevice | hipEventReleaseToSystem | + hipEventInterprocess | hipEventDisableSystemFence; + + const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem | + hipEventDisableSystemFence); + // can't set any unsupported flags. + // can set only one of the release flags.
+ // if hipEventInterprocess flag is set, then hipEventDisableTiming flag also must be set + const bool illegalFlags = (flags & ~supportedFlags) || + ([](unsigned int num){ + unsigned int bitcount; + for (bitcount = 0; num; bitcount++) { + num &= num - 1; + } + return bitcount; } (flags & releaseFlags) > 1) || + ((flags & hipEventInterprocess) && !(flags & hipEventDisableTiming)); + if (!illegalFlags) { + hip::Event* e = nullptr; + if (flags & hipEventInterprocess) { + e = new hip::IPCEvent(); + } else { + if (AMD_DIRECT_DISPATCH) { + e = new hip::EventDD(flags); + } else { + e = new hip::Event(flags); + } + } + if (e == nullptr) { + return hipErrorOutOfMemory; + } + *event = reinterpret_cast(e); + amd::ScopedLock lock(hip::eventSetLock); + hip::eventSet.insert(*event); + } else { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) { + HIP_INIT_API(hipEventCreateWithFlags, event, flags); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipEventCreateWithFlags(event, flags), *event); +} + +hipError_t hipEventCreate(hipEvent_t* event) { + HIP_INIT_API(hipEventCreate, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipEventCreateWithFlags(event, 0), *event); +} + +hipError_t hipEventDestroy(hipEvent_t event) { + HIP_INIT_API(hipEventDestroy, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::ScopedLock lock(hip::eventSetLock); + if (hip::eventSet.erase(event) == 0 ) { + // Leave through HIP_RETURN like every other exit of this entry point + HIP_RETURN(hipErrorContextIsDestroyed); + } + + hip::Event* e = reinterpret_cast(event); + if (e->GetCaptureStream() != nullptr) { + reinterpret_cast(e->GetCaptureStream())->EraseCaptureEvent(event); + } + delete e; + HIP_RETURN(hipSuccess); +} + +hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) { + HIP_INIT_API(hipEventElapsedTime, ms, start, stop); + + if (ms == nullptr) {
HIP_RETURN(hipErrorInvalidValue); + } + + if (start == nullptr || stop == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* eStart = reinterpret_cast(start); + hip::Event* eStop = reinterpret_cast(stop); + + if (eStart->deviceId() != eStop->deviceId()) { + HIP_RETURN(hipErrorInvalidHandle); + } + + HIP_RETURN(eStart->elapsedTime(*eStop, *ms), "Elapsed Time = ", *ms); +} + +hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream) { + STREAM_CAPTURE(hipEventRecord, stream, event); + + if (event == nullptr) { + return hipErrorInvalidHandle; + } + hip::Event* e = reinterpret_cast(event); + hip::Stream* hip_stream = hip::getStream(stream); + if (g_devices[e->deviceId()]->devices()[0] != &hip_stream->device()) { + return hipErrorInvalidHandle; + } + return e->addMarker(stream, nullptr, true); +} + +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { + HIP_INIT_API(hipEventRecord, event, stream); + HIP_RETURN(hipEventRecord_common(event, stream)); +} + +hipError_t hipEventRecord_spt(hipEvent_t event, hipStream_t stream) { + HIP_INIT_API(hipEventRecord, event, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipEventRecord_common(event, stream)); +} + +hipError_t hipEventSynchronize(hipEvent_t event) { + HIP_INIT_API(hipEventSynchronize, event); + + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + hip::Event* e = reinterpret_cast(event); + HIP_RETURN(e->synchronize()); +} + +hipError_t ihipEventQuery(hipEvent_t event) { + if (event == nullptr) { + return hipErrorInvalidHandle; + } + + hip::Event* e = reinterpret_cast(event); + return e->query(); +} + +hipError_t hipEventQuery(hipEvent_t event) { + HIP_INIT_API(hipEventQuery, event); + HIP_RETURN(ihipEventQuery(event)); +} diff --git a/projects/clr/hipamd/src/hip_event.hpp b/projects/clr/hipamd/src/hip_event.hpp new file mode 100644 index 0000000000..74f57ca163 --- /dev/null +++ b/projects/clr/hipamd/src/hip_event.hpp @@ -0,0 +1,253 @@ +/* Copyright 
(c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef HIP_EVENT_H +#define HIP_EVENT_H + +#include "hip_internal.hpp" +#include "thread/monitor.hpp" + +// Internal structure for stream callback handler +class StreamCallback { +protected: + void* userData_; + public: + StreamCallback(void* userData) + : userData_(userData) {} + + virtual void CL_CALLBACK callback() = 0; +}; + +class StreamAddCallback : public StreamCallback { + hipStreamCallback_t callBack_; + hipStream_t stream_; +public: + StreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData) + : StreamCallback(userData) { + stream_ = stream; + callBack_ = callback; + } + + void CL_CALLBACK callback() { + hipError_t status = hipSuccess; + callBack_(stream_, status, userData_); + } +}; + +class LaunchHostFuncCallback : public StreamCallback { + hipHostFn_t callBack_; + public: + LaunchHostFuncCallback(hipHostFn_t callback, void* userData) + : StreamCallback(userData) { + callBack_ = callback; + } + + void CL_CALLBACK callback() { callBack_(userData_); } +}; + +void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data); + +namespace hip { + +#define IPC_SIGNALS_PER_EVENT 32 +typedef struct ihipIpcEventShmem_s { + std::atomic owners; + std::atomic owners_device_id; + std::atomic owners_process_id; + std::atomic read_index; + std::atomic write_index; + uint32_t signal[IPC_SIGNALS_PER_EVENT]; +} ihipIpcEventShmem_t; + +class EventMarker : public amd::Marker { + public: + EventMarker(amd::HostQueue& stream, bool disableFlush, bool markerTs = false, + int32_t scope = amd::Device::kCacheStateInvalid) + : amd::Marker(stream, disableFlush) { + profilingInfo_.enabled_ = true; + profilingInfo_.callback_ = nullptr; + profilingInfo_.marker_ts_ = markerTs; + profilingInfo_.clear(); + setEventScope(scope); + } +}; + +enum eventType { Query, StreamWait, ElapsedTime }; +class Event { + /// event recorded on stream where capture is active + bool onCapture_; + /// capture stream where event is recorded + 
hipStream_t captureStream_ = nullptr; + /// Previous captured nodes before event record + std::vector nodesPrevToRecorded_; + protected: + bool CheckHwEvent(eventType type) { + bool ready; + if (type == Query) { + ready = g_devices[deviceId()]->devices()[0]->IsHwEventReadyForcedWait(*event_); + } else { + ready = g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_); + } + return ready; + } + + public: + Event(unsigned int flags) : flags(flags), lock_("hipEvent_t", true), + event_(nullptr), unrecorded_(false), stream_(nullptr) { + // No need to init event_ here as addMarker does that + onCapture_ = false; + device_id_ = hip::getCurrentDevice()->deviceId(); // Created in current device ctx + } + + virtual ~Event() { + if (event_ != nullptr) { + event_->release(); + } + } + unsigned int flags; + + virtual hipError_t query(); + virtual hipError_t synchronize(); + hipError_t elapsedTime(Event& eStop, float& ms); + + virtual hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream); + virtual hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command); + virtual hipError_t streamWait(hipStream_t stream, uint flags); + + virtual hipError_t recordCommand(amd::Command*& command, amd::HostQueue* stream, + uint32_t flags = 0); + virtual hipError_t enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record); + hipError_t addMarker(hipStream_t stream, amd::Command* command, bool record); + + void BindCommand(amd::Command& command, bool record) { + amd::ScopedLock lock(lock_); + if (event_ != nullptr) { + event_->release(); + } + event_ = &command.event(); + unrecorded_ = !record; + command.retain(); + } + + bool isUnRecorded() const { return unrecorded_; } + amd::Monitor& lock() { return lock_; } + const int deviceId() const { return device_id_; } + void setDeviceId(int id) { device_id_ = id; } + amd::Event* event() { return event_; } + + /// End capture on this event + void EndCapture() { + onCapture_ = false; + 
captureStream_ = nullptr; + } + /// Start capture when waited on this event + void StartCapture(hipStream_t stream) { + onCapture_ = true; + captureStream_ = stream; + } + /// Get capture status of the graph + bool GetCaptureStatus() const { return onCapture_; } + /// Get capture stream where event is recorded + hipStream_t GetCaptureStream() const { return captureStream_; } + /// Set capture stream where event is recorded + void SetCaptureStream(hipStream_t stream) { captureStream_ = stream; } + /// Returns previous captured nodes before event record + std::vector GetNodesPrevToRecorded() const { return nodesPrevToRecorded_; } + /// Set last captured graph node before event record + void SetNodesPrevToRecorded(std::vector& graphNode) { + nodesPrevToRecorded_ = graphNode; + } + virtual hipError_t GetHandle(ihipIpcEventHandle_t* handle) { + return hipErrorInvalidConfiguration; + } + virtual hipError_t OpenHandle(ihipIpcEventHandle_t* handle) { + return hipErrorInvalidConfiguration; + } + virtual bool awaitEventCompletion(); + virtual bool ready(eventType type); + virtual int64_t time(bool getStartTs) const; + + protected: + amd::Monitor lock_; + hip::Stream* stream_; + amd::Event* event_; + int device_id_; + //! Flag to indicate hipEventRecord has not been called. This is needed for + //! hip*ModuleLaunchKernel API which takes start and stop events so no + //! hipEventRecord is called. Cleanup needed once those APIs are deprecated. 
+ bool unrecorded_; +}; + +class EventDD : public Event { + public: + EventDD(unsigned int flags) : Event(flags) {} + virtual ~EventDD() {} + + virtual bool awaitEventCompletion(); + virtual bool ready(eventType type); + virtual int64_t time(bool getStartTs) const; +}; + +class IPCEvent : public Event { + // IPC Events + struct ihipIpcEvent_t { + std::string ipc_name_; + int ipc_fd_; + ihipIpcEventShmem_t* ipc_shmem_; + ihipIpcEvent_t() : ipc_name_("dummy"), ipc_fd_(0), ipc_shmem_(nullptr) {} + void setipcname(const char* name) { ipc_name_ = std::string(name); } + }; + ihipIpcEvent_t ipc_evt_; + + public: + ~IPCEvent() { + if (ipc_evt_.ipc_shmem_) { + int owners = --ipc_evt_.ipc_shmem_->owners; + // Make sure event is synchronized + hipError_t status = synchronize(); + status = ihipHostUnregister(&ipc_evt_.ipc_shmem_->signal); + if (!amd::Os::MemoryUnmapFile(ipc_evt_.ipc_shmem_, sizeof(hip::ihipIpcEventShmem_t))) { + // print hipErrorInvalidHandle; + } + } + } + IPCEvent() : Event(hipEventInterprocess) {} + bool createIpcEventShmemIfNeeded(); + hipError_t GetHandle(ihipIpcEventHandle_t* handle); + hipError_t OpenHandle(ihipIpcEventHandle_t* handle); + hipError_t synchronize(); + hipError_t query(); + + hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream); + hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command); + hipError_t streamWait(hipStream_t stream, uint flags); + + hipError_t recordCommand(amd::Command*& command, amd::HostQueue* queue, uint32_t flags = 0); + hipError_t enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record); +}; + +}; // namespace hip + +struct CallbackData { + int previous_read_index; + hip::ihipIpcEventShmem_t* shmem; +}; + +#endif // HIP_EVENT_H diff --git a/projects/clr/hipamd/src/hip_event_ipc.cpp b/projects/clr/hipamd/src/hip_event_ipc.cpp new file mode 100644 index 0000000000..706b3d4448 --- /dev/null +++ b/projects/clr/hipamd/src/hip_event_ipc.cpp @@ -0,0 +1,250 @@ +/*
Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_event.hpp" +#if !defined(_MSC_VER) +#include +#else +#include +#endif + +// ================================================================================================ + +hipError_t ihipEventCreateWithFlags(hipEvent_t* event, unsigned flags); + +namespace hip { + +bool IPCEvent::createIpcEventShmemIfNeeded() { + if (ipc_evt_.ipc_shmem_) { + // ipc_shmem_ already created, no need to create it again + return true; + } + + char name_template[] = "/tmp/eventXXXXXX"; +#if !defined(_MSC_VER) + int temp_fd = mkstemp(name_template); +#else + _mktemp_s(name_template, sizeof(name_template)); +#endif + + ipc_evt_.ipc_name_ = name_template; + ipc_evt_.ipc_name_.replace(0, 5, "/hip_"); + if (!amd::Os::MemoryMapFileTruncated( + ipc_evt_.ipc_name_.c_str(), + const_cast(reinterpret_cast(&(ipc_evt_.ipc_shmem_))), + sizeof(hip::ihipIpcEventShmem_t))) { +#if !defined(_MSC_VER) + // Mapping failed: close the mkstemp descriptor here, the close below is never reached + close(temp_fd); +#endif + return false; + } + +#if !defined(_MSC_VER) + close(temp_fd); +#endif + + ipc_evt_.ipc_shmem_->owners = 1; + ipc_evt_.ipc_shmem_->read_index = -1; + ipc_evt_.ipc_shmem_->write_index = 0; + for (uint32_t sig_idx = 0; sig_idx < IPC_SIGNALS_PER_EVENT; ++sig_idx) { + ipc_evt_.ipc_shmem_->signal[sig_idx] = 0; + } + + // device sets 0 to this ptr when the ipc event is completed + hipError_t status = ihipHostRegister(&ipc_evt_.ipc_shmem_->signal, + sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT, + 0); + if (status != hipSuccess) { + return false; + } + return true; +} + +hipError_t IPCEvent::query() { + if (ipc_evt_.ipc_shmem_) { + int prev_read_idx = ipc_evt_.ipc_shmem_->read_index; + // read_index is initialised to -1 when the shared block is created; a negative index must not be used to address signal[] + if (prev_read_idx >= 0) { + int offset = (prev_read_idx % IPC_SIGNALS_PER_EVENT); + if (ipc_evt_.ipc_shmem_->read_index < prev_read_idx + IPC_SIGNALS_PER_EVENT && + ipc_evt_.ipc_shmem_->signal[offset] != 0) { + return hipErrorNotReady; + } + } + } + return hipSuccess; +} + +hipError_t IPCEvent::synchronize() { + if (ipc_evt_.ipc_shmem_) { + int prev_read_idx = ipc_evt_.ipc_shmem_->read_index; + if (prev_read_idx >= 0) { + int offset = (prev_read_idx %
IPC_SIGNALS_PER_EVENT); + while ((ipc_evt_.ipc_shmem_->read_index < prev_read_idx + IPC_SIGNALS_PER_EVENT) && + (ipc_evt_.ipc_shmem_->signal[offset] != 0)) { + amd::Os::sleep(1); + } + } + } + return hipSuccess; +} + +hipError_t IPCEvent::streamWaitCommand(amd::Command*& command, hip::Stream* stream) { + command = new amd::Marker(*stream, false); + if (command == NULL) { + return hipErrorOutOfMemory; + } + return hipSuccess; +} + +hipError_t IPCEvent::enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command) { + auto t{new CallbackData{ipc_evt_.ipc_shmem_->read_index, ipc_evt_.ipc_shmem_}}; + StreamCallback* cbo = new StreamAddCallback( + stream, reinterpret_cast(WaitThenDecrementSignal), t); + if (!command->setCallback(CL_COMPLETE, ihipStreamCallback, cbo)) { + command->release(); + return hipErrorInvalidHandle; + } + command->enqueue(); + command->release(); + command->awaitCompletion(); + return hipSuccess; +} + +hipError_t IPCEvent::streamWait(hipStream_t stream, uint flags) { + hip::Stream* hip_stream = hip::getStream(stream); + + amd::ScopedLock lock(lock_); + if(query() != hipSuccess) { + amd::Command* command; + hipError_t status = streamWaitCommand(command, hip_stream); + if (status != hipSuccess) { + return status; + } + status = enqueueStreamWaitCommand(stream, command); + return status; + } + return hipSuccess; +} + +hipError_t IPCEvent::recordCommand(amd::Command*& command, amd::HostQueue* stream, uint32_t flags) { + bool unrecorded = isUnRecorded(); + if (unrecorded) { + command = new amd::Marker(*stream, kMarkerDisableFlush); + } else { + // Forward the caller's flags; the default of 0 keeps the previous behaviour + return Event::recordCommand(command, stream, flags); + } + return hipSuccess; +} + +hipError_t IPCEvent::enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record) { + bool unrecorded = isUnRecorded(); + if (unrecorded) { + amd::Event& tEvent = command->event(); + // ipc_shmem_ is dereferenced below, so stop if the shared block could not be set up (same check as GetHandle) + if (!createIpcEventShmemIfNeeded()) { + return hipErrorInvalidValue; + } + int write_index = ipc_evt_.ipc_shmem_->write_index++; + int offset = write_index %
IPC_SIGNALS_PER_EVENT; + while (ipc_evt_.ipc_shmem_->signal[offset] != 0) { + amd::Os::sleep(1); + } + // Lock signal. + ipc_evt_.ipc_shmem_->signal[offset] = 1; + ipc_evt_.ipc_shmem_->owners_device_id = deviceId(); + command->enqueue(); + + // device writes 0 to signal after the hipEventRecord command is completed + // the signal value is checked by WaitThenDecrementSignal cb + hipError_t status = ihipStreamOperation(stream, ROCCLR_COMMAND_STREAM_WRITE_VALUE, + &(ipc_evt_.ipc_shmem_->signal[offset]), + 0, + 0, 0, sizeof(uint32_t)); + if (status != hipSuccess) { + return status; + } + + // Update read index to indicate new signal. + int expected = write_index - 1; + while (!ipc_evt_.ipc_shmem_->read_index.compare_exchange_weak(expected, write_index)) { + amd::Os::sleep(1); + } + } else { + return Event::enqueueRecordCommand(stream, command, record); + } + return hipSuccess; +} + +hipError_t IPCEvent::GetHandle(ihipIpcEventHandle_t* handle) { + if (!createIpcEventShmemIfNeeded()) { + return hipErrorInvalidValue; + } + ipc_evt_.ipc_shmem_->owners_device_id = deviceId(); + ipc_evt_.ipc_shmem_->owners_process_id = amd::Os::getProcessId(); + memset(handle->shmem_name, 0, HIP_IPC_HANDLE_SIZE); + ipc_evt_.ipc_name_.copy(handle->shmem_name, std::string::npos); + return hipSuccess; +} + +hipError_t IPCEvent::OpenHandle(ihipIpcEventHandle_t* handle) { + ipc_evt_.ipc_name_ = handle->shmem_name; + if (!amd::Os::MemoryMapFileTruncated(ipc_evt_.ipc_name_.c_str(), + (const void**)&(ipc_evt_.ipc_shmem_), + sizeof(ihipIpcEventShmem_t))) { + return hipErrorInvalidValue; + } + + if (amd::Os::getProcessId() == ipc_evt_.ipc_shmem_->owners_process_id.load()) { + // If this is in the same process, return error. 
+ return hipErrorInvalidContext; + } + + ipc_evt_.ipc_shmem_->owners += 1; + // device sets 0 to this ptr when the ipc event is completed + hipError_t status = hipSuccess; + status = ihipHostRegister(&ipc_evt_.ipc_shmem_->signal, + sizeof(uint32_t) * IPC_SIGNALS_PER_EVENT, + 0); + return status; +} + +} // namespace hip + +// ================================================================================================ + +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) { + HIP_INIT_API(hipIpcGetEventHandle, handle, event); + + if (handle == nullptr || event == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hip::Event* e = reinterpret_cast(event); + HIP_RETURN(e->GetHandle(reinterpret_cast(handle))); +} + +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) { + HIP_INIT_API(hipIpcOpenEventHandle, event, handle); + + hipError_t hip_err = hipSuccess; + if (event == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hip_err = ihipEventCreateWithFlags(event, hipEventDisableTiming | hipEventInterprocess); + if (hip_err != hipSuccess) { + HIP_RETURN(hip_err); + } + hip::Event* e = reinterpret_cast(*event); + ihipIpcEventHandle_t* iHandle = reinterpret_cast(&handle); + HIP_RETURN(e->OpenHandle(iHandle)); +} diff --git a/projects/clr/hipamd/src/hip_fatbin.cpp b/projects/clr/hipamd/src/hip_fatbin.cpp new file mode 100644 index 0000000000..c2b7ff75b9 --- /dev/null +++ b/projects/clr/hipamd/src/hip_fatbin.cpp @@ -0,0 +1,345 @@ +#include "hip_fatbin.hpp" + +#include +#include "hip_code_object.hpp" + +namespace hip { + +FatBinaryDeviceInfo::~FatBinaryDeviceInfo() { + if (program_ != nullptr) { + program_->unload(); + program_->release(); + program_ = nullptr; + } +} + +FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) : fdesc_(amd::Os::FDescInit()), + fsize_(0), foffset_(0), image_(image), image_mapped_(false), + uri_(std::string()) { + + if (fname != nullptr) { + fname_ = std::string(fname); + 
} else { + fname_ = std::string(); + } + + fatbin_dev_info_.resize(g_devices.size(), nullptr); +} + +FatBinaryInfo::~FatBinaryInfo() { + + for (auto* fbd: fatbin_dev_info_) { + if (fbd != nullptr) { + delete fbd; + } + } + + if (fdesc_ > 0) { + if (fsize_ && image_mapped_ && !amd::Os::MemoryUnmapFile(image_, fsize_)) { + guarantee(false, "Cannot unmap file"); + } + if (!amd::Os::CloseFileHandle(fdesc_)) { + guarantee(false, "Cannot close file"); + } + } + + fname_ = std::string(); + fdesc_ = amd::Os::FDescInit(); + fsize_ = 0; + image_ = nullptr; + uri_ = std::string(); +} + +hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector& devices) { + amd_comgr_data_t data_object; + amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS; + hipError_t hip_status = hipSuccess; + amd_comgr_code_object_info_t* query_list_array = nullptr; + + // If image was passed as a pointer to our hipMod* api, we can try to extract the file name + // if it was mapped by the app. Otherwise use the COMGR data API. + if (fname_.size() == 0) { + if (image_ == nullptr) { + LogError("Both Filename and image cannot be null"); + return hipErrorInvalidValue; + } + + if(!amd::Os::FindFileNameFromAddress(image_, &fname_, &foffset_)) { + fname_ = std::string(""); + foffset_ = 0; + } + } + + // If file name & path are available (or it is passed to you), then get the file desc to use + // COMGR file slice APIs. + if (fname_.size() > 0) { + // Get File Handle & size of the file. + if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) + return hipErrorFileNotFound; + + // If the file name exists but the file size is 0, the something wrong with the file or its path + if (fsize_ == 0) + return hipErrorInvalidValue; + + // If image_ is nullptr, then file path is passed via hipMod* APIs, so map the file. 
+ if (image_ == nullptr && !amd::Os::MemoryMapFileDesc(fdesc_, fsize_, foffset_, &image_) + && (image_mapped_ = true)) { + LogError("Cannot map the file descriptor"); + amd::Os::CloseFileHandle(fdesc_); + return hipErrorInvalidValue; + } + } + + // At this line, image should be a valid ptr. + guarantee(image_ != nullptr, "Image cannot be nullptr, file did not map for some reason"); + + do { + + // If the image ptr is not clang offload bundle then just directly point the image. + if (!CodeObject::IsClangOffloadMagicBundle(image_)) { + for (size_t dev_idx=0; dev_idx < devices.size(); ++dev_idx) { + fatbin_dev_info_[devices[dev_idx]->deviceId()] + = new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0); + fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ + = new amd::Program(*devices[dev_idx]->asContext()); + if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) { + hip_status = hipErrorOutOfMemory; + break; + } + } + break; + } + + // Create a data object, if it fails return error + if ((comgr_status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)) + != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Creating data object failed with status %d ", comgr_status); + hip_status = hipErrorInvalidValue; + break; + } + +#if !defined(_WIN32) + // Using the file descriptor and file size, map the data object. + if (fdesc_ > 0) { + guarantee(fsize_ > 0, "Cannot have a file size of 0"); + if ((comgr_status = amd_comgr_set_data_from_file_slice(data_object, fdesc_, foffset_, + fsize_)) != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Setting data from file slice failed with status %d ", comgr_status); + hip_status = hipErrorInvalidValue; + break; + } + } else +#endif + if (image_ != nullptr) { + // Using the image ptr, map the data object. 
+ if ((comgr_status = amd_comgr_set_data(data_object, 4096, + reinterpret_cast(image_))) != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Setting data from file slice failed with status %d ", comgr_status); + hip_status = hipErrorInvalidValue; + break; + } + } else { + guarantee(false, "Cannot have both fname_ and image_ as nullptr"); + } + + // Find the unique number of ISAs needed for this COMGR query. + std::unordered_map> unique_isa_names; + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + std::string device_name = devices[dev_idx]->devices()[0]->isa().isaName(); + if (unique_isa_names.cend() == unique_isa_names.find(device_name)) { + unique_isa_names.insert({device_name, std::make_pair(0,0)}); + } + } + + // Create a query list using COMGR info for unique ISAs. + query_list_array = new amd_comgr_code_object_info_t[unique_isa_names.size()]; + auto isa_it = unique_isa_names.begin(); + for (size_t isa_idx = 0; isa_idx < unique_isa_names.size(); ++isa_idx, ++isa_it) { + // isa_it steps once per slot; advancing it by isa_idx each pass skipped entries and overran end(). + query_list_array[isa_idx].isa = isa_it->first.c_str(); + query_list_array[isa_idx].size = 0; + query_list_array[isa_idx].offset = 0; + } + + // Look up the code object info passing the query list. 
+ if ((comgr_status = amd_comgr_lookup_code_object(data_object, query_list_array, + unique_isa_names.size())) != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Setting data from file slice failed with status %d ", comgr_status); + hip_status = hipErrorInvalidValue; + break; + } + + for (size_t isa_idx = 0; isa_idx < unique_isa_names.size(); ++isa_idx) { + auto unique_it = unique_isa_names.find(query_list_array[isa_idx].isa); + guarantee(unique_isa_names.cend() != unique_it, "Cannot find unique isa"); + unique_it->second = std::pair + (static_cast(query_list_array[isa_idx].size), + static_cast(query_list_array[isa_idx].offset)); + } + + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + std::string device_name = devices[dev_idx]->devices()[0]->isa().isaName(); + auto dev_it = unique_isa_names.find(device_name); + guarantee(unique_isa_names.cend() != dev_it, + "Cannot find the device name in the unique device name"); + fatbin_dev_info_[devices[dev_idx]->deviceId()] + = new FatBinaryDeviceInfo(reinterpret_cast
(const_cast(image_)) + + dev_it->second.second, dev_it->second.first, + dev_it->second.second); + fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ + = new amd::Program(*devices[dev_idx]->asContext()); + } + + } while(0); + + if (query_list_array) { + delete[] query_list_array; + } + + // Clean up file and memory resouces if hip_status failed for some reason. + if (hip_status != hipSuccess && hip_status != hipErrorInvalidKernelFile) { + if (image_mapped_) { + if (!amd::Os::MemoryUnmapFile(image_, fsize_)) + guarantee(false, "Cannot unmap the file"); + + image_ = nullptr; + image_mapped_ = false; + } + + if (fdesc_ > 0) { + guarantee(fsize_ > 0, "Size has to greater than 0 too"); + if (!amd::Os::CloseFileHandle(fdesc_)) + guarantee(false, "Cannot close the file handle"); + + fdesc_ = 0; + fsize_ = 0; + } + + if ((comgr_status = amd_comgr_release_data(data_object)) != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("Releasing COMGR data failed with status %d ", comgr_status); + return hipErrorInvalidValue; + } + } + + return hip_status; +} + +hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devices) { + if (!HIP_USE_RUNTIME_UNBUNDLER) { + return ExtractFatBinaryUsingCOMGR(devices); + } + + hipError_t hip_error = hipSuccess; + std::vector> code_objs; + + // Copy device names for Extract Code object File + std::vector device_names; + device_names.reserve(devices.size()); + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); + } + + // We are given file name, get the file desc and file size + if (fname_.size() > 0) { + // Get File Handle & size of the file. 
+ if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) { + return hipErrorFileNotFound; + } + if (fsize_ == 0) { + return hipErrorInvalidImage; + } + + // Extract the code object from file + hip_error = CodeObject::ExtractCodeObjectFromFile(fdesc_, fsize_, &image_, + device_names, code_objs); + + } else if (image_ != nullptr) { + // We are directly given image pointer directly, try to extract file desc & file Size + hip_error = CodeObject::ExtractCodeObjectFromMemory(image_, + device_names, code_objs, uri_); + } else { + return hipErrorInvalidValue; + } + + if (hip_error == hipErrorNoBinaryForGpu) { + LogPrintfError("hipErrorNoBinaryForGpu: Couldn't find binary for current devices! - %d",hip_error); + return hip_error; + } + + if (hip_error == hipErrorInvalidKernelFile) { + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + // the image type is no CLANG_OFFLOAD_BUNDLER, image for current device directly passed + fatbin_dev_info_[devices[dev_idx]->deviceId()] + = new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0); + } + } else if(hip_error == hipSuccess) { + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + // Calculate the offset wrt binary_image and the original image + size_t offset_l + = (reinterpret_cast
(const_cast(code_objs[dev_idx].first)) + - reinterpret_cast
(const_cast(image_))); + + fatbin_dev_info_[devices[dev_idx]->deviceId()] + = new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, offset_l); + } + } + + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ + = new amd::Program(*devices[dev_idx]->asContext()); + if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) { + return hipErrorOutOfMemory; + } + } + + return hipSuccess; +} + +hipError_t FatBinaryInfo::AddDevProgram(const int device_id) { + // Device Id bounds Check + DeviceIdCheck(device_id); + + FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; + if (fbd_info == nullptr) { + return hipErrorInvalidKernelFile; + } + + // If fat binary was already added, skip this step and return success + if (fbd_info->add_dev_prog_ == false) { + amd::Context* ctx = g_devices[device_id]->asContext(); + if (CL_SUCCESS != fbd_info->program_->addDeviceProgram(*ctx->devices()[0], + fbd_info->binary_image_, + fbd_info->binary_size_, false, + nullptr, nullptr, fdesc_, + fbd_info->binary_offset_, uri_)) { + return hipErrorInvalidKernelFile; + } + fbd_info->add_dev_prog_ = true; + } + return hipSuccess; +} + +hipError_t FatBinaryInfo::BuildProgram(const int device_id) { + + // Device Id Check and Add DeviceProgram if not added so far + DeviceIdCheck(device_id); + IHIP_RETURN_ONFAIL(AddDevProgram(device_id)); + + // If Program was already built skip this step and return success + FatBinaryDeviceInfo* fbd_info = fatbin_dev_info_[device_id]; + if (fbd_info->prog_built_ == false) { + if(CL_SUCCESS != fbd_info->program_->build(g_devices[device_id]->devices(), + nullptr, nullptr, nullptr, + kOptionChangeable, kNewDevProg)) { + return hipErrorSharedObjectInitFailed; + } + fbd_info->prog_built_ = true; + } + + if (!fbd_info->program_->load()) { + return hipErrorSharedObjectInitFailed; + } + return hipSuccess; +} + +} //namespace : hip diff --git 
a/projects/clr/hipamd/src/hip_fatbin.hpp b/projects/clr/hipamd/src/hip_fatbin.hpp new file mode 100644 index 0000000000..d903550d81 --- /dev/null +++ b/projects/clr/hipamd/src/hip_fatbin.hpp @@ -0,0 +1,90 @@ +#ifndef HIP_FAT_BINARY_HPP +#define HIP_FAT_BINARY_HPP + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "hip_internal.hpp" +#include "platform/program.hpp" + +namespace hip { + +//Fat Binary Per Device info +class FatBinaryDeviceInfo { +public: + FatBinaryDeviceInfo (const void* binary_image, size_t binary_size, size_t binary_offset) + : binary_image_(binary_image), binary_size_(binary_size), + binary_offset_(binary_offset), program_(nullptr), + add_dev_prog_(false), prog_built_(false) {} + + ~FatBinaryDeviceInfo(); + +private: + const void* binary_image_; // binary image ptr + size_t binary_size_; // binary image size + size_t binary_offset_; // image offset from original + + amd::Program* program_; // reinterpreted as hipModule_t + friend class FatBinaryInfo; + + //Control Variables + bool add_dev_prog_; + bool prog_built_; +}; + + +// Fat Binary Info +class FatBinaryInfo { +public: + FatBinaryInfo(const char* fname, const void* image); + ~FatBinaryInfo(); + + // Loads Fat binary from file or image, unbundles COs for devices. 
+ hipError_t ExtractFatBinaryUsingCOMGR(const std::vector& devices); + hipError_t ExtractFatBinary(const std::vector& devices); + hipError_t AddDevProgram(const int device_id); + hipError_t BuildProgram(const int device_id); + + + // Device Id bounds check + inline void DeviceIdCheck(const int device_id) const { + guarantee(device_id >= 0, "Invalid DeviceId less than 0"); + guarantee(static_cast(device_id) < fatbin_dev_info_.size(), "Invalid DeviceId, greater than no of fatbin device info!"); + } + + // Getter Methods + amd::Program* GetProgram(int device_id) { + DeviceIdCheck(device_id); + return fatbin_dev_info_[device_id]->program_; + } + + hipModule_t Module(int device_id) const { + DeviceIdCheck(device_id); + return reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); + } + + hipError_t GetModule(int device_id, hipModule_t* hmod) const { + DeviceIdCheck(device_id); + *hmod = reinterpret_cast(as_cl(fatbin_dev_info_[device_id]->program_)); + return hipSuccess; + } + +private: + std::string fname_; // File name + amd::Os::FileDesc fdesc_; // File descriptor + size_t fsize_; // Total file size + size_t foffset_; // File Offset where the fat binary is present. + + // Even when file is passed image will be mmapped till ~desctructor. + const void* image_; // Image + bool image_mapped_; // flag to detect if image is mapped + + // Only used for FBs where image is directly passed + std::string uri_; // Uniform resource indicator + + // Per Device Info, like corresponding binary ptr, size. + std::vector fatbin_dev_info_; +}; + +}; /* namespace hip */ + +#endif /* HIP_FAT_BINARY_HPP */ diff --git a/projects/clr/hipamd/src/hip_formatting.hpp b/projects/clr/hipamd/src/hip_formatting.hpp new file mode 100644 index 0000000000..00a27476be --- /dev/null +++ b/projects/clr/hipamd/src/hip_formatting.hpp @@ -0,0 +1,877 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ +#include +#include + +inline std::ostream& operator<<(std::ostream& os, const hipTextureFilterMode& s) { + switch (s) { + case hipFilterModePoint: + os << "hipFilterModePoint"; + break; + case hipFilterModeLinear: + os << "hipFilterModeLinear"; + break; + default: + os << "hipFilterModePoint"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureReadMode& s) { + switch (s) { + case hipReadModeElementType: + os << "hipReadModeElementType"; + break; + case hipReadModeNormalizedFloat: + os << "hipReadModeNormalizedFloat"; + break; + default: + os << "hipReadModeElementType"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureAddressMode& s) { + switch (s) { + case hipAddressModeWrap: + os << "hipAddressModeWrap"; + break; + case hipAddressModeClamp: + os << "hipAddressModeClamp"; + break; + case hipAddressModeMirror: + os << "hipAddressModeMirror"; + break; + case hipAddressModeBorder: + os << "hipAddressModeBorder"; + break; + default: + os << "hipAddressModeWrap"; + }; + return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const hipMemcpyKind& s) { + switch (s) { + case hipMemcpyHostToHost: + os << "hipMemcpyHostToHost"; + break; + case hipMemcpyHostToDevice: + os << "hipMemcpyHostToDevice"; + break; + case hipMemcpyDeviceToHost: + os << "hipMemcpyDeviceToHost"; + break; + case hipMemcpyDeviceToDevice: + os << "hipMemcpyDeviceToDevice"; + break; + case hipMemcpyDefault: + os << "hipMemcpyDefault"; + break; + default: + os << "hipMemcpyDefault"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatKind& s) { + switch (s) { + case hipChannelFormatKindSigned: + os << "hipChannelFormatKindSigned"; + break; + case hipChannelFormatKindUnsigned: + os << "hipChannelFormatKindUnsigned"; + break; + case hipChannelFormatKindFloat: + os << "hipChannelFormatKindFloat"; + break; + case hipChannelFormatKindNone: + os << "hipChannelFormatKindNone"; + 
break; + default: + os << "hipChannelFormatKindNone"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipArray_Format& s) { + switch (s) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + os << "HIP_AD_FORMAT_UNSIGNED_INT8"; + break; + case HIP_AD_FORMAT_UNSIGNED_INT16: + os << "HIP_AD_FORMAT_UNSIGNED_INT16"; + break; + case HIP_AD_FORMAT_UNSIGNED_INT32: + os << "HIP_AD_FORMAT_UNSIGNED_INT32"; + break; + case HIP_AD_FORMAT_SIGNED_INT8: + os << "HIP_AD_FORMAT_SIGNED_INT8"; + break; + case HIP_AD_FORMAT_SIGNED_INT16: + os << "HIP_AD_FORMAT_SIGNED_INT16"; + break; + case HIP_AD_FORMAT_SIGNED_INT32: + os << "HIP_AD_FORMAT_SIGNED_INT32"; + break; + case HIP_AD_FORMAT_HALF: + os << "HIP_AD_FORMAT_HALF"; + break; + case HIP_AD_FORMAT_FLOAT: + os << "HIP_AD_FORMAT_FLOAT"; + break; + default: + os << "HIP_AD_FORMAT_FLOAT"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipResourceViewFormat& s) { + switch (s) { + case hipResViewFormatNone: + os << "hipResViewFormatNone"; + break; + case hipResViewFormatUnsignedChar1: + os << "hipResViewFormatUnsignedChar1"; + break; + case hipResViewFormatUnsignedChar2: + os << "hipResViewFormatUnsignedChar2"; + break; + case hipResViewFormatUnsignedChar4: + os << "hipResViewFormatUnsignedChar4"; + break; + case hipResViewFormatSignedChar1: + os << "hipResViewFormatSignedChar1"; + break; + case hipResViewFormatSignedChar2: + os << "hipResViewFormatSignedChar2"; + break; + case hipResViewFormatSignedChar4: + os << "hipResViewFormatSignedChar4"; + break; + case hipResViewFormatUnsignedShort1: + os << "hipResViewFormatUnsignedShort1"; + break; + case hipResViewFormatUnsignedShort2: + os << "hipResViewFormatUnsignedShort2"; + break; + case hipResViewFormatUnsignedShort4: + os << "hipResViewFormatUnsignedShort4"; + break; + case hipResViewFormatSignedShort1: + os << "hipResViewFormatSignedShort1"; + break; + case hipResViewFormatSignedShort2: + os << "hipResViewFormatSignedShort2"; + break; + 
case hipResViewFormatSignedShort4: + os << "hipResViewFormatSignedShort4"; + break; + case hipResViewFormatUnsignedInt1: + os << "hipResViewFormatUnsignedInt1"; + break; + case hipResViewFormatUnsignedInt2: + os << "hipResViewFormatUnsignedInt2"; + break; + case hipResViewFormatUnsignedInt4: + os << "hipResViewFormatUnsignedInt4"; + break; + case hipResViewFormatSignedInt1: + os << "hipResViewFormatSignedInt1"; + break; + case hipResViewFormatSignedInt2: + os << "hipResViewFormatSignedInt2"; + break; + case hipResViewFormatSignedInt4: + os << "hipResViewFormatSignedInt4"; + break; + case hipResViewFormatHalf1: + os << "hipResViewFormatHalf1"; + break; + case hipResViewFormatHalf2: + os << "hipResViewFormatHalf2"; + break; + case hipResViewFormatHalf4: + os << "hipResViewFormatHalf4"; + break; + case hipResViewFormatFloat1: + os << "hipResViewFormatFloat1"; + break; + case hipResViewFormatFloat2: + os << "hipResViewFormatFloat2"; + break; + case hipResViewFormatFloat4: + os << "hipResViewFormatFloat4"; + break; + case hipResViewFormatUnsignedBlockCompressed1: + os << "hipResViewFormatUnsignedBlockCompressed1"; + break; + case hipResViewFormatUnsignedBlockCompressed2: + os << "hipResViewFormatUnsignedBlockCompressed2"; + break; + case hipResViewFormatUnsignedBlockCompressed3: + os << "hipResViewFormatUnsignedBlockCompressed3"; + break; + case hipResViewFormatUnsignedBlockCompressed4: + os << "hipResViewFormatUnsignedBlockCompressed4"; + break; + case hipResViewFormatSignedBlockCompressed4: + os << "hipResViewFormatSignedBlockCompressed4"; + break; + case hipResViewFormatUnsignedBlockCompressed5: + os << "hipResViewFormatUnsignedBlockCompressed5"; + break; + case hipResViewFormatSignedBlockCompressed5: + os << "hipResViewFormatSignedBlockCompressed5"; + break; + case hipResViewFormatUnsignedBlockCompressed6H: + os << "hipResViewFormatUnsignedBlockCompressed6H"; + break; + case hipResViewFormatSignedBlockCompressed6H: + os << "hipResViewFormatSignedBlockCompressed6H"; 
+ break; + case hipResViewFormatUnsignedBlockCompressed7: + os << "hipResViewFormatUnsignedBlockCompressed7"; + break; + default: + os << "hipResViewFormatNone"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipFunction_attribute& s) { + switch (s) { + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + os << "HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK"; + break; + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + os << "HIP_FUNC_ATTRIBUTE_NUM_REGS"; + break; + case HIP_FUNC_ATTRIBUTE_PTX_VERSION: + os << "HIP_FUNC_ATTRIBUTE_PTX_VERSION"; + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + os << "HIP_FUNC_ATTRIBUTE_BINARY_VERSION"; + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + os << "HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA"; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + os << "HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES"; + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + os << "HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT"; + break; + case HIP_FUNC_ATTRIBUTE_MAX: + os << "HIP_FUNC_ATTRIBUTE_MAX"; + break; + default: + os << "HIP_FUNC_ATTRIBUTE_MAX"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hiprtcResult& s) { + switch (s) { + case HIPRTC_SUCCESS: + os << "HIPRTC_SUCCESS"; + break; + case HIPRTC_ERROR_OUT_OF_MEMORY: + os << "HIPRTC_ERROR_OUT_OF_MEMORY"; + break; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + os << "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + break; + case HIPRTC_ERROR_INVALID_INPUT: + os << "HIPRTC_ERROR_INVALID_INPUT"; + break; + case HIPRTC_ERROR_INVALID_PROGRAM: + os << "HIPRTC_ERROR_INVALID_PROGRAM"; + break; + case HIPRTC_ERROR_INVALID_OPTION: 
+ os << "HIPRTC_ERROR_INVALID_OPTION"; + break; + case HIPRTC_ERROR_COMPILATION: + os << "HIPRTC_ERROR_COMPILATION"; + break; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + os << "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + break; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + os << "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + break; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + os << "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; + break; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + os << "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + break; + case HIPRTC_ERROR_INTERNAL_ERROR: + os << "HIPRTC_ERROR_INTERNAL_ERROR"; + break; + default: + os << "HIPRTC_ERROR_INTERNAL_ERROR"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipJitOption& s) { + switch (s) { + case HIPRTC_JIT_MAX_REGISTERS: + os << "HIPRTC_JIT_MAX_REGISTERS"; + break; + case HIPRTC_JIT_THREADS_PER_BLOCK: + os << "HIPRTC_JIT_THREADS_PER_BLOCK"; + break; + case HIPRTC_JIT_WALL_TIME: + os << "HIPRTC_JIT_WALL_TIME"; + break; + case HIPRTC_JIT_INFO_LOG_BUFFER: + os << "HIPRTC_JIT_INFO_LOG_BUFFER"; + break; + case HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES: + os << "HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES"; + break; + case HIPRTC_JIT_ERROR_LOG_BUFFER: + os << "HIPRTC_JIT_ERROR_LOG_BUFFER"; + break; + case HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: + os << "HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES"; + break; + case HIPRTC_JIT_OPTIMIZATION_LEVEL: + os << "HIPRTC_JIT_OPTIMIZATION_LEVEL"; + break; + case HIPRTC_JIT_TARGET_FROM_HIPCONTEXT: + os << "HIPRTC_JIT_TARGET_FROM_HIPCONTEXT"; + break; + case HIPRTC_JIT_TARGET: + os << "HIPRTC_JIT_TARGET"; + break; + case HIPRTC_JIT_FALLBACK_STRATEGY: + os << "HIPRTC_JIT_FALLBACK_STRATEGY"; + break; + case HIPRTC_JIT_GENERATE_DEBUG_INFO: + os << "HIPRTC_JIT_GENERATE_DEBUG_INFO"; + break; + case HIPRTC_JIT_CACHE_MODE: + os << "HIPRTC_JIT_CACHE_MODE"; + break; + case HIPRTC_JIT_NEW_SM3X_OPT: + os << 
"HIPRTC_JIT_NEW_SM3X_OPT"; + break; + case HIPRTC_JIT_FAST_COMPILE: + os << "HIPRTC_JIT_FAST_COMPILE"; + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_NAMES: + os << "HIPRTC_JIT_GLOBAL_SYMBOL_NAMES"; + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS: + os << "HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS"; + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_COUNT: + os << "HIPRTC_JIT_GLOBAL_SYMBOL_COUNT"; + break; + case HIPRTC_JIT_LTO: + os << "HIPRTC_JIT_LTO"; + break; + case HIPRTC_JIT_FTZ: + os << "HIPRTC_JIT_FTZ"; + break; + case HIPRTC_JIT_PREC_DIV: + os << "HIPRTC_JIT_PREC_DIV"; + break; + case HIPRTC_JIT_PREC_SQRT: + os << "HIPRTC_JIT_PREC_SQRT"; + break; + case HIPRTC_JIT_FMA: + os << "HIPRTC_JIT_FMA"; + break; + case HIPRTC_JIT_NUM_OPTIONS: + os << "HIPRTC_JIT_NUM_OPTIONS"; + break; + default: + os << "HIPRTC_JIT_MAX_REGISTERS"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipFuncCache_t& s) { + switch (s) { + case hipFuncCachePreferNone: + os << "hipFuncCachePreferNone"; + break; + case hipFuncCachePreferShared: + os << "hipFuncCachePreferShared"; + break; + case hipFuncCachePreferL1: + os << "hipFuncCachePreferL1"; + break; + case hipFuncCachePreferEqual: + os << "hipFuncCachePreferEqual"; + break; + default: + os << "hipFuncCachePreferNone"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipSharedMemConfig& s) { + switch (s) { + case hipSharedMemBankSizeDefault: + os << "hipSharedMemBankSizeDefault"; + break; + case hipSharedMemBankSizeFourByte: + os << "hipSharedMemBankSizeFourByte"; + break; + case hipSharedMemBankSizeEightByte: + os << "hipSharedMemBankSizeEightByte"; + break; + default: + os << "hipSharedMemBankSizeDefault"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipDataType& s) { + switch (s) { + case HIP_R_16F: + os << "HIP_R_16F"; + break; + case HIP_R_32F: + os << "HIP_R_32F"; + break; + case HIP_R_64F: + os << "HIP_R_64F"; + break; + case HIP_C_16F: + os << 
"HIP_C_16F"; + break; + case HIP_C_32F: + os << "HIP_C_32F"; + break; + case HIP_C_64F: + os << "HIP_C_64F"; + break; + default: + os << "HIP_R_16F"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipLibraryPropertyType& s) { + switch (s) { + case HIP_LIBRARY_MAJOR_VERSION: + os << "HIP_LIBRARY_MAJOR_VERSION"; + break; + case HIP_LIBRARY_MINOR_VERSION: + os << "HIP_LIBRARY_MINOR_VERSION"; + break; + case HIP_LIBRARY_PATCH_LEVEL: + os << "HIP_LIBRARY_PATCH_LEVEL"; + break; + default: + os << "HIP_LIBRARY_MAJOR_VERSION"; + }; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hip_api_id_t& s) { + os << hip_api_name(s); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hip_api_id_t* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureDesc& s) { + os << '{' + << '{' + << s.addressMode[0] + << ',' + << s.addressMode[1] + << ',' + << s.addressMode[2] + << '}' + << ',' + << s.filterMode + << ',' + << s.readMode + << ',' + << s.sRGB + << ',' + << '{' + << s.borderColor[0] + << ',' + << s.borderColor[1] + << ',' + << s.borderColor[2] + << ',' + << s.borderColor[3] + << '}' + << ',' + << s.normalizedCoords + << ',' + << s.mipmapFilterMode + << ',' + << s.mipmapLevelBias + << ',' + << s.minMipmapLevelClamp + << ',' + << s.maxMipmapLevelClamp + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipTextureDesc* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const dim3& s) { + os << '{' + << s.x + << ',' + << s.y + << ',' + << s.z + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const dim3* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatDesc& s) { + os << 
'{' + << s.x + << ',' + << s.y + << ',' + << s.z + << ',' + << s.w + << ',' + << s.f + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipChannelFormatDesc* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipMipmappedArray& s) { + os << '{' + << s.data + << ',' + << s.desc + << ',' + << s.width + << ',' + << s.height + << ',' + << s.depth + << '}'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const hipMipmappedArray* s) { + if (s) { + os << *s; + } else { + os << "nullptr"; + } + return os; +} + + +inline std::ostream& operator<<(std::ostream& os, const hipResourceDesc& s) { + os << '{' + << s.resType + << ',' + << '{'; + + switch (s.resType) { + case hipResourceTypeLinear: + os << s.res.linear.devPtr + << ',' + << s.res.linear.desc + << ',' + << s.res.linear.sizeInBytes; + break; + case hipResourceTypePitch2D: + os << s.res.pitch2D.devPtr + << ',' + << s.res.pitch2D.desc + << ',' + << s.res.pitch2D.width + << ',' + << s.res.pitch2D.height + << ',' + << s.res.pitch2D.pitchInBytes; + break; + case hipResourceTypeArray: + os << s.res.array.array; + break; + case hipResourceTypeMipmappedArray: + os < +#include +#include "hip_conversions.hpp" + +namespace amd { +static std::once_flag interopOnce; +} +// Sets up GL context association with amd context. 
+// NOTE: Refer to Context setup code in OCLTestImp.cpp +void setupGLInteropOnce() { + amd::Context* amdContext = hip::getCurrentDevice()->asContext(); + +//current context will be read in amdContext->create + cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)AMD_PLATFORM, + ROCCLR_HIP_GL_CONTEXT_KHR, + (cl_context_properties) nullptr, +#ifdef _WIN32 + ROCCLR_HIP_WGL_HDC_KHR, + (cl_context_properties) nullptr, +#else + ROCCLR_HIP_GLX_DISPLAY_KHR, + (cl_context_properties) nullptr, +#endif + 0}; + + amd::Context::Info info; + if (CL_SUCCESS != amd::Context::checkProperties(properties, &info)) { + LogError("Context setup failed \n"); + return; + } + + amdContext->setInfo(info); + if (CL_SUCCESS != amdContext->create(properties)) { + LogError("Context setup failed \n"); + } +} + +static inline hipError_t hipSetInteropObjects(int num_objects, void** mem_objects, + std::vector& interopObjects) { + if ((num_objects == 0 && mem_objects != nullptr) || (num_objects != 0 && mem_objects == nullptr)) { + return hipErrorUnknown; + } + + while (num_objects-- > 0) { + void* obj = *mem_objects++; + if (obj == nullptr) { + return hipErrorInvalidResourceHandle; + } + + amd::Memory* mem = reinterpret_cast(obj); + + if (mem->getInteropObj() == nullptr) { + return hipErrorInvalidResourceHandle; + } + + interopObjects.push_back(mem); + } + return hipSuccess; +} + +// NOTE: This method cooresponds to OpenCL functionality in clGetGLContextInfoKHR() +hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, + unsigned int hipDeviceCount, hipGLDeviceList deviceList) { + HIP_INIT_API(hipGLGetDevices, pHipDeviceCount, pHipDevices, hipDeviceCount, deviceList); + + std::call_once(amd::interopOnce, setupGLInteropOnce); + + static const bool VALIDATE_ONLY = true; + if (deviceList == hipGLDeviceListNextFrame) { + LogError(" hipGLDeviceListNextFrame not supported yet.\n"); + HIP_RETURN(hipErrorNotSupported); + } + if (pHipDeviceCount == nullptr || 
pHipDevices == nullptr || hipDeviceCount == 0) { + LogError(" Invalid Argument \n"); + HIP_RETURN(hipErrorInvalidValue); + } + + hipDeviceCount = std::min(hipDeviceCount, static_cast(g_devices.size())); + + amd::Context::Info info = hip::getCurrentDevice()->asContext()->info(); + if (!(info.flags_ & amd::Context::GLDeviceKhr)) { + LogError("Failed : Invalid Shared Group Reference \n"); + HIP_RETURN(hipErrorInvalidValue); + } + amd::GLFunctions* glenv = hip::getCurrentDevice()->asContext()->glenv(); + if (glenv != nullptr) { +#ifdef _WIN32 + info.hCtx_ = glenv->wglGetCurrentContext_(); +#else + info.hCtx_ = glenv->glXGetCurrentContext_(); +#endif + hip::getCurrentDevice()->asContext()->setInfo(info); + glenv->update(reinterpret_cast(info.hCtx_)); + } + *pHipDeviceCount = 0; + switch (deviceList) { + case hipGLDeviceListCurrentFrame: + for (int i = 0; i < hipDeviceCount; ++i) { + const std::vector& devices = g_devices[i]->devices(); + if (devices.size() > 0 && + devices[0]->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, VALIDATE_ONLY)) { + pHipDevices[0] = i; + *pHipDeviceCount = 1; + break; + } + } + break; + + case hipGLDeviceListAll: { + int foundDeviceCount = 0; + for (int i = 0; i < hipDeviceCount; ++i) { + const std::vector& devices = g_devices[i]->devices(); + if (devices.size() > 0 && + devices[0]->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, VALIDATE_ONLY)) { + pHipDevices[foundDeviceCount++] = i; + // No break: hipGLDeviceListAll reports every device that binds to the GL context. + } + } + + *pHipDeviceCount = foundDeviceCount; + } break; + + default: + LogWarning("Invalid deviceList value"); + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(*pHipDeviceCount > 0 ? 
hipSuccess : hipErrorNoDevice); +} + +static inline void clearGLErrors(const amd::Context& amdContext) { + GLenum glErr, glLastErr = GL_NO_ERROR; + while (1) { + glErr = amdContext.glenv()->glGetError_(); + if (glErr == GL_NO_ERROR || glErr == glLastErr) { + break; + } + glLastErr = glErr; + LogWarning("GL error"); + } +} + +static inline GLenum checkForGLError(const amd::Context& amdContext) { + GLenum glRetErr = GL_NO_ERROR; + GLenum glErr; + while (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + glRetErr = glErr; // Just return the last GL error + LogWarning("Check GL error"); + } + return glRetErr; +} + +hipError_t hipGraphicsSubResourceGetMappedArray(hipArray_t* array, hipGraphicsResource_t resource, + unsigned int arrayIndex, unsigned int mipLevel) { + HIP_INIT_API(hipGraphicsSubResourceGetMappedArray, array, resource, arrayIndex, mipLevel); + + amd::Context& amdContext = *(hip::getCurrentDevice()->asContext()); + if (array == nullptr || resource == nullptr) { + LogError("invalid array/resource"); + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Image* image = (reinterpret_cast(resource))->asImage(); + if (image == nullptr) { + LogError("invalid resource/image"); + HIP_RETURN(hipErrorInvalidValue); + } + // arrayIndex higher than zero not implmented + assert(arrayIndex == 0) ; + amd::Image * view = image->createView(amdContext, image->getImageFormat(), nullptr, mipLevel, 0); + + hipArray* myarray = new hipArray(); + + myarray->data = as_cl (view); + + myarray->width = view->getWidth(); + myarray->height = view->getHeight(); + myarray->depth = view->getDepth(); + + const cl_mem_object_type image_type = hip::getCLMemObjectType(myarray->width, myarray->height, myarray->depth, hipArrayDefault); + myarray->type = image_type; + amd::Image::Format f = image->getImageFormat(); + myarray->Format = hip::getCL2hipArrayFormat(f.image_channel_data_type); + myarray->desc = hip::getChannelFormatDesc(f.getNumChannels(), myarray->Format); + 
myarray->NumChannels = hip::getNumChannels(myarray->desc); + myarray->isDrv = 0; + myarray->textureType = 0; + *array = myarray; + { + amd::ScopedLock lock(hip::hipArraySetLock); + hip::hipArraySet.insert(*array); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image, GLenum target, + unsigned int flags) { + HIP_INIT_API(hipGraphicsGLRegisterImage, resource, image, target, flags); + + if (!((flags == hipGraphicsRegisterFlagsNone) || (flags & hipGraphicsRegisterFlagsReadOnly) || + (flags & hipGraphicsRegisterFlagsWriteDiscard) || + (flags & hipGraphicsRegisterFlagsSurfaceLoadStore) || + (flags & hipGraphicsRegisterFlagsTextureGather))) { + LogError("invalid parameter \"flags\""); + HIP_RETURN(hipErrorInvalidValue); + } + + if (resource == nullptr) { + LogError("invalid resource"); + HIP_RETURN(hipErrorInvalidValue); + } + + GLint miplevel = 0; + amd::Context& amdContext = *(hip::getCurrentDevice()->asContext()); + + if (amdContext.glenv() == nullptr) { + LogError("invalid context, gl interop not initialized"); + HIP_RETURN(hipErrorInvalidValue); + } + + amd::GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + LogWarning("\"amdContext\" is not created from GL context or share list \n"); + HIP_RETURN(hipErrorUnknown); + } + + amd::ImageGL* pImageGL = NULL; + GLenum glErr; + GLenum glTarget = 0; + GLenum glInternalFormat; + cl_image_format clImageFormat; + uint dim = 1; + cl_mem_object_type clType; + cl_gl_object_type clGLType; + GLsizei numSamples = 1; + + GLint gliTexWidth = 1; + GLint gliTexHeight = 1; + GLint gliTexDepth = 1; + + // Verify GL texture object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsTexture_(image)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + LogWarning("\"texture\" is not a GL texture object"); + HIP_RETURN(hipErrorUnknown); + } + + bool isImage = true; + + // Check target value validity + switch (target) { + 
case GL_TEXTURE_BUFFER: + glTarget = GL_TEXTURE_BUFFER; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D_BUFFER; + clGLType = CL_GL_OBJECT_TEXTURE_BUFFER; + isImage = false; + break; + + case GL_TEXTURE_1D: + glTarget = GL_TEXTURE_1D; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D; + clGLType = CL_GL_OBJECT_TEXTURE1D; + break; + + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + glTarget = GL_TEXTURE_CUBE_MAP; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_1D_ARRAY: + glTarget = GL_TEXTURE_1D_ARRAY; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE1D_ARRAY; + break; + + case GL_TEXTURE_2D: + glTarget = GL_TEXTURE_2D; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_MULTISAMPLE: + glTarget = GL_TEXTURE_2D_MULTISAMPLE; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_RECTANGLE_ARB: + glTarget = GL_TEXTURE_RECTANGLE_ARB; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_ARRAY: + glTarget = GL_TEXTURE_2D_ARRAY; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE2D_ARRAY; + break; + + case GL_TEXTURE_3D: + glTarget = GL_TEXTURE_3D; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE3D; + clGLType = CL_GL_OBJECT_TEXTURE3D; + break; + + default: + // wrong value + LogWarning("invalid \"target\" value"); + HIP_RETURN(hipErrorInvalidValue); + break; + } + amdContext.glenv()->glBindTexture_(glTarget, image); + + // Check if size is available - data store is created + if (isImage) { + // Check mipmap level for "texture" name + GLint gliTexBaseLevel; + GLint gliTexMaxLevel; + + 
clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_BASE_LEVEL, &gliTexBaseLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get base mipmap level of a GL \"texture\" object"); + HIP_RETURN(hipErrorInvalidValue); + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_MAX_LEVEL, &gliTexMaxLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get max mipmap level of a GL \"texture\" object"); + HIP_RETURN(hipErrorInvalidValue); + } + + if ((gliTexBaseLevel > miplevel) || (miplevel > gliTexMaxLevel)) { + LogWarning("\"miplevel\" is not a valid mipmap level of the GL \"texture\" object"); + HIP_RETURN(hipErrorInvalidValue); + } + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + HIP_RETURN(hipErrorInvalidValue); + } + + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_SAMPLES, + (GLint*)&numSamples); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get numbers of samples of GL \"texture\" object"); + HIP_RETURN(hipErrorInvalidValue); + } + if (numSamples > 1) { + LogWarning("MSAA \"texture\" object is not suppoerted for the device"); + HIP_RETURN(hipErrorInvalidValue); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + 0)) { //clFlags)) { + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + HIP_RETURN(hipErrorInvalidValue); + } + + switch (dim) { + case 3: 
+ clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_DEPTH, + &gliTexDepth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get the depth of \"miplevel\" of GL \"texure\""); + HIP_RETURN(hipErrorInvalidValue); + } + // Fall trough to process other dimensions... + case 2: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_HEIGHT, + &gliTexHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get the height of \"miplevel\" of GL \"texure\""); + HIP_RETURN(hipErrorInvalidValue); + } + // Fall trough to process other dimensions... + case 1: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_WIDTH, + &gliTexWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get the width of \"miplevel\" of GL \"texure\""); + HIP_RETURN(hipErrorInvalidValue); + } + break; + default: + LogWarning("invalid \"target\" value"); + HIP_RETURN(hipErrorInvalidValue); + } + + } else { + GLint size; + + // In case target is GL_TEXTURE_BUFFER + GLint backingBuffer; + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_( + glTarget, 0, GL_TEXTURE_BUFFER_DATA_STORE_BINDING, &backingBuffer); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get backing buffer for GL \"texture buffer\" object"); + HIP_RETURN(hipErrorInvalidValue); + } + amdContext.glenv()->glBindBuffer_(glTarget, backingBuffer); + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetIntegerv_(GL_TEXTURE_BUFFER_FORMAT_EXT, + reinterpret_cast(&glInternalFormat)); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + 
HIP_RETURN(hipErrorInvalidValue); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + flags)) { + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + HIP_RETURN(hipErrorInvalidValue); + } + + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &size); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + HIP_RETURN(hipErrorInvalidValue); + } + + gliTexWidth = size / iBytesPerPixel; + } + size_t imageSize = (clType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? static_cast(gliTexHeight) + : static_cast(gliTexDepth); + + if (!amd::Image::validateDimensions( + amdContext.devices(), clType, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), imageSize)) { + LogWarning("The GL \"texture\" data store is not created or out of supported dimensions"); + HIP_RETURN(hipErrorInvalidValue); + } + target = (glTarget == GL_TEXTURE_CUBE_MAP) ? target : 0; + + pImageGL = new (amdContext) + amd::ImageGL(amdContext, clType, flags, clImageFormat, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), glTarget, + image, 0, glInternalFormat, clGLType, numSamples, target); + + if (!pImageGL) { + LogWarning("Cannot create class ImageGL - out of memory?"); + HIP_RETURN(hipErrorUnknown); + } + + if (!pImageGL->create()) { + pImageGL->release(); + HIP_RETURN(hipErrorUnknown); + } + // Create interop object + if (pImageGL->getInteropObj() == nullptr) { + LogWarning("cannot create object of class BufferGL"); + pImageGL->release(); + HIP_RETURN(hipErrorUnknown); + } + // Fixme: If more than one device is present in the context, we choose the first device. + // We should come up with a more elegant solution to handle this. 
+ assert(amdContext.devices().size() == 1); + + const amd::Device& dev = *(amdContext.devices()[0]); + + device::Memory* mem = pImageGL->getDeviceMemory(dev); + if (nullptr == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", pImageGL->getSize()); + pImageGL->release(); + HIP_RETURN(hipErrorUnknown); + } + mem->processGLResource(device::Memory::GLDecompressResource); + + *resource = reinterpret_cast(pImageGL); + HIP_RETURN(hipSuccess); + +} + +hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer, + unsigned int flags) { + HIP_INIT_API(hipGraphicsGLRegisterBuffer, resource, buffer, flags); + + if (!((flags == hipGraphicsRegisterFlagsNone) || (flags & hipGraphicsRegisterFlagsReadOnly) || + (flags & hipGraphicsRegisterFlagsWriteDiscard))) { + LogError("invalid parameter \"flags\""); + HIP_RETURN(hipErrorInvalidValue); + } + + if (resource == nullptr) { + LogError("invalid resource"); + HIP_RETURN(hipErrorInvalidValue); + } + + amd::BufferGL* pBufferGL = nullptr; + GLenum glErr; + GLenum glTarget = GL_ARRAY_BUFFER; + GLint gliSize = 0; + GLint gliMapped = 0; + + amd::Context& amdContext = *(hip::getCurrentDevice()->asContext()); + + if (amdContext.glenv() == nullptr) { + LogError("invalid context, gl interop not initialized"); + HIP_RETURN(hipErrorInvalidValue); + } + + // Add this scope to bound the scoped lock + { + amd::GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + LogWarning("\"amdContext\" is not created from GL context or share list \n"); + HIP_RETURN(hipErrorUnknown); + } + + // Verify GL buffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsBuffer_(buffer)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + LogWarning("\"buffer\" is not a GL buffer object \n"); + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + // Check if size is available - data store is created + amdContext.glenv()->glBindBuffer_(glTarget, buffer); + 
clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &gliSize); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + LogWarning("cannot get the GL buffer size \n"); + HIP_RETURN(hipErrorInvalidResourceHandle); + } + if (gliSize == 0) { + LogWarning("the GL buffer's data store is not created \n"); + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + } // Release scoped lock + + // Now create BufferGL object + pBufferGL = new (amdContext) amd::BufferGL(amdContext, flags, gliSize, 0, buffer); + + if (!pBufferGL) { + LogWarning("cannot create object of class BufferGL"); + HIP_RETURN(hipErrorUnknown); + } + + if (!pBufferGL->create()) { + pBufferGL->release(); + HIP_RETURN(hipErrorUnknown); + } + + // Create interop object + if (pBufferGL->getInteropObj() == nullptr) { + LogWarning("cannot create object of class BufferGL"); + HIP_RETURN(hipErrorUnknown); + } + + // Fixme: If more than one device is present in the context, we choose the first device. + // We should come up with a more elegant solution to handle this. 
+ assert(amdContext.devices().size() == 1); + + const auto it = amdContext.devices().cbegin(); + const amd::Device& dev = *(*it); + + device::Memory* mem = pBufferGL->getDeviceMemory(dev); + if (nullptr == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", pBufferGL->getSize()); + HIP_RETURN(hipErrorUnknown); + } + mem->processGLResource(device::Memory::GLDecompressResource); + + *resource = reinterpret_cast(pBufferGL); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream) { + HIP_INIT_API(hipGraphicsMapResources, count, resources, stream); + amd::Context* amdContext = hip::getCurrentDevice()->asContext(); + if (!amdContext || !amdContext->glenv()) { + HIP_RETURN(hipErrorUnknown); + } + clearGLErrors(*amdContext); + amdContext->glenv()->glFinish_(); + if (checkForGLError(*amdContext) != GL_NO_ERROR) { + HIP_RETURN(hipErrorUnknown); + } + + hip::Stream* hip_stream = hip::getStream(stream); + if (nullptr == hip_stream) { + HIP_RETURN(hipErrorUnknown); + } + + if (!hip_stream->context().glenv() || !hip_stream->context().glenv()->isAssociated()) { + LogWarning("\"amdContext\" is not created from GL context or share list"); + HIP_RETURN(hipErrorUnknown); + } + + std::vector memObjects; + hipError_t err = hipSetInteropObjects(count, reinterpret_cast(resources), memObjects); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + amd::Command::EventWaitList nullWaitList; + + //! 
Now create command and enqueue + amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand( + *hip_stream, nullWaitList, count, memObjects, CL_COMMAND_ACQUIRE_GL_OBJECTS); + if (command == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + HIP_RETURN(hipErrorUnknown); + } + + command->enqueue(); + + // *not_null(event) = as_cl(&command->event()); + if (as_cl(&command->event()) == nullptr) { + command->release(); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, + hipGraphicsResource_t resource) { + HIP_INIT_API(hipGraphicsResourceGetMappedPointer, devPtr, size, resource); + amd::Context* amdContext = hip::getCurrentDevice()->asContext(); + if (!amdContext || !amdContext->glenv()) { + HIP_RETURN(hipErrorUnknown); + } + + // Fixme: If more than one device is present in the context, we choose the first device. + // We should come up with a more elegant solution to handle this. + assert(amdContext->devices().size() == 1); + + const auto it = amdContext->devices().cbegin(); + + amd::Device* curDev = *it; + amd::Memory* amdMem = reinterpret_cast(resource); + *size = amdMem->getSize(); + + // Interop resources don't have svm allocations they are added to + // amd::MemObjMap using device virtual address during creation. 
+ device::Memory* mem = reinterpret_cast(amdMem->getDeviceMemory(*curDev)); + *devPtr = reinterpret_cast(static_cast(mem->virtualAddress())); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources, + hipStream_t stream) { + HIP_INIT_API(hipGraphicsUnmapResources, count, resources, stream); + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorContextIsDestroyed); + } + + // Wait for the current host queue + hip::getStream(stream)->finish(); + + hip::Stream* hip_stream = hip::getStream(stream); + if (nullptr == hip_stream) { + HIP_RETURN(hipErrorUnknown); + } + + std::vector memObjects; + hipError_t err = hipSetInteropObjects(count, reinterpret_cast(resources), memObjects); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + amd::Command::EventWaitList nullWaitList; + + // Now create command and enqueue + amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand( + *hip_stream, nullWaitList, count, memObjects, CL_COMMAND_RELEASE_GL_OBJECTS); + if (command == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + HIP_RETURN(hipErrorUnknown); + } + + command->enqueue(); + + if (as_cl(&command->event()) == nullptr) { + command->release(); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphicsUnregisterResource(hipGraphicsResource_t resource) { + HIP_INIT_API(hipGraphicsUnregisterResource, resource); + + amd::BufferGL* pBufferGL = reinterpret_cast(resource); + delete pBufferGL; + + HIP_RETURN(hipSuccess); +} diff --git a/projects/clr/hipamd/src/hip_global.cpp b/projects/clr/hipamd/src/hip_global.cpp new file mode 100644 index 0000000000..5759cb715e --- /dev/null +++ b/projects/clr/hipamd/src/hip_global.cpp @@ -0,0 +1,235 @@ +#include "hip_global.hpp" + +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_code_object.hpp" +#include "platform/program.hpp" +#include + 
+const char* amd_dbgapi_get_build_name(void) { + return HIP_VERSION_BUILD_NAME; +} + +const char* amd_dbgapi_get_git_hash() { + return HIP_VERSION_GITHASH; +} + +size_t amd_dbgapi_get_build_id() { + return HIP_VERSION_BUILD_ID; +} + +#ifdef __HIP_ENABLE_PCH +extern const char __hip_pch_wave32[]; +extern const char __hip_pch_wave64[]; +extern unsigned __hip_pch_wave32_size; +extern unsigned __hip_pch_wave64_size; +void __hipGetPCH(const char** pch, unsigned int *size) { + hipDeviceProp_t deviceProp; + int deviceId; + hipError_t error = hipGetDevice(&deviceId); + error = hipGetDeviceProperties(&deviceProp, deviceId); + if (deviceProp.warpSize == 32) { + *pch = __hip_pch_wave32; + *size = __hip_pch_wave32_size; + } else { + *pch = __hip_pch_wave64; + *size = __hip_pch_wave64_size; + } +} +#endif +namespace hip { + +//Device Vars +DeviceVar::DeviceVar(std::string name, + hipModule_t hmod, + int deviceId) : + shadowVptr(nullptr), name_(name), + amd_mem_obj_(nullptr), device_ptr_(nullptr), + size_(0) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + device::Program* dev_program = + program->getDeviceProgram(*g_devices.at(deviceId)->devices()[0]); + + if (dev_program == nullptr) { + LogPrintfError("Cannot get Device Program for module: 0x%x \n", hmod); + guarantee(false, "Cannot get Device Program"); + } + + if(!dev_program->createGlobalVarObj(&amd_mem_obj_, &device_ptr_, &size_, name.c_str())) { + LogPrintfError("Cannot create Global Var obj for symbol: %s \n", name.c_str()); + guarantee(false, "Cannot create GlobalVar Obj"); + } + + // Handle size 0 symbols + if (size_ != 0) { + if (amd_mem_obj_ == nullptr || device_ptr_ == nullptr) { + LogPrintfError("Cannot get memory for creating device Var: %s", name.c_str()); + guarantee(false, "Cannot get memory for creating device var"); + } + amd::MemObjMap::AddMemObj(device_ptr_, amd_mem_obj_); + } +} + +DeviceVar::~DeviceVar() { + if (amd_mem_obj_ != nullptr) { + amd::MemObjMap::RemoveMemObj(device_ptr_); + 
amd_mem_obj_->release(); + } + + if (shadowVptr != nullptr) { + textureReference* texRef = reinterpret_cast(shadowVptr); + hipError_t err = ihipUnbindTexture(texRef); + delete texRef; + shadowVptr = nullptr; + } + + device_ptr_ = nullptr; + size_ = 0; +} + +//Device Functions +DeviceFunc::DeviceFunc(std::string name, hipModule_t hmod) : dflock_("function lock"), + name_(name), kernel_(nullptr) { + amd::Program* program = as_amd(reinterpret_cast(hmod)); + + const amd::Symbol *symbol = program->findSymbol(name.c_str()); + if (symbol == nullptr) { + LogPrintfError("Cannot find Symbol with name: %s \n", name.c_str()); + guarantee(false, "Cannot find Symbol"); + } + + kernel_ = new amd::Kernel(*program, *symbol, name); + if (kernel_ == nullptr) { + LogPrintfError("Cannot create kernel with name: %s \n", name.c_str()); + guarantee(false, "Cannot Create kernel"); + } +} + +DeviceFunc::~DeviceFunc() { + if (kernel_ != nullptr) { + kernel_->release(); + } +} + +//Abstract functions +Function::Function(const std::string& name, FatBinaryInfo** modules) + : name_(name), modules_(modules) { + dFunc_.resize(g_devices.size()); +} + +Function::~Function() { + for (auto& elem : dFunc_) { + delete elem; + } + name_ = ""; + modules_ = nullptr; +} + +hipError_t Function::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod) { + guarantee((dFunc_.size() == g_devices.size()), "dFunc Size mismatch"); + if (dFunc_[ihipGetDevice()] == nullptr) { + dFunc_[ihipGetDevice()] = new DeviceFunc(name_, hmod); + } + *hfunc = dFunc_[ihipGetDevice()]->asHipFunction(); + + return hipSuccess; +} + +hipError_t Function::getStatFunc(hipFunction_t* hfunc, int deviceId) { + guarantee(modules_ != nullptr, "Module not initialized"); + + hipModule_t hmod = nullptr; + IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); + IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); + + if (dFunc_[deviceId] == nullptr) { + dFunc_[deviceId] = new DeviceFunc(name_, hmod); + } + *hfunc = 
dFunc_[deviceId]->asHipFunction(); + + return hipSuccess; +} + +hipError_t Function::getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId) { + guarantee((modules_ != nullptr), "Module not initialized"); + + hipModule_t hmod = nullptr; + IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); + IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); + + if (dFunc_[deviceId] == nullptr) { + dFunc_[deviceId] = new DeviceFunc(name_, hmod); + } + + const std::vector& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false); + + amd::Kernel* kernel = dFunc_[deviceId]->kernel(); + const device::Kernel::WorkGroupInfo* wginfo = kernel->getDeviceKernel(*devices[deviceId])->workGroupInfo(); + func_attr->sharedSizeBytes = static_cast(wginfo->localMemSize_); + func_attr->binaryVersion = static_cast(kernel->signature().version()); + func_attr->cacheModeCA = 0; + func_attr->constSizeBytes = 0; + func_attr->localSizeBytes = wginfo->privateMemSize_; + func_attr->maxDynamicSharedSizeBytes = static_cast(wginfo->availableLDSSize_ + - wginfo->localMemSize_); + + func_attr->maxThreadsPerBlock = static_cast(wginfo->size_); + func_attr->numRegs = static_cast(wginfo->usedVGPRs_); + func_attr->preferredShmemCarveout = 0; + func_attr->ptxVersion = 30; + + + return hipSuccess; +} + +//Abstract Vars +Var::Var(const std::string& name, DeviceVarKind dVarKind, size_t size, int type, int norm, + FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind), size_(size), + type_(type), norm_(norm), modules_(modules), managedVarPtr_(nullptr), align_(0) { + dVar_.resize(g_devices.size()); +} + +Var::Var(const std::string& name, DeviceVarKind dVarKind, void *pointer, size_t size, + unsigned align, FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind), + size_(size), modules_(modules), managedVarPtr_(pointer), align_(align), + type_(0), norm_(0) { + dVar_.resize(g_devices.size()); +} + +Var::~Var() { + for (auto& elem : dVar_) { + delete elem; + } + modules_ = nullptr; +} + 
+hipError_t Var::getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod) { + guarantee((deviceId >= 0), "Invalid DeviceId, less than zero"); + guarantee((static_cast(deviceId) < g_devices.size()), + "Invalid DeviceId, greater than no of code objects"); + guarantee((dVar_.size() == g_devices.size()), + "Device Var not initialized to size"); + + if (dVar_[deviceId] == nullptr) { + dVar_[deviceId] = new DeviceVar(name_, hmod, deviceId); + } + + *dvar = dVar_[deviceId]; + return hipSuccess; +} + +hipError_t Var::getStatDeviceVar(DeviceVar** dvar, int deviceId) { + guarantee((deviceId >= 0) , "Invalid DeviceId, less than zero"); + guarantee((static_cast(deviceId) < g_devices.size()), + "Invalid DeviceId, greater than no of code objects"); + if (dVar_[deviceId] == nullptr) { + hipModule_t hmod = nullptr; + IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); + IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); + dVar_[deviceId] = new DeviceVar(name_, hmod, deviceId); + } + *dvar = dVar_[deviceId]; + return hipSuccess; +} + +}; //namespace: hip diff --git a/projects/clr/hipamd/src/hip_global.hpp b/projects/clr/hipamd/src/hip_global.hpp new file mode 100644 index 0000000000..f769e85af2 --- /dev/null +++ b/projects/clr/hipamd/src/hip_global.hpp @@ -0,0 +1,128 @@ +#ifndef HIP_GLOBAL_HPP +#define HIP_GLOBAL_HPP + +#include +#include + +#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_fatbin.hpp" +#include "platform/program.hpp" + +namespace hip { + +//Forward Declaration +class CodeObject; + +//Device Structures +class DeviceVar { +public: + DeviceVar(std::string name, hipModule_t hmod, int deviceId); + ~DeviceVar(); + + //Accessors for device ptr and size, populated during constructor. 
+ hipDeviceptr_t device_ptr() const { return device_ptr_; } + size_t size() const { return size_; } + std::string name() const { return name_; } + void* shadowVptr; + +private: + std::string name_; //Name of the var + amd::Memory* amd_mem_obj_; //amd_mem_obj abstraction + hipDeviceptr_t device_ptr_; //Device Pointer + size_t size_; //Size of the var +}; + +class DeviceFunc { +public: + DeviceFunc(std::string name, hipModule_t hmod); + ~DeviceFunc(); + + amd::Monitor dflock_; + + //Converts DeviceFunc to hipFunction_t(used by app) and vice versa. + hipFunction_t asHipFunction() { return reinterpret_cast(this); } + static DeviceFunc* asFunction(hipFunction_t f) { return reinterpret_cast(f); } + + //Accessor for kernel_ and name_ populated during constructor. + std::string name() const { return name_; } + amd::Kernel* kernel() const { return kernel_; } + +private: + std::string name_; //name of the func(not unique identifier) + amd::Kernel* kernel_; //Kernel ptr referencing to ROCclr Symbol +}; + +//Abstract Structures +class Function { +public: + Function(const std::string& name, FatBinaryInfo** modules=nullptr); + ~Function(); + + //Return DeviceFunc for this this dynamically loaded module + hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod); + + //Return Device Func & attr . Generate/build if not already done so. 
+ hipError_t getStatFunc(hipFunction_t *hfunc, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, int deviceId); + void resize_dFunc(size_t size) { dFunc_.resize(size); } + FatBinaryInfo** moduleInfo() { return modules_; } + const std::string& name() const { return name_; } + +private: + std::vector dFunc_; //DeviceFuncObj per Device + std::string name_; //name of the func(not unique identifier) + FatBinaryInfo** modules_; // static module where it is referenced +}; + +class Var { +public: + //Types of variable + enum DeviceVarKind { + DVK_Variable = 0, + DVK_Surface, + DVK_Texture, + DVK_Managed + }; + + Var(const std::string& name, DeviceVarKind dVarKind, size_t size, int type, int norm, + FatBinaryInfo** modules = nullptr); + + Var(const std::string& name, DeviceVarKind dVarKind, void *pointer, size_t size, unsigned align, + FatBinaryInfo** modules = nullptr); + + ~Var(); + + //Return DeviceVar for this dynamically loaded module + hipError_t getDeviceVar(DeviceVar** dvar, int deviceId, hipModule_t hmod); + + //Return DeviceVar for module Generate/build if not already done so. 
+ hipError_t getStatDeviceVar(DeviceVar** dvar, int deviceId); + void resize_dVar(size_t size) { dVar_.resize(size); } + + FatBinaryInfo** moduleInfo() { return modules_; }; + DeviceVarKind getVarKind() const { return dVarKind_; } + size_t getSize() const { return size_; } + + void* getManagedVarPtr() { return managedVarPtr_; }; + void setManagedVarInfo(void* pointer, size_t size) { + managedVarPtr_ = pointer; + size_ = size; + dVarKind_ = DVK_Managed; + } +private: + std::vector dVar_; // DeviceVarObj per Device + std::string name_; // Variable name (not unique identifier) + DeviceVarKind dVarKind_; // Variable kind + size_t size_; // Size of the variable + int type_; // Type(Textures/Surfaces only) + int norm_; // Type(Textures/Surfaces only) + FatBinaryInfo** modules_; // static module where it is referenced + + void *managedVarPtr_; // Managed memory pointer with size_ & align_ + unsigned int align_; // Managed memory alignment +}; + +}; //namespace: hip +#endif /* HIP_GLOBAL_HPP */ diff --git a/projects/clr/hipamd/src/hip_graph.cpp b/projects/clr/hipamd/src/hip_graph.cpp new file mode 100644 index 0000000000..deeea225ad --- /dev/null +++ b/projects/clr/hipamd/src/hip_graph.cpp @@ -0,0 +1,2456 @@ +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "top.hpp" +#include "hip_graph_internal.hpp" +#include "platform/command.hpp" +#include "hip_conversions.hpp" +#include "hip_platform.hpp" +#include "hip_event.hpp" +#include "hip_mempool_impl.hpp" + + +std::vector g_captureStreams; +amd::Monitor g_captureStreamsLock{"StreamCaptureGlobalList"}; +static amd::Monitor g_streamSetLock{"StreamCaptureset"}; +std::unordered_set g_allCapturingStreams; + +inline hipError_t ihipGraphAddNode(hipGraphNode_t graphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + bool capture = true) { + graph->AddNode(graphNode); + std::unordered_set DuplicateDep; + for (size_t i = 0; i < numDependencies; i++) { + if ((!hipGraphNode::isNodeValid(pDependencies[i])) || + (graph != pDependencies[i]->GetParentGraph())) { + return hipErrorInvalidValue; + } + if (DuplicateDep.find(pDependencies[i]) != DuplicateDep.end()) { + return hipErrorInvalidValue; + } + DuplicateDep.insert(pDependencies[i]); + pDependencies[i]->AddEdge(graphNode); + } + if (capture == false) { + { + amd::ScopedLock lock(g_streamSetLock); + for (auto stream : g_allCapturingStreams) { + if (stream->GetCaptureGraph() == graph) { + graph->AddManualNodeDuringCapture(graphNode); + break; + } + } + } + } + return hipSuccess; +} + +hipError_t ihipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipKernelNodeParams* pNodeParams, bool capture = true) { + if (pGraphNode == nullptr 
|| graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || pNodeParams == nullptr || + pNodeParams->func == nullptr) { + return hipErrorInvalidValue; + } + + if (!ihipGraph::isGraphValid(graph)) { + return hipErrorInvalidValue; + } + + // If neither 'kernelParams' or 'extra' are provided or if both are provided, return error + if ((pNodeParams->kernelParams == nullptr && pNodeParams->extra == nullptr) || + (pNodeParams->kernelParams != nullptr && pNodeParams->extra != nullptr)) { + return hipErrorInvalidValue; + } + + hipError_t status = hipGraphKernelNode::validateKernelParams(pNodeParams); + if (hipSuccess != status) { + return status; + } + + size_t globalWorkSizeX = static_cast(pNodeParams->gridDim.x) * pNodeParams->blockDim.x; + size_t globalWorkSizeY = static_cast(pNodeParams->gridDim.y) * pNodeParams->blockDim.y; + size_t globalWorkSizeZ = static_cast(pNodeParams->gridDim.z) * pNodeParams->blockDim.z; + if (globalWorkSizeX > std::numeric_limits::max() || + globalWorkSizeY > std::numeric_limits::max() || + globalWorkSizeZ > std::numeric_limits::max()) { + return hipErrorInvalidConfiguration; + } + + *pGraphNode = new hipGraphKernelNode(pNodeParams); + status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, capture); + return status; +} + +hipError_t ihipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemcpy3DParms* pCopyParams, bool capture = true) { + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || pCopyParams == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = ihipMemcpy3D_validate(pCopyParams); + if (status != hipSuccess) { + return status; + } + *pGraphNode = new hipGraphMemcpyNode(pCopyParams); + status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, capture); + return status; +} + +hipError_t 
ihipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dst, const void* src, size_t count, hipMemcpyKind kind, + bool capture = true) { + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || count ==0) { + return hipErrorInvalidValue; + } + hipError_t status = hipGraphMemcpyNode1D::ValidateParams(dst, src, count, kind); + if (status != hipSuccess) { + return status; + } + *pGraphNode = new hipGraphMemcpyNode1D(dst, src, count, kind); + status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, capture); + return status; +} + +hipError_t ihipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemsetParams* pMemsetParams, bool capture = true) { + if (pGraphNode == nullptr || graph == nullptr || pMemsetParams == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || pMemsetParams->height == 0) { + return hipErrorInvalidValue; + } + // The element size must be 1, 2, or 4 bytes + if (pMemsetParams->elementSize != sizeof(int8_t) && + pMemsetParams->elementSize != sizeof(int16_t) && + pMemsetParams->elementSize != sizeof(int32_t)) { + return hipErrorInvalidValue; + } + + hipError_t status; + status = ihipGraphMemsetParams_validate(pMemsetParams); + if (status != hipSuccess) { + return status; + } + if (pMemsetParams->height == 1) { + status = + ihipMemset_validate(pMemsetParams->dst, pMemsetParams->value, pMemsetParams->elementSize, + pMemsetParams->width * pMemsetParams->elementSize); + } else { + if (pMemsetParams->pitch < (pMemsetParams->width * pMemsetParams->elementSize)) { + return hipErrorInvalidValue; + } + auto sizeBytes = pMemsetParams->width * pMemsetParams->height * pMemsetParams->elementSize * 1; + status = ihipMemset3D_validate( + {pMemsetParams->dst, pMemsetParams->pitch, pMemsetParams->width, 
pMemsetParams->height}, + pMemsetParams->value, {pMemsetParams->width, pMemsetParams->height, 1}, sizeBytes); + } + if (status != hipSuccess) { + return status; + } + *pGraphNode = new hipGraphMemsetNode(pMemsetParams); + status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, capture); + return status; +} + +hipError_t capturehipLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim, + dim3& blockDim, void**& args, size_t& sharedMemBytes) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node kernel launch on stream : %p", stream); + + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipKernelNodeParams nodeParams; + nodeParams.func = const_cast(hostFunction); + nodeParams.blockDim = blockDim; + nodeParams.extra = nullptr; + nodeParams.gridDim = gridDim; + nodeParams.kernelParams = args; + nodeParams.sharedMemBytes = sharedMemBytes; + + hipGraphNode_t pGraphNode; + hipError_t status = + ihipGraphAddKernelNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &nodeParams); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t ihipExtLaunchKernel(hipStream_t stream, hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, void** kernelParams, + void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, + uint32_t flags, bool capture = true) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + + hipGraphNode_t pGraphNode; + hipError_t status; + if (startEvent != nullptr) { + pGraphNode = new hipGraphEventRecordNode(startEvent); + status = ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), 
s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), capture); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + } + hipKernelNodeParams nodeParams; + nodeParams.func = f; + nodeParams.blockDim = dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ); + nodeParams.extra = extra; + nodeParams.gridDim = dim3(globalWorkSizeX / localWorkSizeX, globalWorkSizeY / localWorkSizeY, + globalWorkSizeZ / localWorkSizeZ); + nodeParams.kernelParams = kernelParams; + nodeParams.sharedMemBytes = sharedMemBytes; + status = + ihipGraphAddKernelNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &nodeParams); + + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + if (stopEvent != nullptr) { + pGraphNode = new hipGraphEventRecordNode(stopEvent); + status = ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size()); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + } + return hipSuccess; +} + +hipError_t capturehipExtModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f, + uint32_t& globalWorkSizeX, uint32_t& globalWorkSizeY, + uint32_t& globalWorkSizeZ, uint32_t& localWorkSizeX, + uint32_t& localWorkSizeY, uint32_t& localWorkSizeZ, + size_t& sharedMemBytes, void**& kernelParams, + void**& extra, hipEvent_t& startEvent, + hipEvent_t& stopEvent, uint32_t& flags) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node Ext Module launch kernel on stream : %p", stream); + return ihipExtLaunchKernel(stream, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, + kernelParams, extra, startEvent, stopEvent, flags); +} + +hipError_t capturehipExtLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim, + dim3& blockDim, void**& args, 
size_t& sharedMemBytes, + hipEvent_t& startEvent, hipEvent_t& stopEvent, int& flags) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node Ext kernel launch on stream : %p", stream); + return ihipExtLaunchKernel( + stream, reinterpret_cast(const_cast(hostFunction)), + gridDim.x * blockDim.x, gridDim.y * blockDim.y, gridDim.z * blockDim.z, blockDim.x, + blockDim.y, blockDim.z, sharedMemBytes, args, nullptr, startEvent, stopEvent, flags); +} + +hipError_t capturehipModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f, uint32_t& gridDimX, + uint32_t& gridDimY, uint32_t& gridDimZ, uint32_t& blockDimX, + uint32_t& blockDimY, uint32_t& blockDimZ, + uint32_t& sharedMemBytes, void**& kernelParams, + void**& extra) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node module launch kernel launch on stream : %p", stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipKernelNodeParams nodeParams; + nodeParams.func = f; + nodeParams.blockDim = {blockDimX, blockDimY, blockDimZ}; + nodeParams.extra = extra; + nodeParams.gridDim = {gridDimX, gridDimY, gridDimZ}; + nodeParams.kernelParams = kernelParams; + nodeParams.sharedMemBytes = sharedMemBytes; + + hipGraphNode_t pGraphNode; + hipError_t status = + ihipGraphAddKernelNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &nodeParams); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpy3DAsync(hipStream_t& stream, const hipMemcpy3DParms*& p) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node Memcpy3D on stream : %p", + stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, 
s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpy2DAsync(hipStream_t& stream, void*& dst, size_t& dpitch, + const void*& src, size_t& spitch, size_t& width, size_t& height, + hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node Memcpy2D on stream : %p", + stream); + if (dst == nullptr || src == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.kind = kind; + p.srcPtr.ptr = const_cast(src); + p.srcPtr.pitch = spitch; + p.srcArray = nullptr; // Ignored. + + p.dstPtr.ptr = const_cast(dst); + p.dstPtr.pitch = dpitch; + p.dstArray = nullptr; // Ignored. + + p.extent = {width, height, 1}; + + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, size_t& dpitch, + hipArray_const_t& src, size_t& wOffsetSrc, + size_t& hOffsetSrc, size_t& width, size_t& height, + hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node Memcpy2DFromArray on stream : %p", stream); + if (src == nullptr || dst == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.srcPos = {wOffsetSrc, hOffsetSrc, 0}; + p.kind = kind; + p.srcPtr.ptr = nullptr; + 
p.srcArray = const_cast(src); // Ignored. + + p.kind = kind; + p.dstPtr.ptr = dst; + p.dstArray = nullptr; // Ignored. + p.dstPtr.pitch = dpitch; + p.extent = {width / hip::getElementSize(p.srcArray), height, 1}; + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpyFromArrayAsync(hipStream_t& stream, void*& dst, hipArray_const_t& src, + size_t& wOffsetSrc, size_t& hOffsetSrc, size_t& count, + hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node Memcpy2DFromArray on stream : %p", stream); + if (src == nullptr || dst == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.srcPos = {wOffsetSrc, hOffsetSrc, 0}; + p.kind = kind; + p.srcPtr.ptr = nullptr; + p.srcArray = const_cast(src); + + p.kind = kind; + p.dstPtr.ptr = dst; + p.dstArray = nullptr; // Ignored. + p.dstPtr.pitch = 0; + const size_t arrayHeight = (src->height != 0) ? 
src->height : 1; + const size_t widthInBytes = count / arrayHeight; + const size_t height = (count / src->width) / hip::getElementSize(src); + p.extent = {widthInBytes / hip::getElementSize(p.srcArray), height, 1}; + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray*& dst, size_t& wOffset, + size_t& hOffset, const void*& src, size_t& spitch, + size_t& width, size_t& height, hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node Memcpy2DFromArray on stream : %p", stream); + if (src == nullptr || dst == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.dstPos = {wOffset, hOffset, 0}; + p.kind = kind; + p.dstPtr.ptr = nullptr; + p.dstArray = dst; // Ignored. + + p.kind = kind; + p.srcPtr.ptr = const_cast(src); + p.srcArray = nullptr; // Ignored. 
+ p.srcPtr.pitch = spitch; + p.extent = {width / hip::getElementSize(p.dstArray), height, 1}; + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpyToArrayAsync(hipStream_t& stream, hipArray_t& dst, size_t& wOffset, + size_t& hOffset, const void*& src, size_t& count, + hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node Memcpy2DFromArray on stream : %p", stream); + if (src == nullptr || dst == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.dstPos = {wOffset, hOffset, 0}; + p.kind = kind; + p.dstPtr.ptr = nullptr; + p.dstArray = dst; // Ignored. + + p.kind = kind; + p.srcPtr.ptr = const_cast(src); + p.srcArray = nullptr; // Ignored. + p.srcPtr.pitch = 0; + const size_t arrayHeight = (dst->height != 0) ? 
dst->height : 1; + const size_t widthInBytes = count / arrayHeight; + const size_t height = (count / dst->width) / hip::getElementSize(dst); + p.extent = {widthInBytes / hip::getElementSize(p.dstArray), height, 1}; + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpyParam2DAsync(hipStream_t& stream, const hip_Memcpy2D*& pCopy) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node MemcpyParam2D on stream : %p", stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.srcArray = pCopy->srcArray; + p.srcPos = {pCopy->srcXInBytes, pCopy->srcY, 0}; + p.srcPtr.pitch = pCopy->srcPitch; + if (pCopy->srcDevice != nullptr) { + p.srcPtr.ptr = pCopy->srcDevice; + } + if (pCopy->srcHost != nullptr) { + p.srcPtr.ptr = const_cast(pCopy->srcHost); + } + p.dstArray = pCopy->dstArray; + p.dstPos = {pCopy->dstXInBytes, pCopy->dstY, 0}; + p.dstPtr.pitch = pCopy->srcPitch; + if (pCopy->dstDevice != nullptr) { + p.dstPtr.ptr = pCopy->dstDevice; + } + if (pCopy->dstHost != nullptr) { + p.dstPtr.ptr = const_cast(pCopy->dstHost); + } + p.extent = {pCopy->WidthInBytes, pCopy->Height, 1}; + if (pCopy->srcMemoryType == hipMemoryTypeHost && pCopy->dstMemoryType == hipMemoryTypeDevice) { + p.kind = hipMemcpyHostToDevice; + } else if (pCopy->srcMemoryType == hipMemoryTypeDevice && + pCopy->dstMemoryType == hipMemoryTypeHost) { + p.kind = hipMemcpyDeviceToHost; + } else if (pCopy->srcMemoryType == hipMemoryTypeDevice && + pCopy->dstMemoryType == hipMemoryTypeDevice) { + p.kind = hipMemcpyDeviceToDevice; + } + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, 
s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray*& srcArray, + size_t& srcOffset, size_t& ByteCount) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node MemcpyParam2D on stream : %p", stream); + if (srcArray == nullptr || dstHost == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.srcArray = srcArray; + p.srcPos = {srcOffset, 0, 0}; + p.dstPtr.ptr = dstHost; + p.extent = {ByteCount / hip::getElementSize(p.srcArray), 1, 1}; + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray*& dstArray, size_t& dstOffset, + const void*& srcHost, size_t& ByteCount) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node MemcpyParam2D on stream : %p", stream); + if (dstArray == nullptr || srcHost == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipMemcpy3DParms p = {}; + memset(&p, 0, sizeof(p)); + p.dstArray = dstArray; + p.dstPos = {dstOffset, 0, 0}; + p.srcPtr.ptr = const_cast(srcHost); + p.extent = {ByteCount / hip::getElementSize(p.dstArray), 1, 1}; + hipError_t status = + ihipGraphAddMemcpyNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + 
s->GetLastCapturedNodes().size(), &p); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpy(hipStream_t stream, void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + std::vector pDependencies = s->GetLastCapturedNodes(); + size_t numDependencies = s->GetLastCapturedNodes().size(); + hipGraph_t graph = s->GetCaptureGraph(); + hipError_t status = ihipMemcpy_validate(dst, src, sizeBytes, kind); + if (status != hipSuccess) { + return status; + } + hipGraphNode_t node = new hipGraphMemcpyNode1D(dst, src, sizeBytes, kind); + status = ihipGraphAddNode(node, graph, pDependencies.data(), numDependencies); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(node); + return hipSuccess; +} + +hipError_t capturehipMemcpyAsync(hipStream_t& stream, void*& dst, const void*& src, + size_t& sizeBytes, hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node Memcpy1D on stream : %p", + stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + return capturehipMemcpy(stream, dst, src, sizeBytes, kind); +} + +hipError_t capturehipMemcpyHtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice, void*& srcHost, + size_t& ByteCount, hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node MemcpyHtoD on stream : %p", + stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + return capturehipMemcpy(stream, dstDevice, srcHost, ByteCount, kind); +} + +hipError_t capturehipMemcpyDtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice, + hipDeviceptr_t& srcDevice, size_t& ByteCount, + hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node hipMemcpyDtoD on stream : %p", stream); + if 
(!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + return capturehipMemcpy(stream, dstDevice, srcDevice, ByteCount, kind); +} + +hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost, hipDeviceptr_t& srcDevice, + size_t& ByteCount, hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node hipMemcpyDtoH on stream : %p", stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + return capturehipMemcpy(stream, dstHost, srcDevice, ByteCount, kind); +} + +hipError_t capturehipMemcpyFromSymbolAsync(hipStream_t& stream, void*& dst, const void*& symbol, + size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node MemcpyFromSymbolNode on stream : %p", stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + hipError_t status = ihipMemcpySymbol_validate(symbol, sizeBytes, offset, sym_size, device_ptr); + if (status != hipSuccess) { + HIP_RETURN(status); + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode = + new hipGraphMemcpyNodeFromSymbol(dst, symbol, sizeBytes, offset, kind); + status = ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size()); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemcpyToSymbolAsync(hipStream_t& stream, const void*& symbol, const void*& src, + size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node MemcpyToSymbolNode on stream : %p", stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + hipError_t status = ihipMemcpySymbol_validate(symbol, 
sizeBytes, offset, sym_size, device_ptr); + if (status != hipSuccess) { + HIP_RETURN(status); + } + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode = new hipGraphMemcpyNodeToSymbol(symbol, src, sizeBytes, offset, kind); + status = ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size()); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemsetAsync(hipStream_t& stream, void*& dst, int& value, size_t& valueSize, + size_t& sizeBytes) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node Memset1D on stream : %p", + stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hipMemsetParams memsetParams = {0}; + memsetParams.dst = dst; + memsetParams.value = value; + memsetParams.elementSize = valueSize; + memsetParams.width = sizeBytes / valueSize; + memsetParams.height = 1; + + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipError_t status = + ihipGraphAddMemsetNode(&pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &memsetParams); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemset2DAsync(hipStream_t& stream, void*& dst, size_t& pitch, int& value, + size_t& width, size_t& height) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node Memset2D on stream : %p", + stream); + hipMemsetParams memsetParams = {0}; + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + memsetParams.dst = dst; + memsetParams.value = value; + memsetParams.width = width; + memsetParams.height = height; + memsetParams.pitch = pitch; + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode; + hipError_t status = + ihipGraphAddMemsetNode(&pGraphNode, 
s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size(), &memsetParams); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +hipError_t capturehipMemset3DAsync(hipStream_t& stream, hipPitchedPtr& pitchedDevPtr, int& value, + hipExtent& extent) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node Memset3D on stream : %p", + stream); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + return hipSuccess; +} + +hipError_t capturehipEventRecord(hipStream_t& stream, hipEvent_t& event) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node EventRecord on stream : %p, Event %p", stream, event); + if (event == nullptr) { + return hipErrorInvalidHandle; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Event* e = reinterpret_cast(event); + e->StartCapture(stream); + hip::Stream* s = reinterpret_cast(stream); + s->SetCaptureEvent(event); + std::vector lastCapturedNodes = s->GetLastCapturedNodes(); + if (!lastCapturedNodes.empty()) { + e->SetNodesPrevToRecorded(lastCapturedNodes); + } + return hipSuccess; +} + +hipError_t capturehipStreamWaitEvent(hipEvent_t& event, hipStream_t& stream, unsigned int& flags) { + ClPrint(amd::LOG_INFO, amd::LOG_API, + "[hipGraph] current capture node StreamWaitEvent on stream : %p, Event %p", stream, + event); + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + hip::Event* e = reinterpret_cast(event); + + if (event == nullptr || stream == nullptr) { + return hipErrorInvalidValue; + } + if (!s->IsOriginStream()) { + s->SetCaptureGraph(reinterpret_cast(e->GetCaptureStream())->GetCaptureGraph()); + s->SetCaptureId(reinterpret_cast(e->GetCaptureStream())->GetCaptureID()); + s->SetCaptureMode(reinterpret_cast(e->GetCaptureStream())->GetCaptureMode()); + 
s->SetParentStream(e->GetCaptureStream()); + reinterpret_cast(s->GetParentStream())->SetParallelCaptureStream(stream); + } + s->AddCrossCapturedNode(e->GetNodesPrevToRecorded()); + return hipSuccess; +} + +hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*& userData) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] current capture node host on stream : %p", + stream); + if (fn == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hipHostNodeParams hostParams = {0}; + hostParams.fn = fn; + hostParams.userData = userData; + hip::Stream* s = reinterpret_cast(stream); + hipGraphNode_t pGraphNode = new hipGraphHostNode(&hostParams); + hipError_t status = + ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size()); + if (status != hipSuccess) { + return status; + } + s->SetLastCapturedNode(pGraphNode); + return hipSuccess; +} + +// ================================================================================================ +hipError_t capturehipMallocAsync(hipStream_t stream, hipMemPool_t mem_pool, + size_t size, void** dev_ptr) { + auto s = reinterpret_cast(stream); + auto mpool = reinterpret_cast(mem_pool); + + hipMemAllocNodeParams node_params{}; + + node_params.poolProps.allocType = hipMemAllocationTypePinned; + node_params.poolProps.location.id = mpool->Device()->deviceId(); + node_params.poolProps.location.type = hipMemLocationTypeDevice; + + std::vector descs; + for (const auto device : g_devices ) { + hipMemLocation location{hipMemLocationTypeDevice, device->deviceId()}; + hipMemAccessFlags flags{}; + mpool->GetAccess(device, &flags); + descs.push_back({location, flags}); + } + + node_params.accessDescs = &descs[0]; + node_params.accessDescCount = descs.size(); + node_params.bytesize = size; + + auto mem_alloc_node = new hipGraphMemAllocNode(&node_params); + auto status = 
ihipGraphAddNode(mem_alloc_node, s->GetCaptureGraph(), + s->GetLastCapturedNodes().data(), s->GetLastCapturedNodes().size()); + if (status != hipSuccess) { + return status; + } + // Execute the node during capture, so runtime can return a valid device pointer + *dev_ptr = mem_alloc_node->Execute(s); + s->SetLastCapturedNode(mem_alloc_node); + + return hipSuccess; +} + +// ================================================================================================ +hipError_t capturehipFreeAsync(hipStream_t stream, void* dev_ptr) { + hip::Stream* s = reinterpret_cast(stream); + auto mem_free_node = new hipGraphMemFreeNode(dev_ptr); + auto status = ihipGraphAddNode(mem_free_node, s->GetCaptureGraph(), + s->GetLastCapturedNodes().data(), s->GetLastCapturedNodes().size()); + if (status != hipSuccess) { + return status; + } + // Execute the node during capture, so runtime can release memory into cache + mem_free_node->Execute(s); + s->SetLastCapturedNode(mem_free_node); + return hipSuccess; +} + +// ================================================================================================ +hipError_t hipStreamIsCapturing_common(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus) { + if (pCaptureStatus == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + if (hip::Stream::StreamCaptureBlocking() == true && stream == nullptr) { + return hipErrorStreamCaptureImplicit; + } + if (stream == nullptr) { + *pCaptureStatus = hipStreamCaptureStatusNone; + } else { + *pCaptureStatus = reinterpret_cast(stream)->GetCaptureStatus(); + } + return hipSuccess; +} + +hipError_t hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus) { + HIP_INIT_API(hipStreamIsCapturing, stream, pCaptureStatus); + HIP_RETURN(hipStreamIsCapturing_common(stream, pCaptureStatus)); +} + +hipError_t hipStreamIsCapturing_spt(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus) { + 
HIP_INIT_API(hipStreamIsCapturing, stream, pCaptureStatus); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamIsCapturing_common(stream, pCaptureStatus)); +} + +hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode) { + HIP_INIT_API(hipThreadExchangeStreamCaptureMode, mode); + + if (mode == nullptr || *mode < hipStreamCaptureModeGlobal || + *mode > hipStreamCaptureModeRelaxed) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto oldMode = hip::tls.stream_capture_mode_; + hip::tls.stream_capture_mode_ = *mode; + *mode = oldMode; + + HIP_RETURN_DURATION(hipSuccess); +} + +hipError_t hipStreamBeginCapture_common(hipStream_t stream, hipStreamCaptureMode mode) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + // capture cannot be initiated on legacy stream + if (stream == nullptr) { + return hipErrorStreamCaptureUnsupported; + } + if (mode < hipStreamCaptureModeGlobal || mode > hipStreamCaptureModeRelaxed) { + return hipErrorInvalidValue; + } + hip::Stream* s = reinterpret_cast(stream); + // It can be initiated if the stream is not already in capture mode + if (s->GetCaptureStatus() == hipStreamCaptureStatusActive) { + return hipErrorIllegalState; + } + + s->SetCaptureGraph(new ihipGraph(s->GetDevice())); + s->SetCaptureId(); + s->SetCaptureMode(mode); + s->SetOriginStream(); + if (mode != hipStreamCaptureModeRelaxed) { + hip::tls.capture_streams_.push_back(s); + } + if (mode == hipStreamCaptureModeGlobal) { + amd::ScopedLock lock(g_captureStreamsLock); + g_captureStreams.push_back(s); + } + { + amd::ScopedLock lock(g_streamSetLock); + g_allCapturingStreams.insert(s); + } + return hipSuccess; +} + +hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode) { + HIP_INIT_API(hipStreamBeginCapture, stream, mode); + HIP_RETURN_DURATION(hipStreamBeginCapture_common(stream, mode)); +} + +hipError_t hipStreamBeginCapture_spt(hipStream_t stream, hipStreamCaptureMode mode) { + 
HIP_INIT_API(hipStreamBeginCapture, stream, mode); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN_DURATION(hipStreamBeginCapture_common(stream, mode)); +} + +hipError_t hipStreamEndCapture_common(hipStream_t stream, hipGraph_t* pGraph) { + if (pGraph == nullptr) { + return hipErrorInvalidValue; + } + if (stream == nullptr) { + return hipErrorIllegalState; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + // Capture status must be active before endCapture can be initiated + if (s->GetCaptureStatus() == hipStreamCaptureStatusNone) { + return hipErrorIllegalState; + } + // Capture must be ended on the same stream in which it was initiated + if (!s->IsOriginStream()) { + return hipErrorStreamCaptureUnmatched; + } + // If mode is not hipStreamCaptureModeRelaxed, hipStreamEndCapture must be called on the stream + // from the same thread + const auto& it = std::find(hip::tls.capture_streams_.begin(), hip::tls.capture_streams_.end(), s); + if (s->GetCaptureMode() != hipStreamCaptureModeRelaxed) { + if (it == hip::tls.capture_streams_.end()) { + return hipErrorStreamCaptureWrongThread; + } + hip::tls.capture_streams_.erase(it); + } + if (s->GetCaptureMode() == hipStreamCaptureModeGlobal) { + amd::ScopedLock lock(g_captureStreamsLock); + g_captureStreams.erase(std::find(g_captureStreams.begin(), g_captureStreams.end(), s)); + } + // If capture was invalidated, due to a violation of the rules of stream capture + if (s->GetCaptureStatus() == hipStreamCaptureStatusInvalidated) { + *pGraph = nullptr; + return hipErrorStreamCaptureInvalidated; + } + { + amd::ScopedLock lock(g_streamSetLock); + g_allCapturingStreams.erase(std::find(g_allCapturingStreams.begin(), g_allCapturingStreams.end(), s)); + } + // check if all parallel streams have joined + // Nodes that are removed from the dependency set via API hipStreamUpdateCaptureDependencies do + // not result in hipErrorStreamCaptureUnjoined + // add 
temporary node to check if all parallel streams have joined + hipGraphNode_t pGraphNode; + pGraphNode = new hipGraphEmptyNode(); + hipError_t status = + ihipGraphAddNode(pGraphNode, s->GetCaptureGraph(), s->GetLastCapturedNodes().data(), + s->GetLastCapturedNodes().size()); + + if (s->GetCaptureGraph()->GetLeafNodeCount() > 1) { + std::vector leafNodes = s->GetCaptureGraph()->GetLeafNodes(); + std::unordered_set nodes = s->GetCaptureGraph()->GetManualNodesDuringCapture(); + for (auto node : nodes) { + leafNodes.erase(std::find(leafNodes.begin(), leafNodes.end(), node)); + } + const std::vector& removedDepNodes = s->GetRemovedDependencies(); + bool foundInRemovedDep = false; + for (auto leafNode : leafNodes) { + for (auto node : removedDepNodes) { + if (node == leafNode) { + foundInRemovedDep = true; + } + } + } + // remove temporary node + s->GetCaptureGraph()->RemoveNode(pGraphNode); + s->GetCaptureGraph()->RemoveManualNodesDuringCapture(); + if (leafNodes.size() > 1 && foundInRemovedDep == false) { + return hipErrorStreamCaptureUnjoined; + } + } else { + // remove temporary node + s->GetCaptureGraph()->RemoveNode(pGraphNode); + } + *pGraph = s->GetCaptureGraph(); + // end capture on all streams/events part of graph capture + return s->EndCapture(); +} + +hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph) { + HIP_INIT_API(hipStreamEndCapture, stream, pGraph); + HIP_RETURN_DURATION(hipStreamEndCapture_common(stream, pGraph)); +} + +hipError_t hipStreamEndCapture_spt(hipStream_t stream, hipGraph_t* pGraph) { + HIP_INIT_API(hipStreamEndCapture, stream, pGraph); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN_DURATION(hipStreamEndCapture_common(stream, pGraph)); +} + +hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags) { + HIP_INIT_API(hipGraphCreate, pGraph, flags); + if ((pGraph == nullptr) || (flags != 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + *pGraph = new ihipGraph(hip::getCurrentDevice()); + HIP_RETURN(hipSuccess); +} + 
+hipError_t hipGraphDestroy(hipGraph_t graph) { + HIP_INIT_API(hipGraphDestroy, graph); + if (graph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + // if graph is not valid its destroyed already + if (!ihipGraph::isGraphValid(graph)) { + HIP_RETURN(hipErrorIllegalState); + } + delete graph; + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipKernelNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphAddKernelNode, pGraphNode, graph, pDependencies, numDependencies, + pNodeParams); + if (pGraphNode == nullptr || graph == nullptr || pNodeParams == nullptr || + (numDependencies > 0 && pDependencies == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN_DURATION(ihipGraphAddKernelNode(pGraphNode, graph, pDependencies, numDependencies, + pNodeParams, false)); +} + +hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemcpy3DParms* pCopyParams) { + HIP_INIT_API(hipGraphAddMemcpyNode, pGraphNode, graph, pDependencies, numDependencies, + pCopyParams); + + HIP_RETURN_DURATION(ihipGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, + pCopyParams, false)); +} + +hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dst, const void* src, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipGraphAddMemcpyNode1D, pGraphNode, graph, pDependencies, numDependencies, dst, src, + count, kind); + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN_DURATION(ihipGraphAddMemcpyNode1D(pGraphNode, graph, pDependencies, numDependencies, + dst, src, count, kind, false)); +} + +hipError_t 
hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, const void* src, + size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipGraphMemcpyNodeSetParams1D, node, dst, src, count, kind); + if (!hipGraphNode::isNodeValid(node) || dst == nullptr || src == nullptr || count == 0 || + src == dst) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(reinterpret_cast(node)->SetParams(dst, src, count, kind)); +} + +hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* src, size_t count, + hipMemcpyKind kind) { + HIP_INIT_API(hipGraphExecMemcpyNodeSetParams1D, hGraphExec, node, dst, src, count, kind); + if (hGraphExec == nullptr || !hipGraphNode::isNodeValid(node) || dst == nullptr || + src == nullptr || count == 0 || src == dst) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(dst, src, count, kind)); +} + +hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemsetParams* pMemsetParams) { + HIP_INIT_API(hipGraphAddMemsetNode, pGraphNode, graph, pDependencies, numDependencies, + pMemsetParams); + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN_DURATION(ihipGraphAddMemsetNode(pGraphNode, graph, pDependencies, numDependencies, + pMemsetParams, false)); +} + +hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies) { + HIP_INIT_API(hipGraphAddEmptyNode, pGraphNode, graph, pDependencies, numDependencies); + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr)) { + 
HIP_RETURN(hipErrorInvalidValue); + } + *pGraphNode = new hipGraphEmptyNode(); + hipError_t status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, false); + HIP_RETURN(status); +} + +hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipGraph_t childGraph) { + HIP_INIT_API(hipGraphAddChildGraphNode, pGraphNode, pDependencies, numDependencies, childGraph); + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || childGraph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *pGraphNode = new hipChildGraphNode(childGraph); + hipError_t status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, false); + HIP_RETURN(status); +} + +hipError_t ihipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, + uint64_t flags = 0) { + if (pGraphExec == nullptr || graph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + std::unordered_map clonedNodes; + hipGraph_t clonedGraph = graph->clone(clonedNodes); + if (clonedGraph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + std::vector> parallelLists; + std::unordered_map> nodeWaitLists; + std::unordered_set graphExeUserObj; + clonedGraph->GetRunList(parallelLists, nodeWaitLists); + std::vector levelOrder; + clonedGraph->LevelOrder(levelOrder); + clonedGraph->GetUserObjs(graphExeUserObj); + *pGraphExec = + new hipGraphExec(levelOrder, parallelLists, nodeWaitLists, clonedNodes, + graphExeUserObj, flags); + if (*pGraphExec != nullptr) { + return (*pGraphExec)->Init(); + } else { + return hipErrorOutOfMemory; + } +} + +hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize) { + HIP_INIT_API(hipGraphInstantiate, pGraphExec, graph); + HIP_RETURN_DURATION(ihipGraphInstantiate(pGraphExec, graph)); +} + +hipError_t 
hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, + unsigned long long flags = 0) { + HIP_INIT_API(hipGraphInstantiateWithFlags, pGraphExec, graph, flags); + if (pGraphExec == nullptr || graph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // invalid flag check + if (flags != 0 && flags != hipGraphInstantiateFlagAutoFreeOnLaunch) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN_DURATION(ihipGraphInstantiate(pGraphExec, graph, flags)); +} + +hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec) { + HIP_INIT_API(hipGraphExecDestroy, pGraphExec); + if (pGraphExec == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + delete pGraphExec; + HIP_RETURN(hipSuccess); +} + +hipError_t ihipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + return graphExec->Run(stream); +} + +hipError_t hipGraphLaunch_common(hipGraphExec_t graphExec, hipStream_t stream) { + if (graphExec == nullptr || !hipGraphExec::isGraphExecValid(graphExec)) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + return ihipGraphLaunch(graphExec, stream); +} + +hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { + HIP_INIT_API(hipGraphLaunch, graphExec, stream); + HIP_RETURN_DURATION(hipGraphLaunch_common(graphExec, stream)); +} + +hipError_t hipGraphLaunch_spt(hipGraphExec_t graphExec, hipStream_t stream) { + HIP_INIT_API(hipGraphLaunch, graphExec, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN_DURATION(hipGraphLaunch_common(graphExec, stream)); +} + +hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, size_t* numNodes) { + HIP_INIT_API(hipGraphGetNodes, graph, nodes, numNodes); + if (graph == nullptr || numNodes == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + std::vector graphNodes; + graph->LevelOrder(graphNodes); + if (nodes == nullptr) { + *numNodes = 
graphNodes.size(); + HIP_RETURN(hipSuccess); + } else if (*numNodes <= graphNodes.size()) { + for (int i = 0; i < *numNodes; i++) { + nodes[i] = graphNodes[i]; + } + } else { + for (int i = 0; i < graphNodes.size(); i++) { + nodes[i] = graphNodes[i]; + } + for (int i = graphNodes.size(); i < *numNodes; i++) { + nodes[i] = nullptr; + } + *numNodes = graphNodes.size(); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, + size_t* pNumRootNodes) { + HIP_INIT_API(hipGraphGetRootNodes, graph, pRootNodes, pNumRootNodes); + + if (graph == nullptr || pNumRootNodes == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + const std::vector nodes = graph->GetRootNodes(); + if (pRootNodes == nullptr) { + *pNumRootNodes = nodes.size(); + HIP_RETURN(hipSuccess); + } else if (*pNumRootNodes <= nodes.size()) { + for (int i = 0; i < *pNumRootNodes; i++) { + pRootNodes[i] = nodes[i]; + } + } else { + for (int i = 0; i < nodes.size(); i++) { + pRootNodes[i] = nodes[i]; + } + for (int i = nodes.size(); i < *pNumRootNodes; i++) { + pRootNodes[i] = nullptr; + } + *pNumRootNodes = nodes.size(); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphKernelNodeGetParams, node, pNodeParams); + if (!hipGraphNode::isNodeValid(node) || pNodeParams == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(pNodeParams); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphKernelNodeSetParams, node, pNodeParams); + if (!hipGraphNode::isNodeValid(node) || pNodeParams == nullptr || pNodeParams->func == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(node)->SetParams(pNodeParams)); +} + +hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms* 
pNodeParams) { + HIP_INIT_API(hipGraphMemcpyNodeGetParams, node, pNodeParams); + if (!hipGraphNode::isNodeValid(node) || pNodeParams == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(pNodeParams); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + const hipKernelNodeAttrValue* value) { + HIP_INIT_API(hipGraphKernelNodeSetAttribute, hNode, attr, value); + if (hNode == nullptr || value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (attr != hipKernelNodeAttributeAccessPolicyWindow && + attr != hipKernelNodeAttributeCooperative) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(hNode)->SetAttrParams(attr, value)); +} + +hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, + hipKernelNodeAttrValue* value) { + HIP_INIT_API(hipGraphKernelNodeGetAttribute, hNode, attr, value); + if (hNode == nullptr || value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (attr != hipKernelNodeAttributeAccessPolicyWindow && + attr != hipKernelNodeAttributeCooperative) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(hNode)->GetAttrParams(attr, value)); +} + +hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms* pNodeParams) { + HIP_INIT_API(hipGraphMemcpyNodeSetParams, node, pNodeParams); + if (!hipGraphNode::isNodeValid(node) || pNodeParams == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(node)->SetParams(pNodeParams)); +} + +hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipMemcpy3DParms* pNodeParams) { + HIP_INIT_API(hipGraphExecMemcpyNodeSetParams, hGraphExec, node, pNodeParams); + if (hGraphExec == nullptr || !hipGraphNode::isNodeValid(node)) { + HIP_RETURN(hipErrorInvalidValue); + } + if (ihipMemcpy3D_validate(pNodeParams) != hipSuccess) { + 
HIP_RETURN(hipErrorInvalidValue); + } + // Check if pNodeParams passed is a empty struct + if (((pNodeParams->srcArray == 0) && (pNodeParams->srcPtr.ptr == nullptr)) || + ((pNodeParams->dstArray == 0) && (pNodeParams->dstPtr.ptr == nullptr))) { + HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(pNodeParams)); +} + +hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams* pNodeParams) { + HIP_INIT_API(hipGraphMemsetNodeGetParams, node, pNodeParams); + if (!hipGraphNode::isNodeValid(node) || pNodeParams == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(pNodeParams); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams* pNodeParams) { + HIP_INIT_API(hipGraphMemsetNodeSetParams, node, pNodeParams); + if (!hipGraphNode::isNodeValid(node) || pNodeParams == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (pNodeParams->height > 1 && + pNodeParams->pitch < (pNodeParams->width * pNodeParams->elementSize)) { + return hipErrorInvalidValue; + } + HIP_RETURN(reinterpret_cast(node)->SetParams(pNodeParams)); +} + +hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipMemsetParams* pNodeParams) { + HIP_INIT_API(hipGraphExecMemsetNodeSetParams, hGraphExec, node, pNodeParams); + if (hGraphExec == nullptr || !hipGraphNode::isNodeValid(node) || pNodeParams == nullptr || + pNodeParams->dst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (ihipGraphMemsetParams_validate(pNodeParams) != hipSuccess) { + HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + 
HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(pNodeParams)); +} + +hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies) { + HIP_INIT_API(hipGraphAddDependencies, graph, from, to, numDependencies); + if (graph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (numDependencies == 0) { + HIP_RETURN(hipSuccess); + } else if (from == nullptr || to == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + for (size_t i = 0; i < numDependencies; i++) { + // When the same node is specified for both from and to + if (from[i] == nullptr || to[i] == nullptr || from[i] == to[i] || + !hipGraphNode::isNodeValid(to[i]) || !hipGraphNode::isNodeValid(from[i]) || + // making sure the nodes blong to the graph + to[i]->GetParentGraph() != graph || from[i]->GetParentGraph() != graph) { + HIP_RETURN(hipErrorInvalidValue); + } + } + + for (size_t i = 0; i < numDependencies; i++) { + // When the same edge added from->to return invalid value + const std::vector& edges = from[i]->GetEdges(); + for (auto edge : edges) { + if (edge == to[i]) { + HIP_RETURN(hipErrorInvalidValue); + } + } + from[i]->AddEdge(to[i]); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipKernelNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphExecKernelNodeSetParams, hGraphExec, node, pNodeParams); + if (hGraphExec == nullptr || !hipGraphNode::isNodeValid(node) || pNodeParams == nullptr || + pNodeParams->func == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(pNodeParams)); +} + +hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph) { + HIP_INIT_API(hipGraphChildGraphNodeGetGraph, node, pGraph); + if 
(!hipGraphNode::isNodeValid(node) || pGraph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *pGraph = reinterpret_cast(node)->GetChildGraph(); + if (*pGraph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + hipGraph_t childGraph) { + HIP_INIT_API(hipGraphExecChildGraphNodeSetParams, hGraphExec, node, childGraph); + if (hGraphExec == nullptr || !hipGraphNode::isNodeValid(node) || childGraph == nullptr || + !ihipGraph::isGraphValid(childGraph)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (childGraph == node->GetParentGraph()) { + HIP_RETURN(hipErrorUnknown); + } + + // Validate whether the topology of node and childGraph matches + std::vector childGraphNodes1; + node->LevelOrder(childGraphNodes1); + + std::vector childGraphNodes2; + childGraph->LevelOrder(childGraphNodes2); + + if (childGraphNodes1.size() != childGraphNodes2.size()) { + HIP_RETURN(hipErrorUnknown); + } + // Validate if the node insertion order matches + else { + for (std::vector::size_type i = 0; i != childGraphNodes1.size(); i++) { + if (childGraphNodes1[i]->GetType() != childGraphNodes2[i]->GetType()) { + HIP_RETURN(hipErrorUnknown); + } + } + } + + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(childGraph)); +} + +hipError_t hipStreamGetCaptureInfo_common(hipStream_t stream, + hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId) { + if (pCaptureStatus == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + if (hip::Stream::StreamCaptureBlocking() == true && stream == nullptr) { + return hipErrorStreamCaptureImplicit; + } + if (stream == nullptr) { + *pCaptureStatus = hipStreamCaptureStatusNone; + return hipSuccess; + } + hip::Stream* s = 
reinterpret_cast(stream); + *pCaptureStatus = s->GetCaptureStatus(); + if (*pCaptureStatus == hipStreamCaptureStatusActive && pId != nullptr) { + *pId = s->GetCaptureID(); + } + return hipSuccess; +} + +hipError_t hipStreamGetCaptureInfo(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId) { + HIP_INIT_API(hipStreamGetCaptureInfo, stream, pCaptureStatus, pId); + HIP_RETURN(hipStreamGetCaptureInfo_common(stream, pCaptureStatus, pId)); +} + +hipError_t hipStreamGetCaptureInfo_spt(hipStream_t stream, hipStreamCaptureStatus* pCaptureStatus, + unsigned long long* pId) { + HIP_INIT_API(hipStreamGetCaptureInfo, stream, pCaptureStatus, pId); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamGetCaptureInfo_common(stream, pCaptureStatus, pId)); +} + +hipError_t hipStreamGetCaptureInfo_v2_common(hipStream_t stream, + hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out, hipGraph_t* graph_out, + const hipGraphNode_t** dependencies_out, + size_t* numDependencies_out) { + if (captureStatus_out == nullptr) { + return hipErrorInvalidValue; + } + if (hip::Stream::StreamCaptureBlocking() == true && stream == nullptr) { + return hipErrorStreamCaptureImplicit; + } + if (stream == nullptr) { + *captureStatus_out = hipStreamCaptureStatusNone; + return hipSuccess; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* s = reinterpret_cast(stream); + *captureStatus_out = s->GetCaptureStatus(); + if (*captureStatus_out == hipStreamCaptureStatusActive) { + if (id_out != nullptr) { + *id_out = s->GetCaptureID(); + } + if (graph_out != nullptr) { + *graph_out = s->GetCaptureGraph(); + } + if (dependencies_out != nullptr) { + *dependencies_out = s->GetLastCapturedNodes().data(); + } + if (numDependencies_out != nullptr) { + *numDependencies_out = s->GetLastCapturedNodes().size(); + } + } + return hipSuccess; +} + +hipError_t hipStreamGetCaptureInfo_v2(hipStream_t stream, hipStreamCaptureStatus* 
captureStatus_out, + unsigned long long* id_out, hipGraph_t* graph_out, + const hipGraphNode_t** dependencies_out, + size_t* numDependencies_out) { + HIP_INIT_API(hipStreamGetCaptureInfo_v2, stream, captureStatus_out, id_out, graph_out, + dependencies_out, numDependencies_out); + HIP_RETURN(hipStreamGetCaptureInfo_v2_common(stream, captureStatus_out, id_out, graph_out, + dependencies_out, numDependencies_out)); +} + +hipError_t hipStreamGetCaptureInfo_v2_spt(hipStream_t stream, + hipStreamCaptureStatus* captureStatus_out, + unsigned long long* id_out, hipGraph_t* graph_out, + const hipGraphNode_t** dependencies_out, + size_t* numDependencies_out) { + HIP_INIT_API(hipStreamGetCaptureInfo_v2, stream, captureStatus_out, id_out, graph_out, + dependencies_out, numDependencies_out); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamGetCaptureInfo_v2_common(stream, captureStatus_out, id_out, graph_out, + dependencies_out, numDependencies_out)); +} + +hipError_t hipStreamUpdateCaptureDependencies(hipStream_t stream, hipGraphNode_t* dependencies, + size_t numDependencies, unsigned int flags) { + HIP_INIT_API(hipStreamUpdateCaptureDependencies, stream, dependencies, numDependencies, flags); + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorContextIsDestroyed); + } + hip::Stream* s = reinterpret_cast(stream); + if (s->GetCaptureStatus() == hipStreamCaptureStatusNone) { + HIP_RETURN(hipErrorIllegalState); + } + if ((s->GetCaptureGraph()->GetNodeCount() < numDependencies) || + (numDependencies > 0 && dependencies == nullptr) || + (flags != 0 && flags != hipStreamAddCaptureDependencies && + flags != hipStreamSetCaptureDependencies)) { + HIP_RETURN(hipErrorInvalidValue); + } + std::vector depNodes; + const std::vector& graphNodes = s->GetCaptureGraph()->GetNodes(); + for (int i = 0; i < numDependencies; i++) { + if ((dependencies[i] == nullptr) || + std::find(std::begin(graphNodes), std::end(graphNodes), dependencies[i]) == std::end(graphNodes)) { + 
HIP_RETURN(hipErrorInvalidValue); + } + depNodes.push_back(dependencies[i]); + } + if (flags == hipStreamAddCaptureDependencies) { + s->AddCrossCapturedNode(depNodes); + } else if (flags == hipStreamSetCaptureDependencies) { + bool replace = true; + s->AddCrossCapturedNode(depNodes, replace); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, + const hipGraphNode_t* to, size_t numDependencies) { + HIP_INIT_API(hipGraphRemoveDependencies, graph, from, to, numDependencies); + if (graph == nullptr || (numDependencies > 0 && (from == nullptr || to == nullptr))) { + HIP_RETURN(hipErrorInvalidValue); + } + for (size_t i = 0; i < numDependencies; i++) { + if (to[i]->GetParentGraph() != graph || from[i]->GetParentGraph() != graph || + from[i]->RemoveUpdateEdge(to[i]) == false) { + HIP_RETURN(hipErrorInvalidValue); + } + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, hipGraphNode_t* to, + size_t* numEdges) { + HIP_INIT_API(hipGraphGetEdges, graph, from, to, numEdges); + if (graph == nullptr || numEdges == nullptr || (from == nullptr && to != nullptr) || + (to == nullptr && from != nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + const std::vector> edges = graph->GetEdges(); + // returns only the number of edges in numEdges when from and to are null + if (from == nullptr && to == nullptr) { + *numEdges = edges.size(); + HIP_RETURN(hipSuccess); + } else if (*numEdges <= edges.size()) { + for (int i = 0; i < *numEdges; i++) { + from[i] = edges[i].first; + to[i] = edges[i].second; + } + } else { + for (int i = 0; i < edges.size(); i++) { + from[i] = edges[i].first; + to[i] = edges[i].second; + } + // If numEdges > actual number of edges, the remaining entries in from and to will be set to + // NULL + for (int i = edges.size(); i < *numEdges; i++) { + from[i] = nullptr; + to[i] = nullptr; + } + *numEdges = edges.size(); + } + + HIP_RETURN(hipSuccess); 
+} + +hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t* pDependencies, + size_t* pNumDependencies) { + HIP_INIT_API(hipGraphNodeGetDependencies, node, pDependencies, pNumDependencies); + if (!hipGraphNode::isNodeValid(node) || pNumDependencies == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + const std::vector& dependencies = node->GetDependencies(); + if (pDependencies == NULL) { + *pNumDependencies = dependencies.size(); + HIP_RETURN(hipSuccess); + } else if (*pNumDependencies <= dependencies.size()) { + for (int i = 0; i < *pNumDependencies; i++) { + pDependencies[i] = dependencies[i]; + } + } else { + for (int i = 0; i < dependencies.size(); i++) { + pDependencies[i] = dependencies[i]; + } + // pNumDependencies > actual number of dependencies, the remaining entries in pDependencies will + // be set to NULL + for (int i = dependencies.size(); i < *pNumDependencies; i++) { + pDependencies[i] = nullptr; + } + *pNumDependencies = dependencies.size(); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t* pDependentNodes, + size_t* pNumDependentNodes) { + HIP_INIT_API(hipGraphNodeGetDependentNodes, node, pDependentNodes, pNumDependentNodes); + if (!hipGraphNode::isNodeValid(node) || pNumDependentNodes == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + const std::vector& dependents = node->GetEdges(); + if (pDependentNodes == NULL) { + *pNumDependentNodes = dependents.size(); + HIP_RETURN(hipSuccess); + } else if (*pNumDependentNodes <= dependents.size()) { + for (int i = 0; i < *pNumDependentNodes; i++) { + pDependentNodes[i] = dependents[i]; + } + } else { + for (int i = 0; i < dependents.size(); i++) { + pDependentNodes[i] = dependents[i]; + } + // pNumDependentNodes > actual number of dependents, the remaining entries in pDependentNodes + // will be set to NULL + for (int i = dependents.size(); i < *pNumDependentNodes; i++) { + pDependentNodes[i] = nullptr; + } + 
*pNumDependentNodes = dependents.size(); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType) { + HIP_INIT_API(hipGraphNodeGetType, node, pType); + if (!hipGraphNode::isNodeValid(node) || pType == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *pType = node->GetType(); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphDestroyNode(hipGraphNode_t node) { + HIP_INIT_API(hipGraphDestroyNode, node); + if (!hipGraphNode::isNodeValid(node)) { + HIP_RETURN(hipErrorInvalidValue); + } + node->GetParentGraph()->RemoveNode(node); + HIP_RETURN(hipSuccess); +} + + +hipError_t hipGraphClone(hipGraph_t* pGraphClone, hipGraph_t originalGraph) { + HIP_INIT_API(hipGraphClone, pGraphClone, originalGraph); + if (originalGraph == nullptr || pGraphClone == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!ihipGraph::isGraphValid(originalGraph)) { + HIP_RETURN(hipErrorInvalidValue); + } + *pGraphClone = originalGraph->clone(); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, + hipGraph_t clonedGraph) { + HIP_INIT_API(hipGraphNodeFindInClone, pNode, originalNode, clonedGraph); + if (pNode == nullptr || originalNode == nullptr || clonedGraph == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (clonedGraph->getOriginalGraph() == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + for (auto node : clonedGraph->GetNodes()) { + if (node->GetID() == originalNode->GetID()) { + *pNode = node; + HIP_RETURN(hipSuccess); + } + } + HIP_RETURN(hipErrorInvalidValue); +} + +hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipGraphAddMemcpyNodeFromSymbol, pGraphNode, graph, pDependencies, numDependencies, + dst, symbol, count, offset, kind); + if (graph 
== nullptr || pGraphNode == nullptr || count == 0 || + (numDependencies > 0 && pDependencies == nullptr) || dst == nullptr || + !ihipGraph::isGraphValid(graph)) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr); + if (status != hipSuccess) { + HIP_RETURN(status); + } + *pGraphNode = new hipGraphMemcpyNodeFromSymbol(dst, symbol, count, offset, kind); + status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, false); + HIP_RETURN(status); +} + +hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, const void* symbol, + size_t count, size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipGraphMemcpyNodeSetParamsFromSymbol, node, dst, symbol, count, offset, kind); + if (symbol == nullptr) { + HIP_RETURN(hipErrorInvalidSymbol); + } + if (!hipGraphNode::isNodeValid(node) || dst == nullptr || count == 0 || symbol == dst) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(reinterpret_cast(node)->SetParams(dst, symbol, count, + offset, kind)); +} + +hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + void* dst, const void* symbol, size_t count, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsFromSymbol, hGraphExec, node, dst, symbol, count, + offset, kind); + if (symbol == nullptr) { + HIP_RETURN(hipErrorInvalidSymbol); + } + if (hGraphExec == nullptr || !hipGraphNode::isNodeValid(node) || dst == nullptr || count == 0 || symbol == dst) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode) + ->SetParams(dst, symbol, count, offset, kind)); +} + +hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, hipGraph_t 
graph, + const hipGraphNode_t* pDependencies, + size_t numDependencies, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind) { + HIP_INIT_API(hipGraphAddMemcpyNodeToSymbol, pGraphNode, graph, pDependencies, numDependencies, + symbol, src, count, offset, kind); + if (pGraphNode == nullptr || graph == nullptr || src == nullptr || count == 0 || + !ihipGraph::isGraphValid(graph) || (pDependencies == nullptr && numDependencies > 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr); + if (status != hipSuccess) { + HIP_RETURN(status); + } + *pGraphNode = new hipGraphMemcpyNodeToSymbol(symbol, src, count, offset, kind); + if (*pGraphNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, false); + HIP_RETURN(status); +} + +// Replaces the symbol/source/size parameters of an existing graph node via its SetParams(). +// Returns hipErrorInvalidSymbol for a null symbol and hipErrorInvalidValue for any other bad +// argument; otherwise forwards whatever SetParams() reports. +hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, const void* symbol, + const void* src, size_t count, size_t offset, + hipMemcpyKind kind) { + // Pass every parameter, including node, to HIP_INIT_API as the sibling APIs do. + HIP_INIT_API(hipGraphMemcpyNodeSetParamsToSymbol, node, symbol, src, count, offset, kind); + if (symbol == nullptr) { + HIP_RETURN(hipErrorInvalidSymbol); + } + if (!hipGraphNode::isNodeValid(node) || src == nullptr || count == 0 || symbol == src) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(reinterpret_cast(node)->SetParams(symbol, src, count, + offset, kind)); +} + + +hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const void* symbol, const void* src, + size_t count, size_t offset, + hipMemcpyKind kind) { + HIP_INIT_API(hipGraphExecMemcpyNodeSetParamsToSymbol, hGraphExec, node, symbol, src, count, + offset, kind); + if (symbol == nullptr) { + HIP_RETURN(hipErrorInvalidSymbol); + } + if (hGraphExec == nullptr || src == nullptr || !hipGraphNode::isNodeValid(node) || count ==
0 || src == symbol) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode) + ->SetParams(symbol, src, count, offset, kind)); +} + +hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event) { + HIP_INIT_API(hipGraphAddEventRecordNode, pGraphNode, graph, pDependencies, numDependencies, + event); + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || event == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *pGraphNode = new hipGraphEventRecordNode(event); + hipError_t status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, false); + HIP_RETURN(status); +} + +hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) { + HIP_INIT_API(hipGraphEventRecordNodeGetEvent, node, event_out); + if (!hipGraphNode::isNodeValid(node) || event_out == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(event_out); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { + HIP_INIT_API(hipGraphEventRecordNodeSetEvent, node, event); + if (!hipGraphNode::isNodeValid(node) || event == nullptr || node->GetType() != hipGraphNodeTypeEventRecord) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(node)->SetParams(event)); +} + +hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event) { + HIP_INIT_API(hipGraphExecEventRecordNodeSetEvent, hGraphExec, hNode, event); + if (hGraphExec == nullptr || hNode == nullptr || event == nullptr || + hNode->GetType() != hipGraphNodeTypeEventRecord) { + 
HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(hNode); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(event)); +} + +hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + hipEvent_t event) { + HIP_INIT_API(hipGraphAddEventWaitNode, pGraphNode, graph, pDependencies, numDependencies, event); + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || event == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *pGraphNode = new hipGraphEventWaitNode(event); + hipError_t status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, false); + HIP_RETURN(status); +} + +hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) { + HIP_INIT_API(hipGraphEventWaitNodeGetEvent, node, event_out); + if (!hipGraphNode::isNodeValid(node) || event_out == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(event_out); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { + HIP_INIT_API(hipGraphEventWaitNodeSetEvent, node, event); + if (!hipGraphNode::isNodeValid(node) || event == nullptr || node->GetType() != hipGraphNodeTypeWaitEvent) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(node)->SetParams(event)); +} + +hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + hipEvent_t event) { + HIP_INIT_API(hipGraphExecEventWaitNodeSetEvent, hGraphExec, hNode, event); + if (hGraphExec == nullptr || hNode == nullptr || event == nullptr || + (hNode->GetType() != hipGraphNodeTypeWaitEvent)) { + HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = 
hGraphExec->GetClonedNode(hNode); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(event)); +} + +hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipHostNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphAddHostNode, pGraphNode, graph, pDependencies, numDependencies, pNodeParams); + if (pGraphNode == nullptr || graph == nullptr || pNodeParams == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || pNodeParams->fn == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *pGraphNode = new hipGraphHostNode(pNodeParams); + hipError_t status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies, false); + HIP_RETURN(status); +} + +hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphHostNodeGetParams, node, pNodeParams); + if (!hipGraphNode::isNodeValid(node) || pNodeParams == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(pNodeParams); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphHostNodeSetParams, node, pNodeParams); + if (pNodeParams == nullptr || pNodeParams->fn == nullptr || + !hipGraphNode::isNodeValid(node)) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(node)->SetParams(pNodeParams)); +} + +hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, + const hipHostNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphExecHostNodeSetParams, hGraphExec, node, pNodeParams); + if (hGraphExec == nullptr || pNodeParams == nullptr || pNodeParams->fn == nullptr || + !hipGraphNode::isNodeValid(node)) { + HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(node); + if 
(clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(clonedNode)->SetParams(pNodeParams)); +} + +hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, + hipGraphNode_t* hErrorNode_out, + hipGraphExecUpdateResult* updateResult_out) { + HIP_INIT_API(hipGraphExecUpdate, hGraphExec, hGraph, hErrorNode_out, updateResult_out); + // parameter check + if (hGraphExec == nullptr || hGraph == nullptr || hErrorNode_out == nullptr || + updateResult_out == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + std::vector newGraphNodes; + hGraph->LevelOrder(newGraphNodes); + std::vector& oldGraphExecNodes = hGraphExec->GetNodes(); + if (newGraphNodes.size() != oldGraphExecNodes.size()) { + *updateResult_out = hipGraphExecUpdateErrorTopologyChanged; + HIP_RETURN(hipErrorGraphExecUpdateFailure); + } + for (std::vector::size_type i = 0; i != newGraphNodes.size(); i++) { + if (newGraphNodes[i]->GetType() == oldGraphExecNodes[i]->GetType()) { + hipError_t status = oldGraphExecNodes[i]->SetParams(newGraphNodes[i]); + if (status != hipSuccess) { + *hErrorNode_out = newGraphNodes[i]; + if (status == hipErrorInvalidDeviceFunction) { + *updateResult_out = hipGraphExecUpdateErrorUnsupportedFunctionChange; + } else if (status == hipErrorInvalidValue || status == hipErrorInvalidDevicePointer) { + *updateResult_out = hipGraphExecUpdateErrorParametersChanged; + } else { + *updateResult_out = hipGraphExecUpdateErrorNotSupported; + } + HIP_RETURN(hipErrorGraphExecUpdateFailure); + } + } else { + *hErrorNode_out = newGraphNodes[i]; + *updateResult_out = hipGraphExecUpdateErrorNodeTypeChanged; + HIP_RETURN(hipErrorGraphExecUpdateFailure); + } + } + *updateResult_out = hipGraphExecUpdateSuccess; + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipGraphAddMemAllocNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* 
pDependencies, size_t numDependencies, + hipMemAllocNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphAddMemAllocNode, pGraphNode, graph, + pDependencies, numDependencies, pNodeParams); + if (pGraphNode == nullptr || graph == nullptr || + (numDependencies > 0 && pDependencies == nullptr) || pNodeParams == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + // Clear the pointer to allocated memory because it may contain stale/uninitialized data + pNodeParams->dptr = nullptr; + auto mem_alloc_node = new hipGraphMemAllocNode(pNodeParams); + *pGraphNode = mem_alloc_node; + auto status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies); + // The address must be provided during the node creation time + pNodeParams->dptr = mem_alloc_node->Execute(); + HIP_RETURN(status); +} + +// ================================================================================================ +hipError_t hipGraphMemAllocNodeGetParams(hipGraphNode_t node, hipMemAllocNodeParams* pNodeParams) { + HIP_INIT_API(hipGraphMemAllocNodeGetParams, node, pNodeParams); + if (node == nullptr || pNodeParams == nullptr || !hipGraphNode::isNodeValid(node) + || node->GetType() != hipGraphNodeTypeMemAlloc) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(pNodeParams); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + void* dev_ptr) { + HIP_INIT_API(hipGraphAddMemFreeNode, pGraphNode, graph, pDependencies, numDependencies, dev_ptr); + if (pGraphNode == nullptr || graph == nullptr || + ((numDependencies > 0 && pDependencies == nullptr) || + (pDependencies != nullptr && numDependencies == 0)) || + dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Is memory passed to be free'd valid + size_t offset = 0; + amd::Memory* 
memory_object = getMemoryObject(dev_ptr, offset); + if (memory_object == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto mem_free_node = new hipGraphMemFreeNode(dev_ptr); + *pGraphNode = mem_free_node; + auto status = ihipGraphAddNode(*pGraphNode, graph, pDependencies, numDependencies); + HIP_RETURN(status); +} + +// ================================================================================================ +hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* dev_ptr) { + HIP_INIT_API(hipGraphMemFreeNodeGetParams, node, dev_ptr); + if (node == nullptr || dev_ptr == nullptr || !hipGraphNode::isNodeValid(node) + || node->GetType() != hipGraphNodeTypeMemFree) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(node)->GetParams(reinterpret_cast(dev_ptr)); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value) { + HIP_INIT_API(hipDeviceGetGraphMemAttribute, device, attr, value); + if ((static_cast(device) >= g_devices.size()) || device < 0) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hipError_t result = hipErrorInvalidValue; + switch (attr) { + case hipGraphMemAttrUsedMemCurrent: + result = g_devices[device]->GetGraphMemoryPool()->GetAttribute( + hipMemPoolAttrUsedMemCurrent, value); + break; + case hipGraphMemAttrUsedMemHigh: + result = g_devices[device]->GetGraphMemoryPool()->GetAttribute( + hipMemPoolAttrUsedMemHigh, value); + break; + case hipGraphMemAttrReservedMemCurrent: + result = g_devices[device]->GetGraphMemoryPool()->GetAttribute( + hipMemPoolAttrReservedMemCurrent, value); + break; + case hipGraphMemAttrReservedMemHigh: + result = g_devices[device]->GetGraphMemoryPool()->GetAttribute( + hipMemPoolAttrReservedMemHigh, value); + break; + default: + break; + } + HIP_RETURN(result); 
+} + +// ================================================================================================ +hipError_t hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value) { + HIP_INIT_API(hipDeviceSetGraphMemAttribute, device, attr, value); + if ((static_cast(device) >= g_devices.size()) || device < 0) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hipError_t result = hipErrorInvalidValue; + switch (attr) { + case hipGraphMemAttrUsedMemHigh: + result = g_devices[device]->GetGraphMemoryPool()->SetAttribute( + hipMemPoolAttrUsedMemHigh, value); + break; + case hipGraphMemAttrReservedMemHigh: + result = g_devices[device]->GetGraphMemoryPool()->SetAttribute( + hipMemPoolAttrReservedMemHigh, value); + break; + default: + break; + } + HIP_RETURN(result); +} + +// ================================================================================================ +hipError_t hipDeviceGraphMemTrim(int device) { + HIP_INIT_API(hipDeviceGraphMemTrim, device); + if ((static_cast(device) >= g_devices.size()) || device < 0) { + HIP_RETURN(hipErrorInvalidDevice); + } + g_devices[device]->GetGraphMemoryPool()->TrimTo(0); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, + unsigned int initialRefcount, unsigned int flags) { + HIP_INIT_API(hipUserObjectCreate, object_out, ptr, destroy, initialRefcount, flags); + if (object_out == nullptr || flags != hipUserObjectNoDestructorSync || initialRefcount == 0 || + destroy == nullptr || initialRefcount > INT_MAX) { + HIP_RETURN(hipErrorInvalidValue); + } + + *object_out = new hipUserObject(destroy, ptr, flags); + //! Creating object adds one reference. 
+ if (initialRefcount > 1) { + (*object_out)->increaseRefCount(static_cast(initialRefcount - 1)); + } + HIP_RETURN(hipSuccess); +} + +// Drops `count` references from a user object. A null object or a count of 0 / above INT_MAX +// yields hipErrorInvalidValue; an untracked handle or a count larger than the current reference +// count is ignored and reported as hipSuccess. +hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count) { + HIP_INIT_API(hipUserObjectRelease, object, count); + if (object == nullptr || count == 0 || count > INT_MAX) { + HIP_RETURN(hipErrorInvalidValue); + } + // Validate the handle first: referenceCount() dereferences it, so a stale handle must be + // rejected before that call (assumes isUserObjvalid() only looks the pointer up - confirm). + if (!hipUserObject::isUserObjvalid(object) || object->referenceCount() < count) { + HIP_RETURN(hipSuccess); + } + //! If all the counts are gone no longer need the obj in the list + if (object->referenceCount() == count) { + hipUserObject::removeUSerObj(object); + } + object->decreaseRefCount(count); + HIP_RETURN(hipSuccess); +} + +hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count) { + HIP_INIT_API(hipUserObjectRetain, object, count); + if (object == nullptr || count == 0 || count > INT_MAX) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!hipUserObject::isUserObjvalid(object)) { + HIP_RETURN(hipSuccess); + } + object->increaseRefCount(count); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count, + unsigned int flags) { + HIP_INIT_API(hipGraphRetainUserObject, graph, object, count, flags); + hipError_t status = hipSuccess; + if (graph == nullptr || object == nullptr || count == 0 || count > INT_MAX || + (flags != 0 && flags != hipGraphUserObjectMove)) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!hipUserObject::isUserObjvalid(object) && !graph->isUserObjGraphValid(object)) { + HIP_RETURN(hipSuccess); + } + if (flags != hipGraphUserObjectMove) { + status = hipUserObjectRetain(object, count); + if (status != hipSuccess) { + HIP_RETURN(status); + } + } else { + //!
if flag is UserObjMove delete userobj from list + hipUserObject::removeUSerObj(object); + } + graph->addUserObjGraph(object); + HIP_RETURN(status); +} + +hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count) { + HIP_INIT_API(hipGraphReleaseUserObject, graph, object, count); + if (graph == nullptr || object == nullptr || count == 0 || count > INT_MAX) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!graph->isUserObjGraphValid(object) || object->referenceCount() < count) { + HIP_RETURN(hipSuccess); + } + //! Obj is being destroyed + unsigned int releaseCount = (object->referenceCount() < count) ? object->referenceCount() : count; + if (object->referenceCount() == releaseCount) { + graph->RemoveUserObjGraph(object); + } + hipError_t status = hipUserObjectRelease(object, count); + HIP_RETURN(status); +} + +hipError_t hipGraphKernelNodeCopyAttributes(hipGraphNode_t hSrc, hipGraphNode_t hDst) { + HIP_INIT_API(hipGraphKernelNodeCopyAttributes, hSrc, hDst); + if (hSrc == nullptr || hDst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(reinterpret_cast(hDst)->CopyAttr( + reinterpret_cast(hSrc))); +} + +hipError_t ihipGraphDebugDotPrint(hipGraph_t graph, const char* path, unsigned int flags) { + if (graph == nullptr || path == nullptr) { + return hipErrorInvalidValue; + } + std::ofstream fout; + fout.open(path, std::ios::out); + if (fout.fail()) { + ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Error during opening of file : %s", path); + return hipErrorOperatingSystem; + } + fout << "digraph dot {" << std::endl; + graph->GenerateDOT(fout, (hipGraphDebugDotFlags)flags); + fout << "}" << std::endl; + fout.close(); + return hipSuccess; +} + +hipError_t hipGraphDebugDotPrint(hipGraph_t graph, const char* path, unsigned int flags) { + HIP_INIT_API(hipGraphDebugDotPrint, graph, path, flags); + HIP_RETURN(ihipGraphDebugDotPrint(graph, path, flags)); +} + +hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, 
hipGraphNode_t hNode, + unsigned int isEnabled) { + HIP_INIT_API(hipGraphNodeSetEnabled, hGraphExec, hNode, isEnabled); + if (hGraphExec == nullptr || hNode == nullptr || !hipGraphExec::isGraphExecValid(hGraphExec) || + !hipGraphNode::isNodeValid(hNode)) { + HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(hNode); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!(hNode->GetType() == hipGraphNodeTypeKernel || hNode->GetType() == hipGraphNodeTypeMemcpy || + hNode->GetType() == hipGraphNodeTypeMemset)) { + HIP_RETURN(hipErrorInvalidValue); + } + clonedNode->SetEnabled(isEnabled); + HIP_RETURN(hipSuccess); +} + +hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + unsigned int* isEnabled) { + HIP_INIT_API(hipGraphNodeGetEnabled, hGraphExec, hNode, isEnabled); + if (hGraphExec == nullptr || hNode == nullptr || isEnabled == nullptr || + !hipGraphExec::isGraphExecValid(hGraphExec) || !hipGraphNode::isNodeValid(hNode)) { + HIP_RETURN(hipErrorInvalidValue); + } + hipGraphNode_t clonedNode = hGraphExec->GetClonedNode(hNode); + if (clonedNode == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!(hNode->GetType() == hipGraphNodeTypeKernel || hNode->GetType() == hipGraphNodeTypeMemcpy || + hNode->GetType() == hipGraphNodeTypeMemset)) { + HIP_RETURN(hipErrorInvalidValue); + } + *isEnabled = clonedNode->GetEnabled(); + HIP_RETURN(hipSuccess); +} + +// Validates the executable graph and the stream; no upload work is performed yet (see the TODO). +// Returns hipErrorInvalidValue for a null graphExec, hipErrorContextIsDestroyed when +// hip::isValid(stream) fails, and hipSuccess otherwise. +hipError_t hipGraphUpload(hipGraphExec_t graphExec, hipStream_t stream) { + HIP_INIT_API(hipGraphUpload, graphExec, stream); + if (graphExec == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (!hip::isValid(stream)) { + // Leave through HIP_RETURN, as every other exit of this API does, not a bare return. + HIP_RETURN(hipErrorContextIsDestroyed); + } + // TODO: stream is known before launch, do preparatory work with graph optimizations.
pre-allocate + // memory for memAlloc nodes if any when support is added with mempool feature + HIP_RETURN(hipSuccess); +} diff --git a/projects/clr/hipamd/src/hip_graph_capture.hpp b/projects/clr/hipamd/src/hip_graph_capture.hpp new file mode 100644 index 0000000000..51226ac13e --- /dev/null +++ b/projects/clr/hipamd/src/hip_graph_capture.hpp @@ -0,0 +1,111 @@ +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#pragma once +// forward declaration of capture methods +hipError_t capturehipLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim, + dim3& blockDim, void**& args, size_t& sharedMemBytes); + +hipError_t capturehipExtModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f, + uint32_t& globalWorkSizeX, uint32_t& globalWorkSizeY, + uint32_t& globalWorkSizeZ, uint32_t& localWorkSizeX, + uint32_t& localWorkSizeY, uint32_t& localWorkSizeZ, + size_t& sharedMemBytes, void**& kernelParams, + void**& extra, hipEvent_t& startEvent, + hipEvent_t& stopEvent, uint32_t& flags); + +hipError_t capturehipExtLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim, + dim3& blockDim, void**& args, size_t& sharedMemBytes, + hipEvent_t& startEvent, hipEvent_t& stopEvent, int& flags); + +hipError_t capturehipModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f, uint32_t& gridDimX, + uint32_t& gridDimY, uint32_t& gridDimZ, uint32_t& blockDimX, + uint32_t& blockDimY, uint32_t& blockDimZ, + uint32_t& sharedMemBytes, void**& kernelParams, + void**& extra); + +hipError_t capturehipMemcpy2DAsync(hipStream_t& stream, void*& dst, size_t& dpitch, + const void*& src, size_t& spitch, size_t& width, size_t& height, + hipMemcpyKind& kind); + +hipError_t capturehipMemcpyParam2DAsync(hipStream_t& stream, const hip_Memcpy2D*& pCopy); + +hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, size_t& dpitch, + hipArray_const_t& src, size_t& wOffsetSrc, + size_t& hOffsetSrc, size_t& width, size_t& height, + hipMemcpyKind& kind); + +hipError_t capturehipMemcpyFromArrayAsync(hipStream_t& stream, void*& dst, hipArray_const_t& src, + size_t& wOffsetSrc, size_t& hOffsetSrc, size_t& count, + hipMemcpyKind& kind); + +hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray*& dst, size_t& wOffset, + size_t& hOffset, const void*& src, size_t& spitch, + size_t& width, size_t& height, hipMemcpyKind& kind); + +hipError_t 
capturehipMemcpyToArrayAsync(hipStream_t& stream, hipArray_t& dst, size_t& wOffset, + size_t& hOffset, const void*& src, size_t& count, + hipMemcpyKind& kind); + +hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray*& srcArray, + size_t& srcOffset, size_t& ByteCount); + +hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray*& dstArray, size_t& dstOffset, + const void*& srcHost, size_t& ByteCount); + +hipError_t capturehipMemcpy3DAsync(hipStream_t& stream, const hipMemcpy3DParms*& p); + +hipError_t capturehipMemcpyAsync(hipStream_t& stream, void*& dst, const void*& src, + size_t& sizeBytes, hipMemcpyKind& kind); + +hipError_t capturehipMemcpyHtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice, void*& srcHost, + size_t& ByteCount, hipMemcpyKind& kind); + +hipError_t capturehipMemcpyDtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice, + hipDeviceptr_t& srcDevice, size_t& ByteCount, + hipMemcpyKind& kind); + +hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost, hipDeviceptr_t& srcDevice, + size_t& ByteCount, hipMemcpyKind& kind); + +hipError_t capturehipMemcpyFromSymbolAsync(hipStream_t& stream, void*& dst, const void*& symbol, + size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind); + +hipError_t capturehipMemcpyToSymbolAsync(hipStream_t& stream, const void*& symbol, const void*& src, + size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind); + +hipError_t capturehipMemsetAsync(hipStream_t& stream, void*& dst, int& value, size_t& valueSize, + size_t& sizeBytes); + +hipError_t capturehipMemset2DAsync(hipStream_t& stream, void*& dst, size_t& pitch, int& value, + size_t& width, size_t& height); + +hipError_t capturehipMemset3DAsync(hipStream_t& stream, hipPitchedPtr& pitchedDevPtr, int& value, + hipExtent& extent); + +hipError_t capturehipEventRecord(hipStream_t& stream, hipEvent_t& event); + +hipError_t capturehipStreamWaitEvent(hipEvent_t& event, hipStream_t& stream, unsigned int& flags); + 
+hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*& userData); + +hipError_t capturehipMallocAsync(hipStream_t stream, hipMemPool_t mem_pool, size_t size, void** dev_ptr); + +hipError_t capturehipFreeAsync(hipStream_t stream, void* dev_ptr); diff --git a/projects/clr/hipamd/src/hip_graph_helper.hpp b/projects/clr/hipamd/src/hip_graph_helper.hpp new file mode 100644 index 0000000000..20d011658e --- /dev/null +++ b/projects/clr/hipamd/src/hip_graph_helper.hpp @@ -0,0 +1,94 @@ +#include "hip_conversions.hpp" + +hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p); + +hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); + +hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hip::Stream& stream, bool isAsync = false); + +void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream); + +bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind); + +hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, void** kernelParams, void** extra, + int deviceId, uint32_t params); + +hipError_t ihipMemset_validate(void* dst, int64_t value, size_t valueSize, size_t sizeBytes); + +hipError_t ihipMemset3D_validate(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + size_t sizeBytes); + +hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, + uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, + uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, uint32_t sharedMemBytes, + hip::Stream* stream, void** kernelParams, void** extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags, + uint32_t params, uint32_t gridId, uint32_t numGrids, + uint64_t prevGridSum, uint64_t 
allGridSum, uint32_t firstDevice); + +hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p, + hip::Stream* stream); + +hipError_t ihipMemsetCommand(std::vector& commands, void* dst, int64_t value, + size_t valueSize, size_t sizeBytes, hip::Stream* stream); + +hipError_t ihipMemset3DCommand(std::vector& commands, hipPitchedPtr pitchedDevPtr, + int value, hipExtent extent, hip::Stream* stream, size_t elementSize = 1); + +hipError_t ihipMemcpySymbol_validate(const void* symbol, size_t sizeBytes, size_t offset, + size_t& sym_size, hipDeviceptr_t& device_ptr); + +hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t dstRowPitch, size_t dstSlicePitch, amd::Memory*& dstMemory, + amd::Image*& srcImage, amd::BufferRect& srcRect, + amd::BufferRect& dstRect); + +hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, amd::Image*& dstImage, + amd::Memory*& srcMemory, amd::BufferRect& dstRect, + amd::BufferRect& srcRect); + +hipError_t ihipMemcpyDtoDValidate(void* srcDevice, void* dstDevice, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, + size_t dstSlicePitch, amd::Memory*& srcMemory, + amd::Memory*& dstMemory, amd::BufferRect& srcRect, + amd::BufferRect& dstRect); + + +hipError_t ihipMemcpyDtoHValidate(void* srcDevice, void* dstHost, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, + size_t dstSlicePitch, amd::Memory*& srcMemory, + amd::BufferRect& srcRect, amd::BufferRect& dstRect); + +hipError_t ihipMemcpyHtoDValidate(const void* srcHost, void* dstDevice, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& 
copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, + size_t dstSlicePitch, amd::Memory*& dstMemory, + amd::BufferRect& srcRect, amd::BufferRect& dstRect); + + +hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + amd::Image*& srcImage, amd::Image*& dstImage); + + +hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, amd::Image*& dstImage, + amd::BufferRect& srcRect); + +hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t dstRowPitch, size_t dstSlicePitch, amd::Image*& srcImage, + amd::BufferRect& dstRect); + +hipError_t ihipGraphMemsetParams_validate(const hipMemsetParams* pNodeParams); diff --git a/projects/clr/hipamd/src/hip_graph_internal.cpp b/projects/clr/hipamd/src/hip_graph_internal.cpp new file mode 100644 index 0000000000..3c118bc198 --- /dev/null +++ b/projects/clr/hipamd/src/hip_graph_internal.cpp @@ -0,0 +1,619 @@ +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "hip_graph_internal.hpp" +#include + +#define CASE_STRING(X, C) \ + case X: \ + case_string = #C; \ + break; +const char* GetGraphNodeTypeString(uint32_t op) { + const char* case_string; + switch (static_cast(op)) { + CASE_STRING(hipGraphNodeTypeKernel, KernelNode) + CASE_STRING(hipGraphNodeTypeMemcpy, MemcpyNode) + CASE_STRING(hipGraphNodeTypeMemset, MemsetNode) + CASE_STRING(hipGraphNodeTypeHost, HostNode) + CASE_STRING(hipGraphNodeTypeGraph, GraphNode) + CASE_STRING(hipGraphNodeTypeEmpty, EmptyNode) + CASE_STRING(hipGraphNodeTypeWaitEvent, WaitEventNode) + CASE_STRING(hipGraphNodeTypeEventRecord, EventRecordNode) + CASE_STRING(hipGraphNodeTypeExtSemaphoreSignal, ExtSemaphoreSignalNode) + CASE_STRING(hipGraphNodeTypeExtSemaphoreWait, ExtSemaphoreWaitNode) + CASE_STRING(hipGraphNodeTypeMemcpyFromSymbol, MemcpyFromSymbolNode) + CASE_STRING(hipGraphNodeTypeMemcpyToSymbol, MemcpyToSymbolNode) + default: + case_string = "Unknown node type"; + }; + return case_string; +}; + +int hipGraphNode::nextID = 0; +int ihipGraph::nextID = 0; +std::unordered_set hipGraphNode::nodeSet_; +amd::Monitor hipGraphNode::nodeSetLock_{"Guards global node set"}; +std::unordered_set ihipGraph::graphSet_; +amd::Monitor ihipGraph::graphSetLock_{"Guards global graph set"}; +std::unordered_set hipGraphExec::graphExecSet_; +amd::Monitor hipGraphExec::graphExecSetLock_{"Guards global exec graph set"}; +std::unordered_set hipUserObject::ObjectSet_; +amd::Monitor hipUserObject::UserObjectLock_{"Guards global user 
object"}; + +hipError_t hipGraphMemcpyNode1D::ValidateParams(void* dst, const void* src, size_t count, + hipMemcpyKind kind) { + hipError_t status = ihipMemcpy_validate(dst, src, count, kind); + if (status != hipSuccess) { + return status; + } + size_t sOffsetOrig = 0; + amd::Memory* origSrcMemory = getMemoryObject(src, sOffsetOrig); + size_t dOffsetOrig = 0; + amd::Memory* origDstMemory = getMemoryObject(dst, dOffsetOrig); + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + + if ((srcMemory == nullptr) && (dstMemory != nullptr)) { // host to device + if (origDstMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) { + return hipErrorInvalidValue; + } + if ((kind != hipMemcpyHostToDevice) && (kind != hipMemcpyDefault)) { + return hipErrorInvalidValue; + } + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { // device to host + if (origSrcMemory->getContext().devices()[0] != srcMemory->getContext().devices()[0]) { + return hipErrorInvalidValue; + } + if ((kind != hipMemcpyDeviceToHost) && (kind != hipMemcpyDefault)) { + return hipErrorInvalidValue; + } + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + if (origDstMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) { + return hipErrorInvalidValue; + } + if (origSrcMemory->getContext().devices()[0] != srcMemory->getContext().devices()[0]) { + return hipErrorInvalidValue; + } + } + return hipSuccess; +} + +hipError_t hipGraphMemcpyNode::ValidateParams(const hipMemcpy3DParms* pNodeParams) { + hipError_t status = ihipMemcpy3D_validate(pNodeParams); + if (status != hipSuccess) { + return status; + } + size_t offset = 0; + const HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pNodeParams); + // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the + // (unified virtual address space) base address of the 
source data and the bytes per row to apply. + // {src/dst}Array is ignored. + hipMemoryType srcMemoryType = pCopy.srcMemoryType; + if (srcMemoryType == hipMemoryTypeUnified) { + amd::Memory* memObj = getMemoryObject(pCopy.srcDevice, offset); + srcMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags()) ? hipMemoryTypeHost : hipMemoryTypeDevice; + if (srcMemoryType == hipMemoryTypeHost) { + // {src/dst}Host may be unitialized. Copy over {src/dst}Device into it if we detect system + // memory. + const_cast(&pCopy)->srcHost = pCopy.srcDevice; + const_cast(&pCopy)->srcXInBytes += offset; + } + } + offset = 0; + hipMemoryType dstMemoryType = pCopy.dstMemoryType; + if (dstMemoryType == hipMemoryTypeUnified) { + amd::Memory* memObj = getMemoryObject(pCopy.dstDevice, offset); + dstMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags()) ? hipMemoryTypeHost : hipMemoryTypeDevice; + if (dstMemoryType == hipMemoryTypeHost) { + const_cast(&pCopy)->dstHost = pCopy.dstDevice; + const_cast(&pCopy)->dstXInBytes += offset; + } + } + offset = 0; + // If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned. + // In that case upgrade the copy type to hipMemoryTypeDevice to avoid extra pinning. + if (srcMemoryType == hipMemoryTypeHost) { + srcMemoryType = getMemoryObject(pCopy.srcHost, offset) ? hipMemoryTypeDevice : + hipMemoryTypeHost; + + if (srcMemoryType == hipMemoryTypeDevice) { + const_cast(&pCopy)->srcDevice = const_cast(pCopy.srcHost); + const_cast(&pCopy)->srcXInBytes += offset; + } + } + offset = 0; + if (dstMemoryType == hipMemoryTypeHost) { + dstMemoryType = getMemoryObject(pCopy.dstHost, offset) ? 
hipMemoryTypeDevice : + hipMemoryTypeHost; + + if (dstMemoryType == hipMemoryTypeDevice) { + const_cast(&pCopy)->dstDevice = const_cast(pCopy.dstDevice); + const_cast(&pCopy)->dstXInBytes += offset; + } + } + + amd::Coord3D srcOrigin = {pCopy.srcXInBytes, pCopy.srcY, pCopy.srcZ}; + amd::Coord3D dstOrigin = {pCopy.dstXInBytes, pCopy.dstY, pCopy.dstZ}; + amd::Coord3D copyRegion = {pCopy.WidthInBytes, pCopy.Height, pCopy.Depth}; + + if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { + // Host to Device. + + amd::Memory* dstMemory; + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + status = + ihipMemcpyHtoDValidate(pCopy.srcHost, pCopy.dstDevice, srcOrigin, dstOrigin, copyRegion, + pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch, + pCopy.dstPitch * pCopy.dstHeight, dstMemory, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { + // Device to Host. + amd::Memory* srcMemory; + amd::BufferRect srcRect; + amd::BufferRect dstRect; + status = + ihipMemcpyDtoHValidate(pCopy.srcDevice, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion, + pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch, + pCopy.dstPitch * pCopy.dstHeight, srcMemory, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { + // Device to Device. 
+ amd::Memory* srcMemory; + amd::Memory* dstMemory; + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + status = ihipMemcpyDtoDValidate(pCopy.srcDevice, pCopy.dstDevice, srcOrigin, dstOrigin, + copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, + pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcMemory, + dstMemory, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { + amd::Image* dstImage; + amd::BufferRect srcRect; + + status = + ihipMemcpyHtoAValidate(pCopy.srcHost, pCopy.dstArray, srcOrigin, dstOrigin, copyRegion, + pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, dstImage, srcRect); + if (status != hipSuccess) { + return status; + } + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { + // Image to Host. + amd::Image* srcImage; + amd::BufferRect dstRect; + + status = + ihipMemcpyAtoHValidate(pCopy.srcArray, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion, + pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcImage, dstRect); + if (status != hipSuccess) { + return status; + } + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { + // Device to Image. + amd::Image* dstImage; + amd::Memory* srcMemory; + amd::BufferRect dstRect; + amd::BufferRect srcRect; + status = ihipMemcpyDtoAValidate(pCopy.srcDevice, pCopy.dstArray, srcOrigin, dstOrigin, + copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, + dstImage, srcMemory, dstRect, srcRect); + if (status != hipSuccess) { + return status; + } + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { + // Image to Device. 
+ amd::BufferRect srcRect; + amd::BufferRect dstRect; + amd::Memory* dstMemory; + amd::Image* srcImage; + status = ihipMemcpyAtoDValidate(pCopy.srcArray, pCopy.dstDevice, srcOrigin, dstOrigin, + copyRegion, pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, + dstMemory, srcImage, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { + amd::Image* srcImage; + amd::Image* dstImage; + + status = ihipMemcpyAtoAValidate(pCopy.srcArray, pCopy.dstArray, srcOrigin, dstOrigin, + copyRegion, srcImage, dstImage); + if (status != hipSuccess) { + return status; + } + } else { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +bool ihipGraph::isGraphValid(ihipGraph* pGraph) { + amd::ScopedLock lock(graphSetLock_); + if (graphSet_.find(pGraph) == graphSet_.end()) { + return false; + } + return true; +} + +void ihipGraph::AddNode(const Node& node) { + vertices_.emplace_back(node); + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Add %s(%p)\n", + GetGraphNodeTypeString(node->GetType()), node); + node->SetParentGraph(this); +} + +void ihipGraph::RemoveNode(const Node& node) { + vertices_.erase(std::remove(vertices_.begin(), vertices_.end(), node), vertices_.end()); + delete node; +} + +// root nodes are all vertices with 0 in-degrees +std::vector ihipGraph::GetRootNodes() const { + std::vector roots; + for (auto entry : vertices_) { + if (entry->GetInDegree() == 0) { + roots.push_back(entry); + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] root node: %s(%p)\n", + GetGraphNodeTypeString(entry->GetType()), entry); + } + } + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "\n"); + return roots; +} + +// leaf nodes are all vertices with 0 out-degrees +std::vector ihipGraph::GetLeafNodes() const { + std::vector leafNodes; + for (auto entry : vertices_) { + if (entry->GetOutDegree() == 0) { + leafNodes.push_back(entry); + } + } + return leafNodes; +} + +size_t 
ihipGraph::GetLeafNodeCount() const { + int numLeafNodes = 0; + for (auto entry : vertices_) { + if (entry->GetOutDegree() == 0) { + numLeafNodes++; + } + } + return numLeafNodes; +} + +std::vector> ihipGraph::GetEdges() const { + std::vector> edges; + for (const auto& i : vertices_) { + for (const auto& j : i->GetEdges()) { + edges.push_back(std::make_pair(i, j)); + } + } + return edges; +} + +void ihipGraph::GetRunListUtil(Node v, std::unordered_map& visited, + std::vector& singleList, + std::vector>& parallelLists, + std::unordered_map>& dependencies) { + // Mark the current node as visited. + visited[v] = true; + singleList.push_back(v); + // Recurse for all the vertices adjacent to this vertex + for (auto& adjNode : v->GetEdges()) { + if (!visited[adjNode]) { + // For the parallel list nodes add parent as the dependency + if (singleList.empty()) { + ClPrint(amd::LOG_INFO, amd::LOG_CODE, + "[hipGraph] For %s(%p)- add parent as dependency %s(%p)\n", + GetGraphNodeTypeString(adjNode->GetType()), adjNode, + GetGraphNodeTypeString(v->GetType()), v); + dependencies[adjNode].push_back(v); + } + GetRunListUtil(adjNode, visited, singleList, parallelLists, dependencies); + } else { + for (auto& list : parallelLists) { + // Merge singleList when adjNode matches with the first element of the list in existing + // lists + if (adjNode == list[0]) { + for (auto k = singleList.rbegin(); k != singleList.rend(); ++k) { + list.insert(list.begin(), *k); + } + singleList.erase(singleList.begin(), singleList.end()); + } + } + // If the list cannot be merged with the existing list add as dependancy + if (!singleList.empty()) { + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] For %s(%p)- add dependency %s(%p)\n", + GetGraphNodeTypeString(adjNode->GetType()), adjNode, + GetGraphNodeTypeString(v->GetType()), v); + dependencies[adjNode].push_back(v); + } + } + } + if (!singleList.empty()) { + parallelLists.push_back(singleList); + singleList.erase(singleList.begin(), 
singleList.end()); + } +} +// The function to do Topological Sort. +// It uses recursive GetRunListUtil() +void ihipGraph::GetRunList(std::vector>& parallelLists, + std::unordered_map>& dependencies) { + std::vector singleList; + + // Mark all the vertices as not visited + std::unordered_map visited; + for (auto node : vertices_) visited[node] = false; + + // Call the recursive helper function for all vertices one by one + for (auto node : vertices_) { + // If the node has embedded child graph + node->GetRunList(parallelLists, dependencies); + if (visited[node] == false) { + GetRunListUtil(node, visited, singleList, parallelLists, dependencies); + } + } + for (size_t i = 0; i < parallelLists.size(); i++) { + for (size_t j = 0; j < parallelLists[i].size(); j++) { + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] list %d - %s(%p)\n", i + 1, + GetGraphNodeTypeString(parallelLists[i][j]->GetType()), parallelLists[i][j]); + } + } +} + +void ihipGraph::LevelOrder(std::vector& levelOrder) { + std::vector roots = GetRootNodes(); + std::unordered_map visited; + std::queue q; + for (auto it = roots.begin(); it != roots.end(); it++) { + q.push(*it); + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] %s(%p) level:%d \n", + GetGraphNodeTypeString((*it)->GetType()), *it, (*it)->GetLevel()); + } + while (!q.empty()) { + Node node = q.front(); + q.pop(); + levelOrder.push_back(node); + for (const auto& i : node->GetEdges()) { + if (visited.find(i) == visited.end() && i->GetLevel() == (node->GetLevel() + 1)) { + q.push(i); + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] %s(%p) level:%d \n", + GetGraphNodeTypeString(i->GetType()), i, i->GetLevel()); + visited[i] = true; + } + } + } +} + +ihipGraph* ihipGraph::clone(std::unordered_map& clonedNodes) const { + ihipGraph* newGraph = new ihipGraph(device_, this); + for (auto entry : vertices_) { + hipGraphNode* node = entry->clone(); + node->SetParentGraph(newGraph); + newGraph->vertices_.push_back(node); + clonedNodes[entry] = 
node; + } + + std::vector clonedEdges; + std::vector clonedDependencies; + for (auto node : vertices_) { + const std::vector& edges = node->GetEdges(); + clonedEdges.clear(); + for (auto edge : edges) { + clonedEdges.push_back(clonedNodes[edge]); + } + clonedNodes[node]->SetEdges(clonedEdges); + } + for (auto node : vertices_) { + const std::vector& dependencies = node->GetDependencies(); + clonedDependencies.clear(); + for (auto dep : dependencies) { + clonedDependencies.push_back(clonedNodes[dep]); + } + clonedNodes[node]->SetDependencies(clonedDependencies); + } + return newGraph; +} + +ihipGraph* ihipGraph::clone() const { + std::unordered_map clonedNodes; + return clone(clonedNodes); +} + +bool hipGraphExec::isGraphExecValid(hipGraphExec* pGraphExec) { + amd::ScopedLock lock(graphExecSetLock_); + if (graphExecSet_.find(pGraphExec) == graphExecSet_.end()) { + return false; + } + return true; +} + +hipError_t hipGraphExec::CreateStreams(uint32_t num_streams) { + parallel_streams_.reserve(num_streams); + for (uint32_t i = 0; i < num_streams; ++i) { + auto stream = new hip::Stream(hip::getCurrentDevice(), + hip::Stream::Priority::Normal, hipStreamNonBlocking); + if (stream == nullptr || !stream->Create()) { + if (stream != nullptr) { + stream->release(); + } + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to create parallel stream!\n"); + return hipErrorOutOfMemory; + } + parallel_streams_.push_back(stream); + } + return hipSuccess; +} + +hipError_t hipGraphExec::Init() { + hipError_t status; + size_t min_num_streams = 1; + + for (auto& node : levelOrder_) { + min_num_streams += node->GetNumParallelStreams(); + } + status = CreateStreams(parallelLists_.size() - 1 + min_num_streams); + return status; +} + +hipError_t FillCommands(std::vector>& parallelLists, + std::unordered_map>& nodeWaitLists, + std::vector& levelOrder, std::vector& rootCommands, + amd::Command*& endCommand, hip::Stream* stream) { + hipError_t status; + for (auto& node : levelOrder) { 
+ // TODO: clone commands from next launch + status = node->CreateCommand(node->GetQueue()); + if (status != hipSuccess) return status; + amd::Command::EventWaitList waitList; + for (auto depNode : nodeWaitLists[node]) { + for (auto command : depNode->GetCommands()) { + waitList.push_back(command); + } + } + node->UpdateEventWaitLists(waitList); + } + // rootCommand ensures graph is started (all parallel branches) after all the previous work is + // finished + bool first = true; + for (auto& singleList : parallelLists) { + if (first) { + first = false; + continue; + } + // marker from the same queue as the list + amd::Command* rootCommand = new amd::Marker(*singleList[0]->GetQueue(), false, {}); + amd::Command::EventWaitList waitList; + waitList.push_back(rootCommand); + if (!singleList.empty()) { + auto commands = singleList[0]->GetCommands(); + if (!commands.empty()) { + commands[0]->updateEventWaitList(waitList); + rootCommands.push_back(rootCommand); + } + } + } + // endCommand ensures next enqueued ones start after graph is finished (all parallel branches) + amd::Command::EventWaitList graphLastCmdWaitList; + first = true; + for (auto& singleList : parallelLists) { + if (first) { + first = false; + continue; + } + if (!singleList.empty()) { + auto commands = singleList.back()->GetCommands(); + if (!commands.empty()) { + graphLastCmdWaitList.push_back(commands.back()); + } + } + } + if (!graphLastCmdWaitList.empty()) { + endCommand = new amd::Marker(*stream, false, graphLastCmdWaitList); + if (endCommand == nullptr) { + return hipErrorOutOfMemory; + } + } + return hipSuccess; +} + +void UpdateStream(std::vector>& parallelLists, hip::Stream* stream, + hipGraphExec* ptr) { + int i = 0; + for (const auto& list : parallelLists) { + // first parallel list will be launched on the same queue as parent + if (i == 0) { + for (auto& node : list) { + node->SetStream(stream, ptr); + } + } else { // New stream for parallel branches + hip::Stream* stream = 
ptr->GetAvailableStreams(); + for (auto& node : list) { + node->SetStream(stream, ptr); + } + } + i++; + } +} + +hipError_t hipGraphExec::Run(hipStream_t stream) { + hipError_t status; + + if (hip::getStream(stream) == nullptr) { + return hipErrorInvalidResourceHandle; + } + if (flags_ == hipGraphInstantiateFlagAutoFreeOnLaunch) { + if (!levelOrder_.empty()) { + levelOrder_[0]->GetParentGraph()->FreeAllMemory(); + } + } + + // If this is a repeat launch, make sure corresponding MemFreeNode exists for a MemAlloc node + if (repeatLaunch_ == true) { + for (auto& node : levelOrder_) { + if (node->GetType() == hipGraphNodeTypeMemAlloc && + static_cast(node)->IsActiveMem() == true) { + return hipErrorInvalidValue; + } + } + } + else { + repeatLaunch_ = true; + } + + auto hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() + : reinterpret_cast(stream); + UpdateStream(parallelLists_, hip_stream, this); + std::vector rootCommands; + amd::Command* endCommand = nullptr; + status = + FillCommands(parallelLists_, nodeWaitLists_, levelOrder_, rootCommands, endCommand, hip_stream); + if (status != hipSuccess) { + return status; + } + for (auto& cmd : rootCommands) { + cmd->enqueue(); + cmd->release(); + } + for (auto& node : levelOrder_) { + node->EnqueueCommands(stream); + } + if (endCommand != nullptr) { + endCommand->enqueue(); + endCommand->release(); + } + ResetQueueIndex(); + return status; +} diff --git a/projects/clr/hipamd/src/hip_graph_internal.hpp b/projects/clr/hipamd/src/hip_graph_internal.hpp new file mode 100644 index 0000000000..d3cac3a09f --- /dev/null +++ b/projects/clr/hipamd/src/hip_graph_internal.hpp @@ -0,0 +1,1929 @@ +/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_graph_helper.hpp" +#include "hip_event.hpp" +#include "hip_platform.hpp" +#include "hip_mempool_impl.hpp" + +typedef hipGraphNode* Node; +hipError_t FillCommands(std::vector>& parallelLists, + std::unordered_map>& nodeWaitLists, + std::vector& levelOrder, std::vector& rootCommands, + amd::Command*& endCommand, hip::Stream* stream); +void UpdateStream(std::vector>& parallelLists, hip::Stream* stream, + hipGraphExec* ptr); + +struct hipUserObject : public amd::ReferenceCountedObject { + typedef void (*UserCallbackDestructor)(void* data); + static std::unordered_set ObjectSet_; + static amd::Monitor UserObjectLock_; + + public: + hipUserObject(UserCallbackDestructor callback, void* data, unsigned int flags) + : ReferenceCountedObject(), callback_(callback), data_(data), flags_(flags) { + amd::ScopedLock lock(UserObjectLock_); + ObjectSet_.insert(this); + } + + virtual ~hipUserObject() { + amd::ScopedLock lock(UserObjectLock_); + if (callback_ != nullptr) { + callback_(data_); + } + ObjectSet_.erase(this); + } + + void increaseRefCount(const unsigned int refCount) { + for (uint32_t i = 0; i < refCount; i++) { + retain(); + } + } + + void decreaseRefCount(const unsigned int refCount) { + assert((refCount <= referenceCount()) && "count is bigger than refcount"); + for (uint32_t i = 0; i < refCount; i++) { + release(); + } + } + + static bool isUserObjvalid(hipUserObject* pUsertObj) { + auto it = ObjectSet_.find(pUsertObj); + if (it == ObjectSet_.end()) { + return false; + } + return true; + } + + static void removeUSerObj(hipUserObject* pUsertObj) { + amd::ScopedLock lock(UserObjectLock_); + auto it = ObjectSet_.find(pUsertObj); + if (it != ObjectSet_.end()) { + ObjectSet_.erase(it); + } + } + + private: + UserCallbackDestructor callback_; + void* data_; + unsigned int flags_; + //! 
Disable default operator= + hipUserObject& operator=(const hipUserObject&) = delete; + //! Disable copy constructor + hipUserObject(const hipUserObject& obj) = delete; +}; + +struct hipGraphNodeDOTAttribute { + protected: + std::string style_; + std::string shape_; + std::string label_; + + hipGraphNodeDOTAttribute(std::string style, std::string shape, std::string label) { + style_ = style; + shape_ = shape; + label_ = label; + } + + hipGraphNodeDOTAttribute() { + style_ = "solid"; + shape_ = "rectangle"; + label_ = ""; + } + + hipGraphNodeDOTAttribute(const hipGraphNodeDOTAttribute& node) { + style_ = node.style_; + shape_ = node.shape_; + label_ = node.label_; + } + + void SetStyle(std::string style) { style_ = style; } + + void SetShape(std::string shape) { shape_ = shape; } + + virtual std::string GetShape(hipGraphDebugDotFlags flag) { return shape_; } + + void SetLabel(std::string label) { label_ = label; } + + virtual std::string GetLabel(hipGraphDebugDotFlags flag) { return label_; } + + virtual void PrintAttributes(std::ostream& out, hipGraphDebugDotFlags flag) { + out << "["; + out << "style"; + out << "=\""; + out << style_; + out << "\""; + out << "shape"; + out << "=\""; + out << GetShape(flag); + out << "\""; + out << "label"; + out << "=\""; + out << GetLabel(flag); + out << "\""; + out << "];"; + } +}; + +struct hipGraphNode : public hipGraphNodeDOTAttribute { + protected: + hip::Stream* stream_ = nullptr; + uint32_t level_; + unsigned int id_; + hipGraphNodeType type_; + std::vector commands_; + std::vector edges_; + std::vector dependencies_; + bool visited_; + // count of in coming edges + size_t inDegree_; + // count of outgoing edges + size_t outDegree_; + static int nextID; + struct ihipGraph* parentGraph_; + static std::unordered_set nodeSet_; + static amd::Monitor nodeSetLock_; + unsigned int isEnabled_; + + public: + hipGraphNode(hipGraphNodeType type, std::string style = "", std::string shape = "", + std::string label = "") + : type_(type), 
+ level_(0), + visited_(false), + inDegree_(0), + outDegree_(0), + id_(nextID++), + parentGraph_(nullptr), + isEnabled_(1), + hipGraphNodeDOTAttribute(style, shape, label) { + amd::ScopedLock lock(nodeSetLock_); + nodeSet_.insert(this); + } + /// Copy Constructor + hipGraphNode(const hipGraphNode& node) : hipGraphNodeDOTAttribute(node) { + level_ = node.level_; + type_ = node.type_; + inDegree_ = node.inDegree_; + outDegree_ = node.outDegree_; + visited_ = false; + id_ = node.id_; + parentGraph_ = nullptr; + amd::ScopedLock lock(nodeSetLock_); + nodeSet_.insert(this); + isEnabled_ = node.isEnabled_; + } + + virtual ~hipGraphNode() { + for (auto node : edges_) { + node->RemoveDependency(this); + } + for (auto node : dependencies_) { + node->RemoveEdge(this); + } + amd::ScopedLock lock(nodeSetLock_); + nodeSet_.erase(this); + } + + // check node validity + static bool isNodeValid(hipGraphNode* pGraphNode) { + amd::ScopedLock lock(nodeSetLock_); + if (pGraphNode == nullptr || nodeSet_.find(pGraphNode) == nodeSet_.end()) { + return false; + } + return true; + } + + hip::Stream* GetQueue() { return stream_; } + + virtual void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) { + stream_ = stream; + } + /// Create amd::command for the graph node + virtual hipError_t CreateCommand(hip::Stream* stream) { + commands_.clear(); + stream_ = stream; + return hipSuccess; + } + /// Return node unique ID + int GetID() const { return id_; } + /// Returns command for graph node + virtual std::vector& GetCommands() { return commands_; } + /// Returns graph node type + hipGraphNodeType GetType() const { return type_; } + /// Returns graph node in coming edges + uint32_t GetLevel() const { return level_; } + /// Set graph node level + void SetLevel(uint32_t level) { level_ = level; } + /// Clone graph node + virtual hipGraphNode* clone() const = 0; + /// Returns graph node indegree + size_t GetInDegree() const { return inDegree_; } + /// Updates indegree of the node + void 
SetInDegree(size_t inDegree) { inDegree_ = inDegree; } + /// Returns graph node outdegree + size_t GetOutDegree() const { return outDegree_; } + /// Updates outdegree of the node + void SetOutDegree(size_t outDegree) { outDegree_ = outDegree; } + /// Returns graph node dependencies + const std::vector& GetDependencies() const { return dependencies_; } + /// Update graph node dependecies + void SetDependencies(std::vector& dependencies) { + for (auto entry : dependencies) { + dependencies_.push_back(entry); + } + } + /// Add graph node dependency + void AddDependency(const Node& node) { dependencies_.push_back(node); } + /// Remove graph node dependency + void RemoveDependency(const Node& node) { + dependencies_.erase(std::remove(dependencies_.begin(), dependencies_.end(), node), + dependencies_.end()); + } + void RemoveEdge(const Node& childNode) { + edges_.erase(std::remove(edges_.begin(), edges_.end(), childNode), edges_.end()); + } + /// Return graph node children + const std::vector& GetEdges() const { return edges_; } + /// Updates graph node children + void SetEdges(std::vector& edges) { + for (auto entry : edges) { + edges_.push_back(entry); + } + } + /// Update level, for existing edges + void UpdateEdgeLevel() { + for (auto edge : edges_) { + edge->SetLevel(std::max(edge->GetLevel(), GetLevel() + 1)); + edge->UpdateEdgeLevel(); + } + } + void ReduceEdgeLevel() { + for (auto edge: edges_) { + edge->SetLevel(std::min(edge->GetLevel(),GetLevel() + 1)); + edge->ReduceEdgeLevel(); + } + } + /// Add edge, update parent node outdegree, child node indegree, level and dependency + void AddEdge(const Node& childNode) { + edges_.push_back(childNode); + outDegree_++; + childNode->SetInDegree(childNode->GetInDegree() + 1); + childNode->SetLevel(std::max(childNode->GetLevel(), GetLevel() + 1)); + childNode->UpdateEdgeLevel(); + childNode->AddDependency(this); + } + /// Remove edge, update parent node outdegree, child node indegree, level and dependency + bool 
RemoveUpdateEdge(const Node& childNode) { + // std::remove changes the end() hence saving it before hand for validation + auto currEdgeEnd = edges_.end(); + auto it = std::remove(edges_.begin(), edges_.end(), childNode); + if (it == currEdgeEnd) { + // Should come here if childNode is not present in the edge list + return false; + } + edges_.erase(it, edges_.end()); + outDegree_--; + childNode->SetInDegree(childNode->GetInDegree() - 1); + childNode->RemoveDependency(this); + const std::vector& dependencies = childNode->GetDependencies(); + int32_t level = 0; + int32_t parentLevel = 0; + uint32_t origLevel = 0; + for (auto parent : dependencies) { + parentLevel = parent->GetLevel(); + level = std::max(level, (parentLevel + 1)); + } + origLevel = childNode->GetLevel(); + childNode->SetLevel(level); + if (level < origLevel) { + childNode->ReduceEdgeLevel(); + } + return true; + } + /// Get Runlist of the nodes embedded as part of the graphnode(e.g. ChildGraph) + virtual void GetRunList(std::vector>& parallelList, + std::unordered_map>& dependencies) {} + /// Get levelorder of the nodes embedded as part of the graphnode(e.g. ChildGraph) + virtual void LevelOrder(std::vector& levelOrder) {} + /// Update waitlist of the nodes embedded as part of the graphnode(e.g. ChildGraph) + virtual void UpdateEventWaitLists(amd::Command::EventWaitList waitList) { + for (auto command : commands_) { + command->updateEventWaitList(waitList); + } + } + virtual size_t GetNumParallelStreams() { return 0; } + /// Enqueue commands part of the node + virtual void EnqueueCommands(hipStream_t stream) { + // If the node is disabled it becomes empty node. To maintain ordering just enqueue marker. + // Node can be enabled/disabled only for kernel, memcpy and memset nodes. 
+ if (!isEnabled_ && + (type_ == hipGraphNodeTypeKernel || type_ == hipGraphNodeTypeMemcpy || + type_ == hipGraphNodeTypeMemset)) { + amd::Command::EventWaitList waitList; + hip::Stream* hip_stream = hip::getStream(stream); + amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList); + command->enqueue(); + command->release(); + return; + } + for (auto& command : commands_) { + command->enqueue(); + command->release(); + } + } + ihipGraph* GetParentGraph() { return parentGraph_; } + virtual ihipGraph* GetChildGraph() { return nullptr; } + void SetParentGraph(ihipGraph* graph) { parentGraph_ = graph; } + virtual hipError_t SetParams(hipGraphNode* node) { return hipSuccess; } + virtual void GenerateDOT(std::ostream& fout, hipGraphDebugDotFlags flag) {} + virtual void GenerateDOTNode(size_t graphId, std::ostream& fout, hipGraphDebugDotFlags flag) { + fout << "\n"; + std::string nodeName = "graph_" + std::to_string(graphId) + "_node_" + std::to_string(GetID()); + fout << "\"" << nodeName << "\""; + PrintAttributes(fout, flag); + fout << "\n"; + } + virtual void GenerateDOTNodeEdges(size_t graphId, std::ostream& fout, + hipGraphDebugDotFlags flag) { + for (auto node : edges_) { + std::string toNodeName = + "graph_" + std::to_string(graphId) + "_node_" + std::to_string(node->GetID()); + std::string fromNodeName = + "graph_" + std::to_string(graphId) + "_node_" + std::to_string(GetID()); + fout << "\"" << fromNodeName << "\" -> \"" << toNodeName << "\"" << std::endl; + } + } + virtual std::string GetLabel(hipGraphDebugDotFlags flag) { return (std::to_string(id_) + "\n" + label_); } + unsigned int GetEnabled() const { return isEnabled_; } + void SetEnabled(unsigned int isEnabled) { isEnabled_ = isEnabled; } +}; + +struct ihipGraph { + std::vector vertices_; + const ihipGraph* pOriginalGraph_ = nullptr; + static std::unordered_set graphSet_; + static amd::Monitor graphSetLock_; + std::unordered_set graphUserObj_; + unsigned int id_; + static int 
nextID; + hip::Device* device_; //!< HIP device object + hip::MemoryPool* mem_pool_; //!< Memory pool, associated with this graph + std::unordered_set capturedNodes_; + + public: + ihipGraph(hip::Device* device, const ihipGraph* original = nullptr) + : pOriginalGraph_(original) + , id_(nextID++) + , device_(device) { + amd::ScopedLock lock(graphSetLock_); + graphSet_.insert(this); + mem_pool_ = device->GetGraphMemoryPool(); + mem_pool_->retain(); + } + + ~ihipGraph() { + for (auto node : vertices_) { + delete node; + } + amd::ScopedLock lock(graphSetLock_); + graphSet_.erase(this); + for (auto userobj : graphUserObj_) { + userobj->release(); + } + if (mem_pool_ != nullptr) { + mem_pool_->release(); + } + + } + + void AddManualNodeDuringCapture(hipGraphNode* node) { capturedNodes_.insert(node); } + + std::unordered_set GetManualNodesDuringCapture() { return capturedNodes_; } + + void RemoveManualNodesDuringCapture() { + capturedNodes_.erase(capturedNodes_.begin(), capturedNodes_.end()); + } + + /// Return graph unique ID + int GetID() const { return id_; } + + // check graphs validity + static bool isGraphValid(ihipGraph* pGraph); + + /// add node to the graph + void AddNode(const Node& node); + void RemoveNode(const Node& node); + /// Returns root nodes, all vertices with 0 in-degrees + std::vector GetRootNodes() const; + /// Returns leaf nodes, all vertices with 0 out-degrees + std::vector GetLeafNodes() const; + /// Returns number of leaf nodes + size_t GetLeafNodeCount() const; + /// Returns total numbers of nodes in the graph + size_t GetNodeCount() const { return vertices_.size(); } + /// returns all the nodes in the graph + const std::vector& GetNodes() const { return vertices_; } + /// returns all the edges in the graph + std::vector> GetEdges() const; + // returns the original graph ptr if cloned + const ihipGraph* getOriginalGraph() const { return pOriginalGraph_; } + // Add user obj resource to graph + void addUserObjGraph(hipUserObject* pUserObj) { + 
amd::ScopedLock lock(graphSetLock_); + graphUserObj_.insert(pUserObj); + } + // Check user obj resource from graph is valid + bool isUserObjGraphValid(hipUserObject* pUserObj) { + if (graphUserObj_.find(pUserObj) == graphUserObj_.end()) { + return false; + } + return true; + } + // Delete user obj resource from graph + void RemoveUserObjGraph(hipUserObject* pUserObj) { graphUserObj_.erase(pUserObj); } + + void GetRunListUtil(Node v, std::unordered_map& visited, + std::vector& singleList, std::vector>& parallelLists, + std::unordered_map>& dependencies); + void GetRunList(std::vector>& parallelLists, + std::unordered_map>& dependencies); + void LevelOrder(std::vector& levelOrder); + void GetUserObjs(std::unordered_set& graphExeUserObjs) { + for (auto userObj : graphUserObj_) { + userObj->retain(); + graphExeUserObjs.insert(userObj); + } + } + ihipGraph* clone(std::unordered_map& clonedNodes) const; + ihipGraph* clone() const; + void GenerateDOT(std::ostream& fout, hipGraphDebugDotFlags flag) { + fout << "subgraph cluster_" << GetID() << " {" << std::endl; + fout << "label=\"graph_" << GetID() <<"\"graph[style=\"dashed\"];\n"; + for (auto node : vertices_) { + node->GenerateDOTNode(GetID(), fout, flag); + } + fout << "\n"; + for (auto& node : vertices_) { + node->GenerateDOTNodeEdges(GetID(), fout, flag); + } + fout << "}" << std::endl; + for (auto node : vertices_) { + node->GenerateDOT(fout, flag); + } + } + + void* AllocateMemory(size_t size, hip::Stream* stream, void* dptr) const { + auto ptr = mem_pool_->AllocateMemory(size, stream, dptr); + return ptr; + } + + void FreeMemory(void* dev_ptr, hip::Stream* stream) const { + size_t offset = 0; + auto memory = getMemoryObject(dev_ptr, offset); + if (memory != nullptr) { + auto device_id = memory->getUserData().deviceId; + if (!g_devices[device_id]->FreeMemory(memory, stream)) { + LogError("Memory didn't belong to any pool!"); + } + } + } + + bool ProbeMemory(void* dev_ptr) const { + size_t offset = 0; + auto memory 
= getMemoryObject(dev_ptr, offset); + if (memory != nullptr) { + return mem_pool_->IsBusyMemory(memory); + } + return false; + } + + void FreeAllMemory() { + mem_pool_->FreeAllMemory(); + } +}; + +struct hipGraphExec { + std::vector> parallelLists_; + // level order of the graph doesn't include nodes embedded as part of the child graph + std::vector levelOrder_; + std::unordered_map> nodeWaitLists_; + std::vector parallel_streams_; + uint currentQueueIndex_; + std::unordered_map clonedNodes_; + amd::Command* lastEnqueuedCommand_; + static std::unordered_set graphExecSet_; + std::unordered_set graphExeUserObj_; + static amd::Monitor graphExecSetLock_; + uint64_t flags_ = 0; + bool repeatLaunch_ = false; + public: + hipGraphExec(std::vector& levelOrder, std::vector>& lists, + std::unordered_map>& nodeWaitLists, + std::unordered_map& clonedNodes, + std::unordered_set& userObjs, + uint64_t flags = 0) + : parallelLists_(lists), + levelOrder_(levelOrder), + nodeWaitLists_(nodeWaitLists), + clonedNodes_(clonedNodes), + lastEnqueuedCommand_(nullptr), + graphExeUserObj_(userObjs), + currentQueueIndex_(0), + flags_(flags) { + amd::ScopedLock lock(graphExecSetLock_); + graphExecSet_.insert(this); + } + + ~hipGraphExec() { + // new commands are launched for every launch they are destroyed as and when command is + // terminated after it complete execution + for (auto stream : parallel_streams_) { + if (stream != nullptr) { + stream->release(); + } + } + for (auto it = clonedNodes_.begin(); it != clonedNodes_.end(); it++) delete it->second; + amd::ScopedLock lock(graphExecSetLock_); + for (auto userobj : graphExeUserObj_) { + userobj->release(); + } + graphExecSet_.erase(this); + } + + Node GetClonedNode(Node node) { + Node clonedNode; + if (clonedNodes_.find(node) == clonedNodes_.end()) { + return nullptr; + } else { + clonedNode = clonedNodes_[node]; + } + return clonedNode; + } + + // check executable graphs validity + static bool isGraphExecValid(hipGraphExec* pGraphExec); + 
+ std::vector& GetNodes() { return levelOrder_; } + + hip::Stream* GetAvailableStreams() { return parallel_streams_[currentQueueIndex_++]; } + void ResetQueueIndex() { currentQueueIndex_ = 0; } + hipError_t Init(); + hipError_t CreateStreams(uint32_t num_streams); + hipError_t Run(hipStream_t stream); +}; + +struct hipChildGraphNode : public hipGraphNode { + struct ihipGraph* childGraph_; + std::vector childGraphlevelOrder_; + std::vector> parallelLists_; + std::unordered_map> nodeWaitLists_; + amd::Command* lastEnqueuedCommand_; + + public: + hipChildGraphNode(ihipGraph* g) : hipGraphNode(hipGraphNodeTypeGraph, "solid", "rectangle") { + childGraph_ = g->clone(); + lastEnqueuedCommand_ = nullptr; + } + + ~hipChildGraphNode() { delete childGraph_; } + + // Copy-construct by cloning the child graph of rhs. lastEnqueuedCommand_ is a raw pointer with + // no default initializer, so it is reset explicitly (as the primary constructor does) instead + // of being left indeterminate in the copy. + hipChildGraphNode(const hipChildGraphNode& rhs) : hipGraphNode(rhs) { + childGraph_ = rhs.childGraph_->clone(); + lastEnqueuedCommand_ = nullptr; + } + + hipGraphNode* clone() const { + return new hipChildGraphNode(static_cast(*this)); + } + + ihipGraph* GetChildGraph() { return childGraph_; } + + size_t GetNumParallelStreams() { + LevelOrder(childGraphlevelOrder_); + size_t num = 0; + for (auto& node : childGraphlevelOrder_) { + num += node->GetNumParallelStreams(); + } + // returns total number of parallel queues required for child graph nodes to be launched + // first parallel list will be launched on the same queue as parent + return num + (parallelLists_.size() - 1); + } + + void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) { + stream_ = stream; + UpdateStream(parallelLists_, stream, ptr); + } + + // For nodes that are dependent on the child graph node waitlist is the last node of the first + // parallel list + std::vector& GetCommands() { return parallelLists_[0].back()->GetCommands(); } + + // Create child graph node commands and set waitlists + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + commands_.reserve(2); +
std::vector rootCommands; + amd::Command* endCommand = nullptr; + status = FillCommands(parallelLists_, nodeWaitLists_, childGraphlevelOrder_, rootCommands, + endCommand, stream); + for (auto& cmd : rootCommands) { + commands_.push_back(cmd); + } + if (endCommand != nullptr) { + commands_.push_back(endCommand); + } + return status; + } + + // + void UpdateEventWaitLists(amd::Command::EventWaitList waitList) { + parallelLists_[0].front()->UpdateEventWaitLists(waitList); + } + + void GetRunList(std::vector>& parallelList, + std::unordered_map>& dependencies) { + childGraph_->GetRunList(parallelLists_, nodeWaitLists_); + } + + void LevelOrder(std::vector& levelOrder) { childGraph_->LevelOrder(levelOrder); } + + void EnqueueCommands(hipStream_t stream) { + // enqueue child graph start command + if (commands_.size() == 1) { + commands_[0]->enqueue(); + commands_[0]->release(); + } + // enqueue nodes in child graph in level order + for (auto& node : childGraphlevelOrder_) { + node->EnqueueCommands(stream); + } + // enqueue child graph end command + if (commands_.size() == 2) { + commands_[1]->enqueue(); + commands_[1]->release(); + } + } + + // Apply the parameters of every node in childGraph to the node at the same index of the owned + // child graph, stopping at the first failure. Returns hipErrorInvalidValue when childGraph has + // more nodes than the owned graph: there is no matching node to update and indexing oldNodes + // would run past its end. + hipError_t SetParams(const ihipGraph* childGraph) { + const std::vector& newNodes = childGraph->GetNodes(); + const std::vector& oldNodes = childGraph_->GetNodes(); + if (newNodes.size() > oldNodes.size()) { + return hipErrorInvalidValue; + } + for (std::vector::size_type i = 0; i != newNodes.size(); i++) { + hipError_t status = oldNodes[i]->SetParams(newNodes[i]); + if (status != hipSuccess) { + return status; + } + } + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipChildGraphNode* childGraphNode = static_cast(node); + return SetParams(childGraphNode->childGraph_); + } + + std::string GetLabel(hipGraphDebugDotFlags flag) { + return std::to_string(GetID()) + "\n" + "graph_" + std::to_string(childGraph_->GetID()); + } + + virtual void GenerateDOT(std::ostream& fout, hipGraphDebugDotFlags flag) { + childGraph_->GenerateDOT(fout, flag); + } +}; + +class hipGraphKernelNode :
public hipGraphNode { + hipKernelNodeParams* pKernelParams_; + unsigned int numParams_; + hipKernelNodeAttrValue kernelAttr_; + unsigned int kernelAttrInUse_; + + public: + void PrintAttributes(std::ostream& out, hipGraphDebugDotFlags flag) { + out << "["; + out << "style"; + out << "=\""; + out << style_; + (flag == hipGraphDebugDotFlagsKernelNodeParams || + flag == hipGraphDebugDotFlagsKernelNodeAttributes) ? + out << "\n" : out << "\""; + out << "shape"; + out << "=\""; + out << GetShape(flag); + out << "\""; + out << "label"; + out << "=\""; + out << GetLabel(flag); + out << "\""; + out << "];"; + } + + std::string GetLabel(hipGraphDebugDotFlags flag) { + hipFunction_t func = getFunc(*pKernelParams_, ihipGetDevice()); + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func); + std::string label; + char buffer[500]; + if (flag == hipGraphDebugDotFlagsVerbose) { + sprintf(buffer, + "{\n%s\n| {ID | %d | %s\\<\\<\\<(%u,%u,%u),(%u,%u,%u),%u\\>\\>\\>}\n| {{node " + "handle | func handle} | {%p | %p}}\n| {accessPolicyWindow | {base_ptr | num_bytes | " + "hitRatio | hitProp | missProp} | {%p | %ld | %f | %d | %d}}\n| {cooperative | " + "%u}\n| {priority | 0}\n}", + label_.c_str(), GetID(), function->name().c_str(), pKernelParams_->gridDim.x, + pKernelParams_->gridDim.y, pKernelParams_->gridDim.z, pKernelParams_->blockDim.x, + pKernelParams_->blockDim.y, pKernelParams_->blockDim.z, + pKernelParams_->sharedMemBytes, this, pKernelParams_->func, + kernelAttr_.accessPolicyWindow.base_ptr, kernelAttr_.accessPolicyWindow.num_bytes, + kernelAttr_.accessPolicyWindow.hitRatio, kernelAttr_.accessPolicyWindow.hitProp, + kernelAttr_.accessPolicyWindow.missProp, kernelAttr_.cooperative); + label = buffer; + } + else if (flag == hipGraphDebugDotFlagsKernelNodeAttributes) { + sprintf(buffer, + "{\n%s\n| {ID | %d | %s}\n" + "| {accessPolicyWindow | {base_ptr | num_bytes | " + "hitRatio | hitProp | missProp} |\n| {%p | %ld | %f | %d | %d}}\n| {cooperative | " + "%u}\n| {priority 
| 0}\n}", + label_.c_str(), GetID(), function->name().c_str(), + kernelAttr_.accessPolicyWindow.base_ptr, kernelAttr_.accessPolicyWindow.num_bytes, + kernelAttr_.accessPolicyWindow.hitRatio, kernelAttr_.accessPolicyWindow.hitProp, + kernelAttr_.accessPolicyWindow.missProp, kernelAttr_.cooperative); + label = buffer; + } + else if (flag == hipGraphDebugDotFlagsKernelNodeParams) { + sprintf(buffer, "%d\n%s\n\\<\\<\\<(%u,%u,%u),(%u,%u,%u),%u\\>\\>\\>", + GetID(), function->name().c_str(), pKernelParams_->gridDim.x, + pKernelParams_->gridDim.y, pKernelParams_->gridDim.z, + pKernelParams_->blockDim.x, pKernelParams_->blockDim.y, + pKernelParams_->blockDim.z, pKernelParams_->sharedMemBytes); + label = buffer; + } + else { + label = std::to_string(GetID()) + "\n" + function->name() + "\n"; + } + return label; + } + + std::string GetShape(hipGraphDebugDotFlags flag) { + if (flag == hipGraphDebugDotFlagsKernelNodeParams || flag == hipGraphDebugDotFlagsVerbose) { + return "record"; + } else { + return shape_; + } + } + + static hipFunction_t getFunc(const hipKernelNodeParams& params, unsigned int device) { + hipFunction_t func = nullptr; + hipError_t status = PlatformState::instance().getStatFunc(&func, params.func, device); + if (status == hipErrorInvalidSymbol) { + // capturehipExtModuleLaunchKernel() mixes host function with hipFunction_t, so we convert + // here. 
If it's wrong, later functions will fail + func = static_cast(params.func); + ClPrint(amd::LOG_INFO, amd::LOG_CODE, + "[hipGraph] capturehipExtModuleLaunchKernel() should be called", status); + } else if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] getStatFunc() failed with err %d", status); + } + return func; + } + + hipError_t copyParams(const hipKernelNodeParams* pNodeParams) { + hipFunction_t func = getFunc(*pNodeParams, ihipGetDevice()); + if (!func) { + return hipErrorInvalidDeviceFunction; + } + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func); + amd::Kernel* kernel = function->kernel(); + const amd::KernelSignature& signature = kernel->signature(); + numParams_ = signature.numParameters(); + + // Allocate/assign memory if params are passed part of 'kernelParams' + if (pNodeParams->kernelParams != nullptr) { + pKernelParams_->kernelParams = (void**)malloc(numParams_ * sizeof(void*)); + if (pKernelParams_->kernelParams == nullptr) { + return hipErrorOutOfMemory; + } + + for (uint32_t i = 0; i < numParams_; ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); + pKernelParams_->kernelParams[i] = malloc(desc.size_); + if (pKernelParams_->kernelParams[i] == nullptr) { + return hipErrorOutOfMemory; + } + ::memcpy(pKernelParams_->kernelParams[i], (pNodeParams->kernelParams[i]), desc.size_); + } + } + + // Allocate/assign memory if params are passed as part of 'extra' + else if (pNodeParams->extra != nullptr) { + // 'extra' is a struct that contains the following info: { + // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, + // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, + // HIP_LAUNCH_PARAM_END } + unsigned int numExtra = 5; + pKernelParams_->extra = (void**)malloc(numExtra * sizeof(void*)); + if (pKernelParams_->extra == nullptr) { + return hipErrorOutOfMemory; + } + pKernelParams_->extra[0] = pNodeParams->extra[0]; + size_t kernargs_size = *((size_t*)pNodeParams->extra[3]); + pKernelParams_->extra[1] = 
malloc(kernargs_size); + if (pKernelParams_->extra[1] == nullptr) { + return hipErrorOutOfMemory; + } + pKernelParams_->extra[2] = pNodeParams->extra[2]; + pKernelParams_->extra[3] = malloc(sizeof(void*)); + if (pKernelParams_->extra[3] == nullptr) { + return hipErrorOutOfMemory; + } + *((size_t*)pKernelParams_->extra[3]) = kernargs_size; + ::memcpy(pKernelParams_->extra[1], (pNodeParams->extra[1]), kernargs_size); + pKernelParams_->extra[4] = pNodeParams->extra[4]; + } + return hipSuccess; + } + + hipGraphKernelNode(const hipKernelNodeParams* pNodeParams) + : hipGraphNode(hipGraphNodeTypeKernel, "bold", "octagon", "KERNEL") { + pKernelParams_ = new hipKernelNodeParams(*pNodeParams); + if (copyParams(pNodeParams) != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to copy params"); + } + memset(&kernelAttr_, 0, sizeof(kernelAttr_)); + kernelAttrInUse_ = 0; + } + + ~hipGraphKernelNode() { freeParams(); } + + // Release the kernel-argument storage referenced by pKernelParams_ and then the params object + // itself. Does nothing when pKernelParams_ is already null. A node whose kernel takes no + // arguments has both 'kernelParams' and 'extra' null, so the 'extra' branch is taken only when + // that pointer is set; dereferencing it unconditionally would be a null dereference. + void freeParams() { + if (pKernelParams_ == nullptr) { + return; + } + // Deallocate memory allocated for kernargs passed via 'kernelParams' + if (pKernelParams_->kernelParams != nullptr) { + for (size_t i = 0; i < numParams_; ++i) { + if (pKernelParams_->kernelParams[i] != nullptr) { + free(pKernelParams_->kernelParams[i]); + } + pKernelParams_->kernelParams[i] = nullptr; + } + free(pKernelParams_->kernelParams); + pKernelParams_->kernelParams = nullptr; + } + // Deallocate memory allocated for kernargs passed via 'extra' + else if (pKernelParams_->extra != nullptr) { + free(pKernelParams_->extra[1]); + free(pKernelParams_->extra[3]); + memset(pKernelParams_->extra, 0, 5 * sizeof(pKernelParams_->extra[0])); // 5 items + free(pKernelParams_->extra); + pKernelParams_->extra = nullptr; + } + delete pKernelParams_; + pKernelParams_ = nullptr; + } + + hipGraphKernelNode(const hipGraphKernelNode& rhs) : hipGraphNode(rhs) { + pKernelParams_ = new hipKernelNodeParams(*rhs.pKernelParams_); + hipError_t status = copyParams(rhs.pKernelParams_); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed
to allocate memory to copy params"); + } + memset(&kernelAttr_, 0, sizeof(kernelAttr_)); + kernelAttrInUse_ = 0; + status = CopyAttr(&rhs); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to during copy attrs"); + } + } + + hipGraphNode* clone() const { + return new hipGraphKernelNode(static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipFunction_t func = nullptr; + hipError_t status = validateKernelParams(pKernelParams_, &func, + stream ? hip::getDeviceID(stream->context()) : -1); + if (hipSuccess != status) { + return status; + } + status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + commands_.reserve(1); + amd::Command* command; + status = ihipLaunchKernelCommand( + command, func, pKernelParams_->gridDim.x * pKernelParams_->blockDim.x, + pKernelParams_->gridDim.y * pKernelParams_->blockDim.y, + pKernelParams_->gridDim.z * pKernelParams_->blockDim.z, pKernelParams_->blockDim.x, + pKernelParams_->blockDim.y, pKernelParams_->blockDim.z, pKernelParams_->sharedMemBytes, + stream, pKernelParams_->kernelParams, pKernelParams_->extra, nullptr, nullptr, 0, 0, 0, 0, 0, + 0, 0); + commands_.emplace_back(command); + return status; + } + + void GetParams(hipKernelNodeParams* params) { *params = *pKernelParams_; } + + hipError_t SetParams(const hipKernelNodeParams* params) { + // updates kernel params + hipError_t status = validateKernelParams(params); + if (hipSuccess != status) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to validateKernelParams"); + return status; + } + if (pKernelParams_ && + ((pKernelParams_->kernelParams && pKernelParams_->kernelParams == params->kernelParams) || + (pKernelParams_->extra && pKernelParams_->extra == params->extra))) { + // params is copied from pKernelParams_ and then updated, so just copy it back + *pKernelParams_ = *params; + return status; + } + freeParams(); + pKernelParams_ = new 
hipKernelNodeParams(*params); + status = copyParams(params); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to set params"); + } + return status; + } + + hipError_t SetAttrParams(hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* params) { + // updates kernel attr params + if (attr == hipKernelNodeAttributeAccessPolicyWindow) { + if (params->accessPolicyWindow.hitRatio > 1) { + return hipErrorInvalidValue; + } + if (params->accessPolicyWindow.missProp == hipAccessPropertyPersisting) { + return hipErrorInvalidValue; + } + if (params->accessPolicyWindow.num_bytes > 0 && params->accessPolicyWindow.hitRatio == 0) { + return hipErrorInvalidValue; + } + kernelAttr_.accessPolicyWindow.base_ptr = params->accessPolicyWindow.base_ptr; + kernelAttr_.accessPolicyWindow.hitProp = params->accessPolicyWindow.hitProp; + kernelAttr_.accessPolicyWindow.hitRatio = params->accessPolicyWindow.hitRatio; + kernelAttr_.accessPolicyWindow.missProp = params->accessPolicyWindow.missProp; + kernelAttr_.accessPolicyWindow.num_bytes = params->accessPolicyWindow.num_bytes; + } else if (attr == hipKernelNodeAttributeCooperative) { + kernelAttr_.cooperative = params->cooperative; + } + kernelAttrInUse_ = attr; + return hipSuccess; + } + hipError_t GetAttrParams(hipKernelNodeAttrID attr, hipKernelNodeAttrValue* params) { + // Get kernel attr params + if (kernelAttrInUse_ != 0 && kernelAttrInUse_ != attr) return hipErrorInvalidValue; + if (attr == hipKernelNodeAttributeAccessPolicyWindow) { + params->accessPolicyWindow.base_ptr = kernelAttr_.accessPolicyWindow.base_ptr; + params->accessPolicyWindow.hitProp = kernelAttr_.accessPolicyWindow.hitProp; + params->accessPolicyWindow.hitRatio = kernelAttr_.accessPolicyWindow.hitRatio; + params->accessPolicyWindow.missProp = kernelAttr_.accessPolicyWindow.missProp; + params->accessPolicyWindow.num_bytes = kernelAttr_.accessPolicyWindow.num_bytes; + } else if (attr == hipKernelNodeAttributeCooperative) { + 
params->cooperative = kernelAttr_.cooperative; + } + return hipSuccess; + } + hipError_t CopyAttr(const hipGraphKernelNode* srcNode) { + if (kernelAttrInUse_ == 0 && srcNode->kernelAttrInUse_ == 0) { + return hipSuccess; + } + if (kernelAttrInUse_ != 0 && srcNode->kernelAttrInUse_ != kernelAttrInUse_) { + return hipErrorInvalidContext; + } + kernelAttrInUse_ = srcNode->kernelAttrInUse_; + switch (srcNode->kernelAttrInUse_) { + case hipKernelNodeAttributeAccessPolicyWindow: + kernelAttr_.accessPolicyWindow.base_ptr = srcNode->kernelAttr_.accessPolicyWindow.base_ptr; + kernelAttr_.accessPolicyWindow.hitProp = srcNode->kernelAttr_.accessPolicyWindow.hitProp; + kernelAttr_.accessPolicyWindow.hitRatio = srcNode->kernelAttr_.accessPolicyWindow.hitRatio; + kernelAttr_.accessPolicyWindow.missProp = srcNode->kernelAttr_.accessPolicyWindow.missProp; + kernelAttr_.accessPolicyWindow.num_bytes = + srcNode->kernelAttr_.accessPolicyWindow.num_bytes; + break; + case hipKernelNodeAttributeCooperative: + kernelAttr_.cooperative = srcNode->kernelAttr_.cooperative; + break; + default: + return hipErrorInvalidValue; + } + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphKernelNode* kernelNode = static_cast(node); + return SetParams(kernelNode->pKernelParams_); + } + + static hipError_t validateKernelParams(const hipKernelNodeParams* pNodeParams, + hipFunction_t* ptrFunc = nullptr, int devId = -1) { + devId = devId == -1 ? 
ihipGetDevice() : devId; + hipFunction_t func = getFunc(*pNodeParams, devId); + if (!func) { + return hipErrorInvalidDeviceFunction; + } + + size_t globalWorkSizeX = static_cast(pNodeParams->gridDim.x) * pNodeParams->blockDim.x; + size_t globalWorkSizeY = static_cast(pNodeParams->gridDim.y) * pNodeParams->blockDim.y; + size_t globalWorkSizeZ = static_cast(pNodeParams->gridDim.z) * pNodeParams->blockDim.z; + + hipError_t status = ihipLaunchKernel_validate( + func, static_cast(globalWorkSizeX), static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), pNodeParams->blockDim.x, pNodeParams->blockDim.y, + pNodeParams->blockDim.z, pNodeParams->sharedMemBytes, pNodeParams->kernelParams, + pNodeParams->extra, devId, 0); + if (status != hipSuccess) { + return status; + } + + if (ptrFunc) *ptrFunc = func; + return hipSuccess; + } +}; + +class hipGraphMemcpyNode : public hipGraphNode { + hipMemcpy3DParms* pCopyParams_; + + public: + hipGraphMemcpyNode(const hipMemcpy3DParms* pCopyParams) + : hipGraphNode(hipGraphNodeTypeMemcpy, "solid", "trapezium", "MEMCPY") { + pCopyParams_ = new hipMemcpy3DParms(*pCopyParams); + } + ~hipGraphMemcpyNode() { delete pCopyParams_; } + + hipGraphMemcpyNode(const hipGraphMemcpyNode& rhs) : hipGraphNode(rhs) { + pCopyParams_ = new hipMemcpy3DParms(*rhs.pCopyParams_); + } + + hipGraphNode* clone() const { + return new hipGraphMemcpyNode(static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + if (IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) { + return hipSuccess; + } + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + commands_.reserve(1); + amd::Command* command; + status = ihipMemcpy3DCommand(command, pCopyParams_, stream); + commands_.emplace_back(command); + return status; + } + + void EnqueueCommands(hipStream_t stream) override { + if (isEnabled_ && IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, 
pCopyParams_->kind)) { + ihipHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, + pCopyParams_->extent.width * pCopyParams_->extent.height * + pCopyParams_->extent.depth, *hip::getStream(stream)); + return; + } + hipGraphNode::EnqueueCommands(stream); + } + + void GetParams(hipMemcpy3DParms* params) { + std::memcpy(params, pCopyParams_, sizeof(hipMemcpy3DParms)); + } + hipError_t SetParams(const hipMemcpy3DParms* params) { + hipError_t status = ValidateParams(params); + if (status != hipSuccess) { + return status; + } + std::memcpy(pCopyParams_, params, sizeof(hipMemcpy3DParms)); + return hipSuccess; + } + hipError_t SetParams(hipGraphNode* node) { + const hipGraphMemcpyNode* memcpyNode = static_cast(node); + return SetParams(memcpyNode->pCopyParams_); + } + // ToDo: use this when commands are cloned and command params are to be updated + hipError_t ValidateParams(const hipMemcpy3DParms* pNodeParams); + + std::string GetLabel(hipGraphDebugDotFlags flag) { + size_t offset = 0; + const HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pCopyParams_); + hipMemoryType srcMemoryType = pCopy.srcMemoryType; + if (srcMemoryType == hipMemoryTypeUnified) { + srcMemoryType = + getMemoryObject(pCopy.srcDevice, offset) ? hipMemoryTypeDevice : hipMemoryTypeHost; + } + offset = 0; + hipMemoryType dstMemoryType = pCopy.dstMemoryType; + if (dstMemoryType == hipMemoryTypeUnified) { + dstMemoryType = + getMemoryObject(pCopy.dstDevice, offset) ? hipMemoryTypeDevice : hipMemoryTypeHost; + } + + // If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned. + // In that case upgrade the copy type to hipMemoryTypeDevice to avoid extra pinning. + offset = 0; + if (srcMemoryType == hipMemoryTypeHost) { + amd::Memory* mem = getMemoryObject(pCopy.srcHost, offset); + srcMemoryType = mem ? hipMemoryTypeDevice : hipMemoryTypeHost; + } + if (dstMemoryType == hipMemoryTypeHost) { + amd::Memory* mem = getMemoryObject(pCopy.dstHost, offset); + dstMemoryType = mem ? 
hipMemoryTypeDevice : hipMemoryTypeHost; + } + std::string memcpyDirection; + if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { + // Host to Device. + memcpyDirection = "HtoD"; + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { + // Device to Host. + memcpyDirection = "DtoH"; + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { + // Device to Device. + memcpyDirection = "DtoD"; + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { + memcpyDirection = "HtoA"; + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { + // Image to Host. + memcpyDirection = "AtoH"; + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { + // Device to Image. + memcpyDirection = "DtoA"; + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { + // Image to Device. 
+ memcpyDirection = "AtoD"; + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { + memcpyDirection = "AtoA"; + } + std::string label; + if (flag == hipGraphDebugDotFlagsMemcpyNodeParams || flag == hipGraphDebugDotFlagsVerbose) { + char buffer[500]; + sprintf( + buffer, + "{\n%s\n| {{ID | node handle} | {%u | %p}}\n| {kind | %s}\n| {{srcPtr | dstPtr} | " + "{pitch " + "| ptr | xsize | ysize | pitch | ptr | xsize | size} | {%zu | %p | %zu | %zu | %zu | %p " + "| %zu " + "| %zu}}\n| {{srcPos | {{x | %zu} | {y | %zu} | {z | %zu}}} | {dstPos | {{x | %zu} | {y " + "| " + "%zu} | {z | %zu}}} | {Extent | {{Width | %zu} | {Height | %zu} | {Depth | %zu}}}}\n}", + label_.c_str(), GetID(), this, memcpyDirection.c_str(), pCopyParams_->srcPtr.pitch, + pCopyParams_->srcPtr.ptr, pCopyParams_->srcPtr.xsize, pCopyParams_->srcPtr.ysize, + pCopyParams_->dstPtr.pitch, pCopyParams_->dstPtr.ptr, pCopyParams_->dstPtr.xsize, + pCopyParams_->dstPtr.ysize, pCopyParams_->srcPos.x, pCopyParams_->srcPos.y, + pCopyParams_->srcPos.z, pCopyParams_->dstPos.x, pCopyParams_->dstPos.y, + pCopyParams_->dstPos.z, pCopyParams_->extent.width, pCopyParams_->extent.height, + pCopyParams_->extent.depth); + label = buffer; + } else { + label = std::to_string(GetID()) + "\nMEMCPY\n(" + memcpyDirection + ")"; + } + return label; + } + std::string GetShape(hipGraphDebugDotFlags flag) { + if (flag == hipGraphDebugDotFlagsMemcpyNodeParams || flag == hipGraphDebugDotFlagsVerbose) { + return "record"; + } else { + return shape_; + } + } +}; + +class hipGraphMemcpyNode1D : public hipGraphNode { + protected: + void* dst_; + const void* src_; + size_t count_; + hipMemcpyKind kind_; + + public: + hipGraphMemcpyNode1D(void* dst, const void* src, size_t count, hipMemcpyKind kind, + hipGraphNodeType type = hipGraphNodeTypeMemcpy) + : hipGraphNode(type, "solid", "trapezium", "MEMCPY"), + dst_(dst), + src_(src), + count_(count), + kind_(kind) {} + + ~hipGraphMemcpyNode1D() {} + + 
hipGraphNode* clone() const { + return new hipGraphMemcpyNode1D(static_cast(*this)); + } + + virtual hipError_t CreateCommand(hip::Stream* stream) { + if (IsHtoHMemcpy(dst_, src_, kind_)) { + return hipSuccess; + } + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + commands_.reserve(1); + amd::Command* command = nullptr; + status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *stream); + commands_.emplace_back(command); + return status; + } + + void EnqueueCommands(hipStream_t stream) { + bool isH2H = IsHtoHMemcpy(dst_, src_, kind_); + if (!isH2H) { + if (commands_.empty()) return; + // commands_ should have just 1 item + assert(commands_.size() == 1 && "Invalid command size in hipGraphMemcpyNode1D"); + } + if (isEnabled_) { + //HtoH + if (isH2H) { + ihipHtoHMemcpy(dst_, src_, count_, *hip::getStream(stream)); + return; + } + amd::Command* command = commands_[0]; + amd::HostQueue* cmdQueue = command->queue(); + hip::Stream* hip_stream = hip::getStream(stream); + + if (cmdQueue == hip_stream) { + command->enqueue(); + command->release(); + return; + } + + amd::Command::EventWaitList waitList; + amd::Command* depdentMarker = nullptr; + amd::Command* cmd = hip_stream->getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + amd::Command* depdentMarker = new amd::Marker(*cmdQueue, true, waitList); + if (depdentMarker != nullptr) { + depdentMarker->enqueue(); // Make sure command synced with last command of queue + depdentMarker->release(); + } + cmd->release(); + } + command->enqueue(); + command->release(); + + cmd = cmdQueue->getLastQueuedCommand(true); // should be command + if (cmd != nullptr) { + waitList.clear(); + waitList.push_back(cmd); + amd::Command* depdentMarker = new amd::Marker(*hip_stream, true, waitList); + if (depdentMarker != nullptr) { + depdentMarker->enqueue(); // Make sure future commands of queue synced with command + depdentMarker->release(); + } + 
cmd->release(); + } + } else { + amd::Command::EventWaitList waitList; + hip::Stream* hip_stream = hip::getStream(stream); + amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList); + command->enqueue(); + command->release(); + } + } + + hipError_t SetParams(void* dst, const void* src, size_t count, hipMemcpyKind kind) { + hipError_t status = ValidateParams(dst, src, count, kind); + if (status != hipSuccess) { + return status; + } + dst_ = dst; + src_ = src; + count_ = count; + kind_ = kind; + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphMemcpyNode1D* memcpy1DNode = static_cast(node); + return SetParams(memcpy1DNode->dst_, memcpy1DNode->src_, memcpy1DNode->count_, + memcpy1DNode->kind_); + } + static hipError_t ValidateParams(void* dst, const void* src, size_t count, hipMemcpyKind kind); + std::string GetLabel(hipGraphDebugDotFlags flag) { + size_t sOffsetOrig = 0; + amd::Memory* origSrcMemory = getMemoryObject(src_, sOffsetOrig); + size_t dOffsetOrig = 0; + amd::Memory* origDstMemory = getMemoryObject(dst_, dOffsetOrig); + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src_, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst_, dOffset); + std::string memcpyDirection; + if ((srcMemory == nullptr) && (dstMemory != nullptr)) { // host to device + memcpyDirection = "HtoD"; + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { // device to host + memcpyDirection = "DtoH"; + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + memcpyDirection = "DtoD"; + } else { + if (kind_ == hipMemcpyHostToDevice) { + memcpyDirection = "HtoD"; + } else if (kind_ == hipMemcpyDeviceToHost) { + memcpyDirection = "DtoH"; + } + } + std::string label; + if (flag == hipGraphDebugDotFlagsMemcpyNodeParams || flag == hipGraphDebugDotFlagsVerbose) { + char buffer[500]; + sprintf(buffer, + "{\n%s\n| {{ID | node handle} | {%u | %p}}\n| {kind | %s}\n| {{srcPtr 
| dstPtr} | " + "{pitch " + "| ptr | xsize | ysize | pitch | ptr | xsize | size} | {%zu | %p | %zu | %zu | %zu | %p " + "| %zu " + "| %zu}}\n| {{srcPos | {{x | %zu} | {y | %zu} | {z | %zu}}} | {dstPos | {{x | %zu} | {y " + "| " + "%zu} | {z | %zu}}} | {Extent | {{Width | %zu} | {Height | %zu} | {Depth | %zu}}}}\n}", + label_.c_str(), GetID(), this, memcpyDirection.c_str(), (size_t)0, + src_, (size_t)0, (size_t)0, (size_t)0, dst_, (size_t)0, (size_t)0, (size_t)0, (size_t)0, (size_t)0, (size_t)0, (size_t)0, (size_t)0, count_, (size_t)1, (size_t)1); + label = buffer; + } else { + label = std::to_string(GetID()) + "\n" + label_ + "\n(" + memcpyDirection + "," + + std::to_string(count_) + ")"; + } + return label; + } + std::string GetShape(hipGraphDebugDotFlags flag) { + if (flag == hipGraphDebugDotFlagsMemcpyNodeParams || flag == hipGraphDebugDotFlagsVerbose) { + return "record"; + } else { + return shape_; + } + } +}; + +class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D { + const void* symbol_; + size_t offset_; + + public: + hipGraphMemcpyNodeFromSymbol(void* dst, const void* symbol, size_t count, size_t offset, + hipMemcpyKind kind) + : hipGraphMemcpyNode1D(dst, nullptr, count, kind, hipGraphNodeTypeMemcpy), + symbol_(symbol), + offset_(offset) {} + + ~hipGraphMemcpyNodeFromSymbol() {} + + hipGraphNode* clone() const { + return new hipGraphMemcpyNodeFromSymbol( + static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + commands_.reserve(1); + amd::Command* command = nullptr; + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + status = ihipMemcpySymbol_validate(symbol_, count_, offset_, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + status = ihipMemcpyCommand(command, dst_, device_ptr, count_, kind_, *stream); + if (status != hipSuccess) { + return status; + } + 
commands_.emplace_back(command); + return status; + } + + hipError_t SetParams(void* dst, const void* symbol, size_t count, size_t offset, + hipMemcpyKind kind) { + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + // check to see if dst is also a symbol (hip negative test case) + hipError_t status = ihipMemcpySymbol_validate(dst, count, offset, sym_size, device_ptr); + if (status == hipSuccess) { + return hipErrorInvalidValue; + } + status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + if (dstMemory == nullptr && kind != hipMemcpyHostToDevice) { + return hipErrorInvalidMemcpyDirection; + } else if (dstMemory != nullptr && kind != hipMemcpyDeviceToDevice) { + return hipErrorInvalidMemcpyDirection; + } else if (kind == hipMemcpyHostToHost || kind == hipMemcpyDeviceToHost) { + return hipErrorInvalidMemcpyDirection; + } + + dst_ = dst; + symbol_ = symbol; + count_ = count; + offset_ = offset; + kind_ = kind; + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphMemcpyNodeFromSymbol* memcpyNode = + static_cast(node); + return SetParams(memcpyNode->dst_, memcpyNode->symbol_, memcpyNode->count_, memcpyNode->offset_, + memcpyNode->kind_); + } +}; +class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D { + const void* symbol_; + size_t offset_; + + public: + hipGraphMemcpyNodeToSymbol(const void* symbol, const void* src, size_t count, size_t offset, + hipMemcpyKind kind) + : hipGraphMemcpyNode1D(nullptr, src, count, kind, hipGraphNodeTypeMemcpy), + symbol_(symbol), + offset_(offset) {} + + ~hipGraphMemcpyNodeToSymbol() {} + + hipGraphNode* clone() const { + return new hipGraphMemcpyNodeToSymbol(static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + 
return status; + } + commands_.reserve(1); + amd::Command* command = nullptr; + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + status = ihipMemcpySymbol_validate(symbol_, count_, offset_, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + status = ihipMemcpyCommand(command, device_ptr, src_, count_, kind_, *stream); + if (status != hipSuccess) { + return status; + } + commands_.emplace_back(command); + return status; + } + + hipError_t SetParams(const void* symbol, const void* src, size_t count, size_t offset, + hipMemcpyKind kind) { + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + // check to see if src is also a symbol (hip negative test case) + hipError_t status = ihipMemcpySymbol_validate(src, count, offset, sym_size, device_ptr); + if (status == hipSuccess) { + return hipErrorInvalidValue; + } + status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + size_t dOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, dOffset); + if (srcMemory == nullptr && kind != hipMemcpyHostToDevice) { + return hipErrorInvalidValue; + } else if (srcMemory != nullptr && kind != hipMemcpyDeviceToDevice) { + return hipErrorInvalidValue; + } else if (kind == hipMemcpyHostToHost || kind == hipMemcpyDeviceToHost) { + return hipErrorInvalidValue; + } + symbol_ = symbol; + src_ = src; + count_ = count; + offset_ = offset; + kind_ = kind; + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphMemcpyNodeToSymbol* memcpyNode = + static_cast(node); + /* Argument order must follow SetParams(symbol, src, ...): symbol first, then src. */ return SetParams(memcpyNode->symbol_, memcpyNode->src_, memcpyNode->count_, memcpyNode->offset_, + memcpyNode->kind_); + } +}; + +class hipGraphMemsetNode : public hipGraphNode { + hipMemsetParams* pMemsetParams_; + + public: + hipGraphMemsetNode(const hipMemsetParams* pMemsetParams) + : hipGraphNode(hipGraphNodeTypeMemset, "solid", "invtrapezium", "MEMSET") { + 
pMemsetParams_ = new hipMemsetParams(*pMemsetParams); + size_t sizeBytes = 0; + if (pMemsetParams_->height == 1) { + sizeBytes = pMemsetParams_->width * pMemsetParams_->elementSize; + } else { + sizeBytes = pMemsetParams_->width * pMemsetParams_->height * pMemsetParams_->elementSize; + } + } + + ~hipGraphMemsetNode() { delete pMemsetParams_; } + // Copy constructor + hipGraphMemsetNode(const hipGraphMemsetNode& memsetNode) : hipGraphNode(memsetNode) { + pMemsetParams_ = new hipMemsetParams(*memsetNode.pMemsetParams_); + } + + hipGraphNode* clone() const { + return new hipGraphMemsetNode(static_cast(*this)); + } + + std::string GetLabel(hipGraphDebugDotFlags flag) { + std::string label; + if (flag == hipGraphDebugDotFlagsMemsetNodeParams || flag == hipGraphDebugDotFlagsVerbose) { + char buffer[500]; + sprintf(buffer, + "{\n%s\n| {{ID | node handle | dptr | pitch | value | elementSize | width | " + "height} | {%u | %p | %p | %zu | %u | %u | %zu | %zu}}}", + label_.c_str(), GetID(), this, pMemsetParams_->dst, pMemsetParams_->pitch, + pMemsetParams_->value, pMemsetParams_->elementSize, pMemsetParams_->width, + pMemsetParams_->height); + label = buffer; + } else { + size_t sizeBytes; + if (pMemsetParams_->height == 1) { + sizeBytes = pMemsetParams_->width * pMemsetParams_->elementSize; + } else { + sizeBytes = pMemsetParams_->width * pMemsetParams_->height * pMemsetParams_->elementSize; + } + label = std::to_string(GetID()) + "\n" + label_ + "\n(" + + std::to_string(pMemsetParams_->value) + "," + std::to_string(sizeBytes) + ")"; + } + return label; + } + + std::string GetShape(hipGraphDebugDotFlags flag) { + if (flag == hipGraphDebugDotFlagsMemsetNodeParams || flag == hipGraphDebugDotFlagsVerbose) { + return "record"; + } else { + return shape_; + } + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + if (pMemsetParams_->height == 1) { + size_t sizeBytes = 
pMemsetParams_->width * pMemsetParams_->elementSize; + /* Assign the function-level status instead of redeclaring it, so a failure is returned to the caller. */ status = ihipMemsetCommand(commands_, pMemsetParams_->dst, pMemsetParams_->value, + pMemsetParams_->elementSize, sizeBytes, stream); + } else { + status = ihipMemset3DCommand( + commands_, + {pMemsetParams_->dst, pMemsetParams_->pitch, pMemsetParams_->width * pMemsetParams_->elementSize, + pMemsetParams_->height}, + pMemsetParams_->value, {pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height, 1}, stream, pMemsetParams_->elementSize); + } + return status; + } + + void GetParams(hipMemsetParams* params) { + std::memcpy(params, pMemsetParams_, sizeof(hipMemsetParams)); + } + + hipError_t SetParams(const hipMemsetParams* params) { + hipError_t hip_error = hipSuccess; + hipMemsetParams origParams = {}; + GetParams(&origParams); + hip_error = ihipGraphMemsetParams_validate(params); + if (hip_error != hipSuccess) { + return hip_error; + } + size_t sizeBytes; + if (params->height == 1) { + sizeBytes = params->width * params->elementSize; + if (sizeBytes != origParams.width * origParams.elementSize) { + return hipErrorInvalidValue; + } + hip_error = ihipMemset_validate(params->dst, params->value, params->elementSize, sizeBytes); + } else { + sizeBytes = params->width * params->height * 1; + if (sizeBytes != origParams.width * origParams.height * 1) { + return hipErrorInvalidValue; + } + hip_error = + ihipMemset3D_validate({params->dst, params->pitch, params->width * params->elementSize, params->height}, + params->value, {params->width * params->elementSize, params->height, 1}, sizeBytes); + } + if (hip_error != hipSuccess) { + return hip_error; + } + std::memcpy(pMemsetParams_, params, sizeof(hipMemsetParams)); + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphMemsetNode* memsetNode = static_cast(node); + return SetParams(memsetNode->pMemsetParams_); + } +}; + +class hipGraphEventRecordNode : public hipGraphNode { + hipEvent_t event_; + + 
public: + hipGraphEventRecordNode(hipEvent_t event) + : hipGraphNode(hipGraphNodeTypeEventRecord, "solid", "rectangle", "EVENT_RECORD"), + event_(event) {} + ~hipGraphEventRecordNode() {} + + hipGraphNode* clone() const { + return new hipGraphEventRecordNode(static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + hip::Event* e = reinterpret_cast(event_); + commands_.reserve(1); + amd::Command* command = nullptr; + status = e->recordCommand(command, stream); + commands_.emplace_back(command); + return status; + } + + void EnqueueCommands(hipStream_t stream) { + if (!commands_.empty()) { + hip::Event* e = reinterpret_cast(event_); + // command release during enqueueRecordCommand + hipError_t status = e->enqueueRecordCommand(stream, commands_[0], true); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, + "[hipGraph] enqueue event record command failed for node %p - status %d\n", this, + status); + } + } + } + + void GetParams(hipEvent_t* event) const { *event = event_; } + + hipError_t SetParams(hipEvent_t event) { + event_ = event; + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphEventRecordNode* eventRecordNode = + static_cast(node); + return SetParams(eventRecordNode->event_); + } +}; + +class hipGraphEventWaitNode : public hipGraphNode { + hipEvent_t event_; + + public: + hipGraphEventWaitNode(hipEvent_t event) + : hipGraphNode(hipGraphNodeTypeWaitEvent, "solid", "rectangle", "EVENT_WAIT"), + event_(event) {} + ~hipGraphEventWaitNode() {} + + hipGraphNode* clone() const { + return new hipGraphEventWaitNode(static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + hip::Event* e = reinterpret_cast(event_); + commands_.reserve(1); + amd::Command* 
command = nullptr; /* initialized so commands_ never stores an indeterminate pointer if streamWaitCommand leaves it unset */ + status = e->streamWaitCommand(command, stream); + commands_.emplace_back(command); + return status; + } + + void EnqueueCommands(hipStream_t stream) { + if (!commands_.empty()) { + hip::Event* e = reinterpret_cast(event_); + hipError_t status = e->enqueueStreamWaitCommand(stream, commands_[0]); + if (status != hipSuccess) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, + "[hipGraph] enqueue stream wait command failed for node %p - status %d\n", this, + status); + } + commands_[0]->release(); + } + } + + void GetParams(hipEvent_t* event) const { *event = event_; } + + hipError_t SetParams(hipEvent_t event) { + event_ = event; + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphEventWaitNode* eventWaitNode = static_cast(node); + return SetParams(eventWaitNode->event_); + } +}; + +class hipGraphHostNode : public hipGraphNode { + hipHostNodeParams* pNodeParams_; + + public: + hipGraphHostNode(const hipHostNodeParams* pNodeParams) + : hipGraphNode(hipGraphNodeTypeHost, "solid", "rectangle", "HOST") { + pNodeParams_ = new hipHostNodeParams(*pNodeParams); + } + ~hipGraphHostNode() { delete pNodeParams_; } + + hipGraphHostNode(const hipGraphHostNode& hostNode) : hipGraphNode(hostNode) { + pNodeParams_ = new hipHostNodeParams(*hostNode.pNodeParams_); + } + + hipGraphNode* clone() const { + return new hipGraphHostNode(static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + amd::Command::EventWaitList waitList; + commands_.reserve(1); + amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList); + commands_.emplace_back(command); + return hipSuccess; + } + + static void Callback(cl_event event, cl_int command_exec_status, void* user_data) { + hipHostNodeParams* pNodeParams = reinterpret_cast(user_data); + pNodeParams->fn(pNodeParams->userData); + } + + void 
EnqueueCommands(hipStream_t stream) { + if (!commands_.empty()) { + if (!commands_[0]->setCallback(CL_COMPLETE, hipGraphHostNode::Callback, pNodeParams_)) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed during setCallback"); + } + commands_[0]->enqueue(); + // Add the new barrier to stall the stream, until the callback is done + amd::Command::EventWaitList eventWaitList; + eventWaitList.push_back(commands_[0]); + amd::Command* block_command = + new amd::Marker(*commands_[0]->queue(), !kMarkerDisableFlush, eventWaitList); + if (block_command == nullptr) { + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed during block command creation"); + } + block_command->enqueue(); + block_command->release(); + commands_[0]->release(); + } + } + + void GetParams(hipHostNodeParams* params) { + std::memcpy(params, pNodeParams_, sizeof(hipHostNodeParams)); + } + hipError_t SetParams(const hipHostNodeParams* params) { + std::memcpy(pNodeParams_, params, sizeof(hipHostNodeParams)); + return hipSuccess; + } + + hipError_t SetParams(hipGraphNode* node) { + const hipGraphHostNode* hostNode = static_cast(node); + return SetParams(hostNode->pNodeParams_); + } +}; + +class hipGraphEmptyNode : public hipGraphNode { + public: + hipGraphEmptyNode() : hipGraphNode(hipGraphNodeTypeEmpty, "solid", "rectangle", "EMPTY") {} + ~hipGraphEmptyNode() {} + + hipGraphNode* clone() const { + return new hipGraphEmptyNode(static_cast(*this)); + } + + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); + if (status != hipSuccess) { + return status; + } + amd::Command::EventWaitList waitList; + commands_.reserve(1); + amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList); + commands_.emplace_back(command); + return hipSuccess; + } +}; + +class hipGraphMemAllocNode : public hipGraphNode { + hipMemAllocNodeParams node_params_; // Node parameters for memory allocation + + public: + hipGraphMemAllocNode(const 
hipMemAllocNodeParams* node_params) + : hipGraphNode(hipGraphNodeTypeMemAlloc, "solid", "rectangle", "MEM_ALLOC") { + node_params_ = *node_params; + } + ~hipGraphMemAllocNode() {} + + hipGraphNode* clone() const { + return new hipGraphMemAllocNode(static_cast(*this)); + } + + virtual hipError_t CreateCommand(hip::Stream* stream) { + auto error = hipGraphNode::CreateCommand(stream); + auto ptr = Execute(stream_); + return error; + } + + void* Execute(hip::Stream* stream = nullptr) { + auto graph = GetParentGraph(); + if (graph != nullptr) { + // The node creation requires to return a valid address, however FreeNode can't + // free memory on creation because it doesn't have any execution point yet. Thus + // the code below makes sure memory won't be recreated on the first execution of the graph + if ((node_params_.dptr == nullptr) || !graph->ProbeMemory(node_params_.dptr)) { + auto dptr = graph->AllocateMemory(node_params_.bytesize, stream, node_params_.dptr); + if ((node_params_.dptr != nullptr) && (node_params_.dptr != dptr)) { + LogPrintfError("Ptr mismatch in graph mem alloc %p != %p", node_params_.dptr, dptr); + } + node_params_.dptr = dptr; + } + } + return node_params_.dptr; + } + + bool IsActiveMem() { + auto graph = GetParentGraph(); + return graph->ProbeMemory(node_params_.dptr); + } + + + void GetParams(hipMemAllocNodeParams* params) const { + std::memcpy(params, &node_params_, sizeof(hipMemAllocNodeParams)); + } +}; + +class hipGraphMemFreeNode : public hipGraphNode { + void* device_ptr_; // Device pointer of the freed memory + + public: + hipGraphMemFreeNode(void* dptr) + : hipGraphNode(hipGraphNodeTypeMemFree, "solid", "rectangle", "MEM_FREE") + , device_ptr_(dptr) {} + ~hipGraphMemFreeNode() {} + + hipGraphNode* clone() const { + return new hipGraphMemFreeNode(static_cast(*this)); + } + + virtual hipError_t CreateCommand(hip::Stream* stream) { + auto error = hipGraphNode::CreateCommand(stream); + Execute(stream_); + return error; + } + + void 
Execute(hip::Stream* stream) { + auto graph = GetParentGraph(); + if (graph != nullptr) { + graph->FreeMemory(device_ptr_, stream); + } + } + + void GetParams(void** params) const { + *params = device_ptr_; + } +}; diff --git a/projects/clr/hipamd/src/hip_hcc.def.in b/projects/clr/hipamd/src/hip_hcc.def.in new file mode 100644 index 0000000000..fe2193594c --- /dev/null +++ b/projects/clr/hipamd/src/hip_hcc.def.in @@ -0,0 +1,461 @@ +EXPORTS +hipChooseDevice +hipCtxCreate +hipCtxDestroy +hipCtxDisablePeerAccess +hipCtxEnablePeerAccess +hipCtxGetApiVersion +hipCtxGetCacheConfig +hipCtxGetCurrent +hipCtxGetDevice +hipCtxGetFlags +hipCtxGetSharedMemConfig +hipCtxPopCurrent +hipCtxPushCurrent +hipCtxSetCacheConfig +hipCtxSetCurrent +hipCtxSetSharedMemConfig +hipCtxSynchronize +hipDeviceCanAccessPeer +hipDeviceComputeCapability +hipDeviceDisablePeerAccess +hipDeviceEnablePeerAccess +hipDeviceGet +hipDeviceGetAttribute +hipDeviceGetByPCIBusId +hipDeviceGetCacheConfig +hipDeviceGetStreamPriorityRange +hipDeviceGetLimit +hipDeviceGetName +hipDeviceGetUuid +hipDeviceGetPCIBusId +hipDeviceGetSharedMemConfig +hipDeviceGetP2PAttribute +hipDevicePrimaryCtxGetState +hipDevicePrimaryCtxRelease +hipDevicePrimaryCtxReset +hipDevicePrimaryCtxRetain +hipDevicePrimaryCtxSetFlags +hipDeviceReset +hipDeviceSetCacheConfig +hipDeviceSetSharedMemConfig +hipDeviceSynchronize +hipDeviceTotalMem +hipDriverGetVersion +hipEventCreate +hipEventCreateWithFlags +hipEventDestroy +hipEventElapsedTime +hipEventQuery +hipEventRecord +hipEventSynchronize +hipExtGetLinkTypeAndHopCount +hipExtLaunchMultiKernelMultiDevice +hipExtMallocWithFlags +hipExtModuleLaunchKernel +hipExtLaunchKernel +hipFree +hipFreeArray +hipFuncSetAttribute +hipFuncSetCacheConfig +hipFuncSetSharedMemConfig +hipGetDevice +hipGetDeviceCount +hipGetDeviceProperties +hipGetErrorName +hipGetErrorString +hipGetLastError +hipMemAllocHost +hipHostAlloc +hipHostFree +hipHostGetDevicePointer +hipHostGetFlags +hipHostMalloc +hipHostRegister 
+hipHostUnregister +hipInit +hipIpcCloseMemHandle +hipIpcGetMemHandle +hipIpcOpenMemHandle +hipIpcGetEventHandle +hipIpcOpenEventHandle +hipMalloc +hipMalloc3D +hipMalloc3DArray +hipMallocManaged +hipDeviceGetDefaultMemPool +hipDeviceSetMemPool +hipDeviceGetMemPool +hipMallocAsync +hipFreeAsync +hipMemPoolTrimTo +hipMemPoolSetAttribute +hipMemPoolGetAttribute +hipMemPoolSetAccess +hipMemPoolGetAccess +hipMemPoolCreate +hipMemPoolDestroy +hipMallocFromPoolAsync +hipMemPoolExportToShareableHandle +hipMemPoolImportFromShareableHandle +hipMemPoolExportPointer +hipMemPoolImportPointer +hipArrayCreate +hipArray3DCreate +hipArrayDestroy +hipArrayGetInfo +hipArrayGetDescriptor +hipArray3DGetDescriptor +hipMallocArray +hipMemAdvise +hipMemAllocPitch +hipMallocPitch +hipMemcpy +hipMemcpyWithStream +hipMemcpyParam2D +hipMemcpy2D +hipMemcpy2DAsync +hipMemcpy2DToArray +hipMemcpy2DToArrayAsync +hipMemcpy3D +hipMemcpy3DAsync +hipDrvMemcpy3D +hipDrvMemcpy3DAsync +hipMemcpyAsync +hipMemcpyDtoD +hipMemcpyDtoDAsync +hipMemcpyDtoH +hipMemcpyDtoHAsync +hipMemcpyFromSymbol +hipMemcpyFromSymbolAsync +hipMemcpyHtoD +hipMemcpyHtoDAsync +hipMemcpyPeer +hipMemcpyPeerAsync +hipMemcpyToArray +hipMemcpyFromArray +hipMemcpyToSymbol +hipMemcpyToSymbolAsync +hipMemGetAddressRange +hipGetSymbolAddress +hipGetSymbolSize +hipMemGetInfo +hipMemPrefetchAsync +hipMemPtrGetInfo +hipMemRangeGetAttribute +hipMemRangeGetAttributes +hipMemset +hipMemsetAsync +hipMemsetD8 +hipMemsetD8Async +hipMemsetD16 +hipMemsetD16Async +hipMemsetD32 +hipMemsetD32Async +hipMemset2D +hipMemset2DAsync +hipMemset3D +hipMemset3DAsync +hipModuleGetFunction +hipModuleGetGlobal +hipModuleGetTexRef +hipModuleLaunchKernel +hipModuleLaunchKernelExt +hipModuleLaunchCooperativeKernel +hipModuleLaunchCooperativeKernelMultiDevice +hipLaunchCooperativeKernel +hipLaunchCooperativeKernelMultiDevice +hipHccModuleLaunchKernel +hipModuleLoad +hipModuleLoadData +hipModuleLoadDataEx +hipModuleUnload +hipModuleOccupancyMaxPotentialBlockSize 
+hipModuleOccupancyMaxPotentialBlockSizeWithFlags +hipModuleOccupancyMaxActiveBlocksPerMultiprocessor +hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +hipOccupancyMaxPotentialBlockSize +hipOccupancyMaxActiveBlocksPerMultiprocessor +hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +hipFuncGetAttribute +hipFuncGetAttributes +hipPeekAtLastError +hipPointerSetAttribute +hipPointerGetAttributes +hipProfilerStart +hipProfilerStop +hipRuntimeGetVersion +hipGetDeviceFlags +hipSetDevice +hipSetDeviceFlags +hipStreamAddCallback +hipStreamAttachMemAsync +hipStreamCreate +hipStreamCreateWithFlags +hipStreamCreateWithPriority +hipStreamDestroy +hipStreamGetDevice +hipStreamGetFlags +hipStreamQuery +hipStreamSynchronize +hipStreamWaitEvent +__hipPopCallConfiguration +__hipPushCallConfiguration +__hipRegisterFatBinary +__hipRegisterFunction +__hipRegisterVar +__hipRegisterSurface +__hipRegisterTexture +__hipRegisterManagedVar +__hipUnregisterFatBinary +hipConfigureCall +hipSetupArgument +hipLaunchByPtr +hipLaunchKernel +hipRegisterTracerCallback +hipApiName +hipKernelNameRef +hipBindTexture +hipBindTexture2D +hipBindTextureToArray +hipBindTextureToMipmappedArray +hipGetTextureAlignmentOffset +hipGetTextureReference +hipUnbindTexture +hipCreateChannelDesc +hipCreateTextureObject +hipDestroyTextureObject +hipGetChannelDesc +hipGetTextureObjectResourceDesc +hipGetTextureObjectResourceViewDesc +hipGetTextureObjectTextureDesc +hipTexRefGetAddress +hipTexRefGetAddressMode +hipTexRefGetArray +hipTexRefGetBorderColor +hipTexRefGetFilterMode +hipTexRefGetFlags +hipTexRefGetFormat +hipTexRefGetMaxAnisotropy +hipTexRefGetMipmapFilterMode +hipTexRefGetMipmapLevelBias +hipTexRefGetMipmapLevelClamp +hipTexRefGetMipmappedArray +hipTexRefSetAddress +hipTexRefSetAddress2D +hipTexRefSetAddressMode +hipTexRefSetArray +hipTexRefSetBorderColor +hipTexRefSetFilterMode +hipTexRefSetFlags +hipTexRefSetFormat +hipTexRefSetMaxAnisotropy +hipTexRefSetMipmapFilterMode 
+hipTexRefSetMipmapLevelBias +hipTexRefSetMipmapLevelClamp +hipTexRefSetMipmappedArray +hipProfilerStart +hipProfilerStop +hipCreateSurfaceObject +hipDestroySurfaceObject +hipGetCmdName +hiprtcAddNameExpression +hiprtcCompileProgram +hiprtcCreateProgram +hiprtcDestroyProgram +hiprtcGetLoweredName +hiprtcGetProgramLog +hiprtcGetProgramLogSize +hiprtcGetCode +hiprtcGetCodeSize +hiprtcGetErrorString +hiprtcGetBitcode +hiprtcGetBitcodeSize +hiprtcLinkCreate +hiprtcLinkAddFile +hiprtcLinkAddData +hiprtcLinkComplete +hiprtcLinkDestroy +hipMipmappedArrayCreate +hipMallocMipmappedArray +hipMipmappedArrayDestroy +hipFreeMipmappedArray +hipMipmappedArrayGetLevel +hipGetMipmappedArrayLevel +hipMallocHost +hipFreeHost +hipTexObjectCreate +hipTexObjectDestroy +hipTexObjectGetResourceDesc +hipTexObjectGetResourceViewDesc +hipTexObjectGetTextureDesc +hipExtStreamCreateWithCUMask +hipStreamGetPriority +hipMemcpy2DFromArray +hipMemcpy2DFromArrayAsync +hipDrvMemcpy2DUnaligned +hipMemcpyAtoH +hipMemcpyHtoA +hipMemcpyParam2DAsync +__gnu_h2f_ieee +__gnu_f2h_ieee +hipExtStreamGetCUMask +hipImportExternalMemory +hipExternalMemoryGetMappedBuffer +hipDestroyExternalMemory +hipGraphCreate +hipGraphDestroy +hipGraphAddKernelNode +hipGraphAddMemsetNode +hipGraphAddMemcpyNode +hipGraphAddMemcpyNode1D +hipGraphInstantiate +hipGraphLaunch +hipStreamIsCapturing +hipStreamBeginCapture +hipStreamEndCapture +hipGraphExecDestroy +hipPointerGetAttribute +hipDrvPointerGetAttributes +hipImportExternalSemaphore +hipSignalExternalSemaphoresAsync +hipWaitExternalSemaphoresAsync +hipDestroyExternalSemaphore +hipGLGetDevices +hipGraphicsGLRegisterBuffer +hipGraphicsMapResources +hipGraphicsResourceGetMappedPointer +hipGraphicsUnmapResources +hipGraphicsUnregisterResource +hipGraphGetNodes +hipGraphGetRootNodes +hipGraphKernelNodeGetParams +hipGraphKernelNodeSetParams +hipGraphMemcpyNodeGetParams +hipGraphKernelNodeSetAttribute +hipGraphKernelNodeGetAttribute +hipGraphMemcpyNodeSetParams 
+hipGraphMemsetNodeGetParams +hipGraphMemsetNodeSetParams +hipGraphAddDependencies +hipGraphExecKernelNodeSetParams +hipGraphAddEmptyNode +hipStreamUpdateCaptureDependencies +hipGraphRemoveDependencies +hipGraphGetEdges +hipGraphNodeGetDependencies +hipGraphNodeGetDependentNodes +hipGraphNodeGetType +hipGraphDestroyNode +hipGraphClone +hipGraphNodeFindInClone +hipGraphAddChildGraphNode +hipGraphChildGraphNodeGetGraph +hipGraphExecChildGraphNodeSetParams +hipGraphAddMemcpyNodeFromSymbol +hipGraphMemcpyNodeSetParamsFromSymbol +hipGraphExecMemcpyNodeSetParamsFromSymbol +hipGraphAddMemcpyNodeToSymbol +hipGraphMemcpyNodeSetParamsToSymbol +hipGraphExecMemcpyNodeSetParamsToSymbol +hipGraphExecMemcpyNodeSetParams +hipGraphMemcpyNodeSetParams1D +hipGraphExecMemcpyNodeSetParams1D +hipGraphAddEventRecordNode +hipGraphEventRecordNodeGetEvent +hipGraphEventRecordNodeSetEvent +hipGraphExecEventRecordNodeSetEvent +hipGraphAddEventWaitNode +hipGraphEventWaitNodeGetEvent +hipGraphEventWaitNodeSetEvent +hipGraphExecEventWaitNodeSetEvent +hipGraphAddHostNode +hipGraphHostNodeGetParams +hipGraphHostNodeSetParams +hipGraphExecHostNodeSetParams +hipGraphExecUpdate +hipGraphInstantiateWithFlags +hipGraphExecMemsetNodeSetParams +hipDeviceGetGraphMemAttribute +hipDeviceSetGraphMemAttribute +hipDeviceGraphMemTrim +amd_dbgapi_get_build_name +amd_dbgapi_get_git_hash +amd_dbgapi_get_build_id +hipStreamGetCaptureInfo +hipStreamGetCaptureInfo_v2 +hipThreadExchangeStreamCaptureMode +hipMemAddressFree +hipMemAddressReserve +hipMemCreate +hipMemExportToShareableHandle +hipMemGetAccess +hipMemGetAllocationGranularity +hipMemGetAllocationPropertiesFromHandle +hipMemImportFromShareableHandle +hipMemMap +hipMemMapArrayAsync +hipMemRelease +hipMemRetainAllocationHandle +hipMemSetAccess +hipMemUnmap +hipMemcpy_spt +hipMemcpyAsync_spt +hipStreamSynchronize_spt +hipMemcpyToSymbol_spt +hipMemcpyFromSymbol_spt +hipMemcpy2D_spt +hipMemcpy2DToArray_spt +hipMemcpy2DFromArray_spt +hipMemcpy3D_spt +hipMemset_spt 
+hipMemset2D_spt +hipMemset3D_spt +hipStreamQuery_spt +hipStreamGetFlags_spt +hipStreamGetPriority_spt +hipStreamWaitEvent_spt +hipEventRecord_spt +hipLaunchKernel_spt +hipLaunchCooperativeKernel_spt +hipStreamWriteValue32 +hipStreamWriteValue64 +hipStreamWaitValue32 +hipStreamWaitValue64 +hipDeviceSetLimit +hipGetStreamDeviceId +hipGraphLaunch_spt +hipStreamBeginCapture_spt +hipStreamEndCapture_spt +hipStreamIsCapturing_spt +hipStreamGetCaptureInfo_spt +hipStreamGetCaptureInfo_v2_spt +hipStreamAddCallback_spt +hipMemsetAsync_spt +hipMemset2DAsync_spt +hipMemset3DAsync_spt +hipMemcpy3DAsync_spt +hipMemcpy2DAsync_spt +hipMemcpyFromSymbolAsync_spt +hipMemcpyToSymbolAsync_spt +hipMemcpyFromArray_spt +hipMemcpy2DToArray_spt +hipMemcpy2DFromArrayAsync_spt +hipDrvGetErrorName +hipDrvGetErrorString +hipUserObjectCreate +hipUserObjectRelease +hipUserObjectRetain +hipGraphRetainUserObject +hipGraphReleaseUserObject +hipLaunchHostFunc +hipLaunchHostFunc_spt +hipGraphDebugDotPrint +hipGraphKernelNodeCopyAttributes +hipGraphNodeGetEnabled +hipGraphNodeSetEnabled +hipGraphUpload +hipGraphAddMemAllocNode +hipGraphMemAllocNodeGetParams +hipGraphAddMemFreeNode +hipGraphMemFreeNodeGetParams diff --git a/projects/clr/hipamd/src/hip_hcc.map.in b/projects/clr/hipamd/src/hip_hcc.map.in new file mode 100644 index 0000000000..204b139f2c --- /dev/null +++ b/projects/clr/hipamd/src/hip_hcc.map.in @@ -0,0 +1,529 @@ +hip_4.2 { +global: + hipChooseDevice; + hipCtxCreate; + hipCtxDestroy; + hipCtxDisablePeerAccess; + hipCtxEnablePeerAccess; + hipCtxGetApiVersion; + hipCtxGetCacheConfig; + hipCtxGetCurrent; + hipCtxGetDevice; + hipCtxGetFlags; + hipCtxGetSharedMemConfig; + hipCtxPopCurrent; + hipCtxPushCurrent; + hipCtxSetCacheConfig; + hipCtxSetCurrent; + hipCtxSetSharedMemConfig; + hipCtxSynchronize; + hipDeviceCanAccessPeer; + hipDeviceComputeCapability; + hipDeviceDisablePeerAccess; + hipDeviceEnablePeerAccess; + hipDeviceGet; + hipDeviceGetAttribute; + hipDeviceGetByPCIBusId; + 
hipDeviceGetCacheConfig; + hipDeviceGetStreamPriorityRange; + hipDeviceGetLimit; + hipDeviceGetName; + hipDeviceGetPCIBusId; + hipDeviceGetSharedMemConfig; + hipDeviceGetP2PAttribute; + hipDevicePrimaryCtxGetState; + hipDevicePrimaryCtxRelease; + hipDevicePrimaryCtxReset; + hipDevicePrimaryCtxRetain; + hipDevicePrimaryCtxSetFlags; + hipDeviceReset; + hipDeviceSetCacheConfig; + hipDeviceSetSharedMemConfig; + hipDeviceSynchronize; + hipDeviceTotalMem; + hipDriverGetVersion; + hipEventCreate; + hipEventCreateWithFlags; + hipEventDestroy; + hipEventElapsedTime; + hipEventQuery; + hipEventRecord; + hipEventSynchronize; + hipExtGetLinkTypeAndHopCount; + hipExtLaunchMultiKernelMultiDevice; + hipExtMallocWithFlags; + hipExtModuleLaunchKernel; + hipExtLaunchKernel; + hipFree; + hipFreeArray; + hipFuncSetAttribute; + hipFuncSetCacheConfig; + hipFuncSetSharedMemConfig; + hipGetDevice; + hipGetDeviceCount; + hipGetDeviceProperties; + hipGetErrorName; + hipGetErrorString; + hipGetLastError; + hipMemAdvise; + hipMemAllocHost; + hipHostAlloc; + hipHostFree; + hipHostGetDevicePointer; + hipHostGetFlags; + hipHostMalloc; + hipHostRegister; + hipHostUnregister; + hipInit; + hipIpcCloseMemHandle; + hipIpcGetMemHandle; + hipIpcOpenMemHandle; + hipIpcGetEventHandle; + hipIpcOpenEventHandle; + hipMalloc; + hipMalloc3D; + hipMalloc3DArray; + hipMallocManaged; + hipArrayCreate; + hipArray3DCreate; + hipMallocArray; + hipMallocPitch; + hipMemAllocPitch; + hipMemcpy; + hipMemcpyWithStream; + hipMemcpyParam2D; + hipMemcpy2D; + hipMemcpy2DAsync; + hipMemcpy2DToArray; + hipMemcpy3D; + hipMemcpy3DAsync; + hipDrvMemcpy3D; + hipDrvMemcpy3DAsync; + hipMemcpyAsync; + hipMemcpyDtoD; + hipMemcpyDtoDAsync; + hipMemcpyDtoH; + hipMemcpyDtoHAsync; + hipMemcpyFromSymbol; + hipMemcpyFromSymbolAsync; + hipMemcpyHtoD; + hipMemcpyHtoDAsync; + hipMemcpyPeer; + hipMemcpyPeerAsync; + hipMemcpyToArray; + hipMemcpyFromArray; + hipMemcpyToSymbol; + hipMemcpyToSymbolAsync; + hipMemGetAddressRange; + 
hipGetSymbolAddress; + hipGetSymbolSize; + hipMemGetInfo; + hipMemPrefetchAsync; + hipMemPtrGetInfo; + hipMemRangeGetAttribute; + hipMemRangeGetAttributes; + hipMemset; + hipMemsetAsync; + hipMemsetD8; + hipMemsetD8Async; + hipMemsetD16; + hipMemsetD16Async; + hipMemsetD32; + hipMemsetD32Async; + hipMemset2D; + hipMemset2DAsync; + hipMemset3D; + hipMemset3DAsync; + hipModuleGetFunction; + hipModuleGetGlobal; + hipModuleGetTexRef; + hipModuleLaunchKernel; + hipModuleLaunchKernelExt; + hipLaunchCooperativeKernel; + hipLaunchCooperativeKernelMultiDevice; + hipModuleLoad; + hipModuleLoadData; + hipModuleLoadDataEx; + hipModuleUnload; + hipModuleOccupancyMaxPotentialBlockSize; + hipModuleOccupancyMaxPotentialBlockSizeWithFlags; + hipModuleOccupancyMaxActiveBlocksPerMultiprocessor; + hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + hipOccupancyMaxPotentialBlockSize; + hipOccupancyMaxActiveBlocksPerMultiprocessor; + hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + hipFuncGetAttribute; + hipFuncGetAttributes; + hipPeekAtLastError; + hipPointerSetAttribute; + hipPointerGetAttributes; + hipProfilerStart; + hipProfilerStop; + hipRuntimeGetVersion; + hipGetDeviceFlags; + hipSetDevice; + hipSetDeviceFlags; + hipStreamAddCallback; + hipStreamAttachMemAsync; + hipStreamCreate; + hipStreamCreateWithFlags; + hipStreamCreateWithPriority; + hipStreamDestroy; + hipStreamGetDevice; + hipStreamGetFlags; + hipStreamQuery; + hipStreamSynchronize; + hipStreamWaitEvent; + __hipPopCallConfiguration; + __hipPushCallConfiguration; + __hipRegisterFatBinary; + __hipRegisterFunction; + __hipRegisterVar; + __hipRegisterSurface; + __hipRegisterTexture; + __hipRegisterManagedVar; + __hipUnregisterFatBinary; + __gnu_h2f_ieee; + __gnu_f2h_ieee; + hipConfigureCall; + hipSetupArgument; + hipLaunchByPtr; + hipLaunchKernel; + hipApiName; + hipKernelNameRef; + hipKernelNameRefByPtr; + hipGetStreamDeviceId; + hipProfilerStart; + hipProfilerStop; + hiprtcCompileProgram; + 
hiprtcCreateProgram; + hiprtcDestroyProgram; + hiprtcGetLoweredName; + hiprtcGetProgramLog; + hiprtcGetProgramLogSize; + hiprtcGetCode; + hiprtcGetCodeSize; + hiprtcGetErrorString; + hiprtcAddNameExpression; + hiprtcVersion; + hiprtcLinkCreate; + hiprtcLinkAddFile; + hiprtcLinkAddData; + hiprtcLinkComplete; + hiprtcLinkDestroy; + hipBindTexture; + hipBindTexture2D; + hipBindTextureToArray; + hipBindTextureToMipmappedArray; + hipGetTextureAlignmentOffset; + hipGetTextureReference; + hipUnbindTexture; + hipCreateChannelDesc; + hipCreateTextureObject; + hipDestroyTextureObject; + hipGetChannelDesc; + hipGetTextureObjectResourceDesc; + hipGetTextureObjectResourceViewDesc; + hipGetTextureObjectTextureDesc; + hipTexRefGetAddress; + hipTexRefGetAddressMode; + hipTexRefGetArray; + hipTexRefGetBorderColor; + hipTexRefGetFilterMode; + hipTexRefGetFlags; + hipTexRefGetFormat; + hipTexRefGetMaxAnisotropy; + hipTexRefGetMipmapFilterMode; + hipTexRefGetMipmapLevelBias; + hipTexRefGetMipmapLevelClamp; + hipTexRefGetMipmappedArray; + hipTexRefSetAddress; + hipTexRefSetAddress2D; + hipTexRefSetAddressMode; + hipTexRefSetArray; + hipTexRefSetBorderColor; + hipTexRefSetFilterMode; + hipTexRefSetFlags; + hipTexRefSetFormat; + hipTexRefSetMaxAnisotropy; + hipTexRefSetMipmapFilterMode; + hipTexRefSetMipmapLevelBias; + hipTexRefSetMipmapLevelClamp; + hipTexRefSetMipmappedArray; + hipMipmappedArrayCreate; + hipMallocMipmappedArray; + hipMipmappedArrayDestroy; + hipFreeMipmappedArray; + hipMipmappedArrayGetLevel; + hipGetMipmappedArrayLevel; + hipMallocHost; + hipFreeHost; + hipTexObjectCreate; + hipTexObjectDestroy; + hipTexObjectGetResourceDesc; + hipTexObjectGetResourceViewDesc; + hipTexObjectGetTextureDesc; + hipGetCmdName*; + hipExtStreamCreateWithCUMask; + hipStreamGetPriority; + hipMemcpy2DFromArray; + hipMemcpy2DFromArrayAsync; + hipMemcpyAtoH; + hipMemcpyHtoA; + hipMemcpyParam2DAsync; + __hipGetPCH; + hipExtStreamGetCUMask; + extern "C++" { + hipCreateSurfaceObject*; + 
hipDestroySurfaceObject*; + hipHccModuleLaunchKernel*; + hipExtModuleLaunchKernel*; + + }; +local: + *; +}; + +hip_4.3 { +global: + hipGraphCreate; + hipGraphDestroy; + hipGraphAddKernelNode; + hipGraphAddMemsetNode; + hipGraphAddMemcpyNode; + hipGraphAddMemcpyNode1D; + hipGraphInstantiate; + hipGraphLaunch; + hipStreamIsCapturing; + hipStreamBeginCapture; + hipStreamEndCapture; + hipGraphExecDestroy; + hipImportExternalSemaphore; + hipSignalExternalSemaphoresAsync; + hipWaitExternalSemaphoresAsync; + hipDestroyExternalSemaphore; + hipImportExternalMemory; + hipExternalMemoryGetMappedBuffer; + hipDestroyExternalMemory; + hipMemcpy2DToArrayAsync; + hipDrvMemcpy2DUnaligned; + hipArrayDestroy; + hipGLGetDevices; + hipGraphicsGLRegisterBuffer; + hipGraphicsMapResources; + hipGraphicsResourceGetMappedPointer; + hipGraphicsUnmapResources; + hipGraphicsUnregisterResource; +local: + *; +} hip_4.2; + +hip_4.4 { +global: + hipGraphGetNodes; + hipGraphGetRootNodes; + hipGraphKernelNodeGetParams; + hipGraphKernelNodeSetParams; + hipGraphMemcpyNodeGetParams; + hipGraphMemcpyNodeSetParams; + hipGraphMemsetNodeGetParams; + hipGraphMemsetNodeSetParams; + hipGraphAddDependencies; + hipStreamWaitValue32; + hipStreamWaitValue64; + hipStreamWriteValue32; + hipStreamWriteValue64; + hipGraphExecKernelNodeSetParams; + hipGraphAddEmptyNode; +local: + *; +} hip_4.3; + +hip_4.5 { +global: + hipStreamUpdateCaptureDependencies; + hipGraphRemoveDependencies; + hipGraphGetEdges; + hipGraphNodeGetDependencies; + hipGraphNodeGetDependentNodes; + hipGraphNodeGetType; + hipGraphDestroyNode; + hipGraphClone; + hipGraphNodeFindInClone; + hipGraphAddChildGraphNode; + hipGraphChildGraphNodeGetGraph; + hipGraphExecChildGraphNodeSetParams; + hipGraphAddMemcpyNodeFromSymbol; + hipGraphMemcpyNodeSetParamsFromSymbol; + hipGraphExecMemcpyNodeSetParamsFromSymbol; + hipGraphAddMemcpyNodeToSymbol; + hipGraphMemcpyNodeSetParamsToSymbol; + hipGraphExecMemcpyNodeSetParamsToSymbol; + 
hipGraphExecMemcpyNodeSetParams; + hipGraphMemcpyNodeSetParams1D; + hipGraphExecMemcpyNodeSetParams1D; + hipGraphAddEventRecordNode; + hipGraphEventRecordNodeGetEvent; + hipGraphEventRecordNodeSetEvent; + hipGraphExecEventRecordNodeSetEvent; + hipGraphAddEventWaitNode; + hipGraphEventWaitNodeGetEvent; + hipGraphEventWaitNodeSetEvent; + hipGraphExecEventWaitNodeSetEvent; + hipGraphAddHostNode; + hipGraphHostNodeGetParams; + hipGraphHostNodeSetParams; + hipGraphExecHostNodeSetParams; + hipGraphExecUpdate; + hipGraphInstantiateWithFlags; + hipGraphExecMemsetNodeSetParams; + hipDeviceGetGraphMemAttribute; + hipDeviceSetGraphMemAttribute; + hipDeviceGraphMemTrim; + amd_dbgapi_get_build_name; + amd_dbgapi_get_git_hash; + amd_dbgapi_get_build_id; + hipStreamGetCaptureInfo; + hipStreamGetCaptureInfo_v2; + hipGraphicsGLRegisterImage; + hipGraphicsSubResourceGetMappedArray; +local: + *; +} hip_4.4; + +hip_5.0 { +global: + hipPointerGetAttribute; + hipDrvPointerGetAttributes; + hipThreadExchangeStreamCaptureMode; + hipGraphKernelNodeSetAttribute; + hipGraphKernelNodeGetAttribute; +local: + *; +} hip_4.5; + +hip_5.1 { +global: + hipDeviceGetUuid; + hipDeviceGetDefaultMemPool; + hipDeviceSetMemPool; + hipDeviceGetMemPool; + hipMallocAsync; + hipFreeAsync; + hipMemPoolTrimTo; + hipMemPoolSetAttribute; + hipMemPoolGetAttribute; + hipMemPoolSetAccess; + hipMemPoolGetAccess; + hipMemPoolCreate; + hipMemPoolDestroy; + hipMallocFromPoolAsync; + hipMemPoolExportToShareableHandle; + hipMemPoolImportFromShareableHandle; + hipMemPoolExportPointer; + hipMemPoolImportPointer; + hipMemAddressFree; + hipMemAddressReserve; + hipMemCreate; + hipMemExportToShareableHandle; + hipMemGetAccess; + hipMemGetAllocationGranularity; + hipMemGetAllocationPropertiesFromHandle; + hipMemImportFromShareableHandle; + hipMemMap; + hipMemMapArrayAsync; + hipMemRelease; + hipMemRetainAllocationHandle; + hipMemSetAccess; + hipMemUnmap; +local: + *; +} hip_5.0; + +hip_5.2 { +global: + hipMemcpy_spt; + 
hipMemcpyAsync_spt; + hipStreamSynchronize_spt; + hipMemcpyToSymbol_spt; + hipMemcpyFromSymbol_spt; + hipMemcpy2D_spt; + hipMemcpy2DToArray_spt; + hipMemcpy2DFromArray_spt; + hipMemcpy3D_spt; + hipMemset_spt; + hipMemset2D_spt; + hipMemset3D_spt; + hipStreamQuery_spt; + hipStreamGetFlags_spt; + hipStreamGetPriority_spt; + hipStreamWaitEvent_spt; + hipEventRecord_spt; + hipLaunchKernel_spt; + hipLaunchCooperativeKernel_spt; +local: + *; +} hip_5.1; + +hip_5.3 { +global: + hipDeviceSetLimit; + hiprtcGetBitcode; + hiprtcGetBitcodeSize; + hipGraphLaunch_spt; + hipStreamBeginCapture_spt; + hipStreamEndCapture_spt; + hipStreamIsCapturing_spt; + hipStreamGetCaptureInfo_spt; + hipStreamGetCaptureInfo_v2_spt; + hipStreamAddCallback_spt; + hipMemsetAsync_spt; + hipMemset2DAsync_spt; + hipMemset3DAsync_spt; + hipMemcpy3DAsync_spt; + hipMemcpy2DAsync_spt; + hipMemcpyFromSymbolAsync_spt; + hipMemcpyToSymbolAsync_spt; + hipMemcpyFromArray_spt; + hipMemcpy2DToArray_spt; + hipMemcpy2DFromArrayAsync_spt; + hipMemcpy2DToArrayAsync_spt; + hipDrvGetErrorName; + hipDrvGetErrorString; + hipUserObjectCreate; + hipUserObjectRelease; + hipUserObjectRetain; + hipGraphRetainUserObject; + hipGraphReleaseUserObject; + hipLaunchHostFunc; + hipLaunchHostFunc_spt; + hipRegisterTracerCallback; + hipGraphDebugDotPrint; + hipGraphKernelNodeCopyAttributes; + hipGraphNodeGetEnabled; + hipGraphNodeSetEnabled; + hipGraphUpload; +local: + *; +} hip_5.2; + +hip_5.5 { +global: + hipModuleLaunchCooperativeKernel; + hipModuleLaunchCooperativeKernelMultiDevice; + hipGraphAddMemAllocNode; + hipGraphMemAllocNodeGetParams; + hipGraphAddMemFreeNode; + hipGraphMemFreeNodeGetParams; +local: + *; +} hip_5.3; + +hip_5.6 { +global: + hipArrayGetInfo; + hipArrayGetDescriptor; + hipArray3DGetDescriptor; +local: + *; +} hip_5.5; \ No newline at end of file diff --git a/projects/clr/hipamd/src/hip_hcc.rc b/projects/clr/hipamd/src/hip_hcc.rc new file mode 100644 index 0000000000..009dc30c18 --- /dev/null +++ 
b/projects/clr/hipamd/src/hip_hcc.rc @@ -0,0 +1,75 @@ +#define STR(__macro__) #__macro__ +#define XSTR(__macro__) STR(__macro__) + +#if defined(_DEBUG) +#define DEBUG_ONLY(x) x +#else +#define DEBUG_ONLY(x) +#endif + +#define VERSION_PREFIX_MAJOR 2 +#define VERSION_PREFIX_MINOR 0 + + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "winresrc.h" +#include "utils/versions.hpp" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER + FILEFLAGSMASK 0x3fL +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x40004L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "Comments", " \0" + VALUE "CompanyName", "Advanced Micro Devices Inc.\0" + VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0" + VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + VALUE "InternalName", "OpenCL" + VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0" + VALUE "OriginalFilename", "OpenCL.dll" + VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0" + VALUE "ProductVersion", "10.0." 
XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// diff --git a/projects/clr/hipamd/src/hip_hcc_in.rc.in b/projects/clr/hipamd/src/hip_hcc_in.rc.in new file mode 100755 index 0000000000..a981d9d1ab --- /dev/null +++ b/projects/clr/hipamd/src/hip_hcc_in.rc.in @@ -0,0 +1,77 @@ +#define STR(__macro__) #__macro__ +#define XSTR(__macro__) STR(__macro__) + +#if defined(_DEBUG) +#define DEBUG_ONLY(x) x +#else +#define DEBUG_ONLY(x) +#endif + +#define VERSION_PREFIX_MAJOR @VERSION_MAJOR_AMDHIP@ +#define VERSION_PREFIX_MINOR @VERSION_MINOR_AMDHIP@ + + + + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "winresrc.h" +#include "utils/versions.hpp" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) 
resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO +FILEVERSION 10, 0, AMD_PLATFORM_BUILD_NUMBER, AMD_PLATFORM_REVISION_NUMBER +PRODUCTVERSION 10, 0, AMD_PLATFORM_BUILD_NUMBER, AMD_PLATFORM_REVISION_NUMBER +FILEFLAGSMASK 0x3fL +#ifdef _DEBUG +FILEFLAGS 0x1L +#else +FILEFLAGS 0x0L +#endif +FILEOS 0x40004L +FILETYPE 0x2L +FILESUBTYPE 0x0L +BEGIN +BLOCK "StringFileInfo" +BEGIN +BLOCK "040904b0" +BEGIN +VALUE "Comments", " \0" +VALUE "CompanyName", "Advanced Micro Devices Inc.\0" +VALUE "FileDescription", AMD_PLATFORM_NAME " amdhip64 " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0" +VALUE "FileVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) +VALUE "InternalName", "amdhip64" +VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0" +VALUE "OriginalFilename", "amdhip64.dll" +VALUE "ProductName", "amdhip64 " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0" +VALUE "ProductVersion", "10.0." XSTR(AMD_PLATFORM_BUILD_NUMBER) "." XSTR(AMD_PLATFORM_REVISION_NUMBER) +END +END +BLOCK "VarFileInfo" +BEGIN +VALUE "Translation", 0x409, 1200 +END +END + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// diff --git a/projects/clr/hipamd/src/hip_hmm.cpp b/projects/clr/hipamd/src/hip_hmm.cpp new file mode 100644 index 0000000000..ec201663f6 --- /dev/null +++ b/projects/clr/hipamd/src/hip_hmm.cpp @@ -0,0 +1,259 @@ +/* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "hip_conversions.hpp" +#include "platform/context.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" + +// Forward declaration of a function +hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); + +// Make sure HIP defines match ROCclr to avoid double conversion +static_assert(hipCpuDeviceId == amd::CpuDeviceId, "CPU device ID mismatch with ROCclr!"); +static_assert(hipInvalidDeviceId == amd::InvalidDeviceId, + "Invalid device ID mismatch with ROCclr!"); + +static_assert(static_cast(hipMemAdviseSetReadMostly) == + amd::MemoryAdvice::SetReadMostly, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemAdviseUnsetReadMostly) == + amd::MemoryAdvice::UnsetReadMostly, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemAdviseSetPreferredLocation) == + amd::MemoryAdvice::SetPreferredLocation, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemAdviseUnsetPreferredLocation) == + amd::MemoryAdvice::UnsetPreferredLocation, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemAdviseSetAccessedBy) == + amd::MemoryAdvice::SetAccessedBy, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemAdviseUnsetAccessedBy) == + amd::MemoryAdvice::UnsetAccessedBy, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemAdviseSetCoarseGrain) == + amd::MemoryAdvice::SetCoarseGrain, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemAdviseUnsetCoarseGrain) == + amd::MemoryAdvice::UnsetCoarseGrain, "Enum mismatch with ROCclr!"); + +static_assert(static_cast(hipMemRangeAttributeReadMostly) == + amd::MemRangeAttribute::ReadMostly, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemRangeAttributePreferredLocation) == + amd::MemRangeAttribute::PreferredLocation, "Enum mismatch with ROCclr!"); +static_assert(static_cast(hipMemRangeAttributeAccessedBy) == + amd::MemRangeAttribute::AccessedBy, "Enum mismatch with 
ROCclr!"); +static_assert(static_cast(hipMemRangeAttributeLastPrefetchLocation) == + amd::MemRangeAttribute::LastPrefetchLocation, "Enum mismatch with ROCclr!"); + +// ================================================================================================ +hipError_t hipMallocManaged(void** dev_ptr, size_t size, unsigned int flags) { + HIP_INIT_API(hipMallocManaged, dev_ptr, size, flags); + + if ((dev_ptr == nullptr) || (size == 0) || + ((flags != hipMemAttachGlobal) && (flags != hipMemAttachHost))) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipMallocManaged(dev_ptr, size), *dev_ptr); +} + +// ================================================================================================ +hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, + hipStream_t stream) { + HIP_INIT_API(hipMemPrefetchAsync, dev_ptr, count, device, stream); + + if ((dev_ptr == nullptr) || (count == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorContextIsDestroyed); + } + + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(dev_ptr, offset); + + if ((memObj != nullptr) && (count > (memObj->getSize() - offset))) { + HIP_RETURN(hipErrorInvalidValue); + } + if (device != hipCpuDeviceId && (static_cast(device) >= g_devices.size())) { + HIP_RETURN(hipErrorInvalidDevice); + } + + hip::Stream* hip_stream = nullptr; + amd::Device* dev = nullptr; + bool cpu_access = false; + + if ((memObj == nullptr) && (device != hipCpuDeviceId) && + (!g_devices[device]->devices()[0]->info().hmmCpuMemoryAccessible_)) { + HIP_RETURN(hipErrorNotSupported); + } + + // Pick the specified stream or Null one from the provided device + if (device == hipCpuDeviceId) { + cpu_access = true; + hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : hip::getStream(stream); + } else { + dev = g_devices[device]->devices()[0]; + hip_stream = (stream == nullptr) ? 
g_devices[device]->NullStream() : hip::getStream(stream); + } + + if (hip_stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Command::EventWaitList waitList; + amd::SvmPrefetchAsyncCommand* command = + new amd::SvmPrefetchAsyncCommand(*hip_stream, waitList, dev_ptr, count, dev, cpu_access); + if (command == nullptr) { + HIP_RETURN(hipErrorOutOfMemory); // like every other exit: record the code in hip::tls.last_error_ + } + + command->enqueue(); + command->release(); + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice, int device) { + HIP_INIT_API(hipMemAdvise, dev_ptr, count, advice, device); + + bool isAdviseReadMostly = (advice == hipMemAdviseSetReadMostly) || + (advice == hipMemAdviseUnsetReadMostly); + + if (!isAdviseReadMostly && ((device != hipCpuDeviceId) && + (static_cast(device) >= g_devices.size()))) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if ((dev_ptr == nullptr) || (count == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(dev_ptr, offset); + if (memObj == nullptr) { + HIP_RETURN(hipErrorMemoryAllocation); + } + + if (count > (memObj->getSize() - offset)) { + HIP_RETURN(hipErrorInvalidValue); + } + + amd::Device* dev = (device == hipCpuDeviceId || isAdviseReadMostly) ? + g_devices[0]->devices()[0] : g_devices[device]->devices()[0]; + bool use_cpu = (device == hipCpuDeviceId) ? 
true : false; + + // Set the allocation attributes in AMD HMM + if (!dev->SetSvmAttributes(dev_ptr, count, static_cast(advice), use_cpu)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemRangeGetAttribute(void* data, size_t data_size, hipMemRangeAttribute attribute, + const void* dev_ptr, size_t count) { + HIP_INIT_API(hipMemRangeGetAttribute, data, data_size, attribute, dev_ptr, count); + + if ((data == nullptr) || (data_size == 0) || (dev_ptr == nullptr) || (count == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Shouldn't matter for which device the interface is called + amd::Device* dev = g_devices[0]->devices()[0]; + + // Get the allocation attribute from AMD HMM + if (!dev->GetSvmAttributes(&data, &data_size, reinterpret_cast(&attribute), 1, + dev_ptr, count)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes, + hipMemRangeAttribute* attributes, size_t num_attributes, + const void* dev_ptr, size_t count) { + HIP_INIT_API(hipMemRangeGetAttributes, data, data_sizes, + attributes, num_attributes, dev_ptr, count); + + if ((data == nullptr) || (data_sizes == nullptr) || (attributes == nullptr) || + (num_attributes == 0) || (dev_ptr == nullptr) || (count == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Shouldn't matter for which device the interface is called + amd::Device* dev = g_devices[0]->devices()[0]; + // Get the allocation attributes from AMD HMM + if (!dev->GetSvmAttributes(data, data_sizes, reinterpret_cast(attributes), + num_attributes, dev_ptr, count)) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +// 
================================================================================================ +hipError_t hipStreamAttachMemAsync(hipStream_t stream, void* dev_ptr, + size_t length, unsigned int flags) { + HIP_INIT_API(hipStreamAttachMemAsync, stream, dev_ptr, length, flags); + + if ((stream == nullptr) || (dev_ptr == nullptr) || (length == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Unclear what should be done for this interface in AMD HMM, since it's generic SVM alloc + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align) { + if (ptr == nullptr) { + return hipErrorInvalidValue; + } else if (size == 0) { + *ptr = nullptr; + return hipSuccess; + } + + assert((hip::host_context != nullptr) && "Current host context must be valid"); + amd::Context& ctx = *hip::host_context; + + const amd::Device& dev = *ctx.devices()[0]; + + // Allocate SVM fine grain buffer with the forced host pointer, avoiding explicit memory + // allocation in the device driver + *ptr = amd::SvmBuffer::malloc(ctx, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR, + size, (align == 0) ? 
dev.info().memBaseAddrAlign_ : align); + + if (*ptr == nullptr) { + return hipErrorMemoryAllocation; + } + size_t offset = 0; //this is ignored + amd::Memory* memObj = getMemoryObject(*ptr, offset); + if (memObj == nullptr) { + return hipErrorMemoryAllocation; + } + //saves the current device id so that it can be accessed later + memObj->getUserData().deviceId = hip::getCurrentDevice()->deviceId(); + + ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipMallocManaged ptr=0x%zx", *ptr); + return hipSuccess; +} diff --git a/projects/clr/hipamd/src/hip_intercept.cpp b/projects/clr/hipamd/src/hip_intercept.cpp new file mode 100644 index 0000000000..da0a699bd3 --- /dev/null +++ b/projects/clr/hipamd/src/hip_intercept.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include "hip/hip_runtime.h" +#include "hip_internal.hpp" +#include "hip_platform.hpp" +#include "hip_prof_api.h" + +// HIP API callback/activity + +extern const std::string& FunctionName(const hipFunction_t f); + +extern "C" { + +int hipGetStreamDeviceId(hipStream_t stream) { + if (!hip::isValid(stream)) { + return -1; + } + hip::Stream* s = reinterpret_cast(stream); + return (s != nullptr) ? s->DeviceId() : ihipGetDevice(); +} + +const char* hipKernelNameRef(const hipFunction_t function) { + return (function != nullptr) ? FunctionName(function).c_str() : nullptr; +} + +const char* hipKernelNameRefByPtr(const void* host_function, hipStream_t stream) { + [](auto&&...) {}(stream); + return (host_function != nullptr) ? PlatformState::instance().getStatFuncName(host_function) + : nullptr; +} + +void hipRegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id, + void* data)) { + activity_prof::report_activity.store(function, std::memory_order_relaxed); +} + +const char* hipApiName(uint32_t id) { return hip_api_name(id); } + +} // extern "C" diff --git a/projects/clr/hipamd/src/hip_internal.hpp b/projects/clr/hipamd/src/hip_internal.hpp new file mode 100644 index 0000000000..ca924bd5f8 --- /dev/null +++ b/projects/clr/hipamd/src/hip_internal.hpp @@ -0,0 +1,578 @@ +/* Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef HIP_SRC_HIP_INTERNAL_H +#define HIP_SRC_HIP_INTERNAL_H + +#include "vdi_common.hpp" +#include "hip_prof_api.h" +#include "trace_helper.h" +#include "utils/debug.hpp" +#include "hip_formatting.hpp" +#include "hip_graph_capture.hpp" + +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +#define KNRM "\x1B[0m" +#define KRED "\x1B[31m" +#define KGRN "\x1B[32m" +#define KYEL "\x1B[33m" +#define KBLU "\x1B[34m" +#define KMAG "\x1B[35m" +#define KCYN "\x1B[36m" +#define KWHT "\x1B[37m" + +/*! 
IHIP IPC MEMORY Structure */ +#define IHIP_IPC_MEM_HANDLE_SIZE 32 +#define IHIP_IPC_MEM_RESERVED_SIZE LP64_SWITCH(20,12) + +typedef struct ihipIpcMemHandle_st { + char ipc_handle[IHIP_IPC_MEM_HANDLE_SIZE]; ///< ipc memory handle on ROCr + size_t psize; + size_t poffset; + int owners_process_id; + char reserved[IHIP_IPC_MEM_RESERVED_SIZE]; +} ihipIpcMemHandle_t; + +#define IHIP_IPC_EVENT_HANDLE_SIZE 32 +#define IHIP_IPC_EVENT_RESERVED_SIZE LP64_SWITCH(28,24) +typedef struct ihipIpcEventHandle_st { + //hsa_amd_ipc_signal_t ipc_handle; ///< ipc signal handle on ROCr + //char ipc_handle[IHIP_IPC_EVENT_HANDLE_SIZE]; + //char reserved[IHIP_IPC_EVENT_RESERVED_SIZE]; + char shmem_name[IHIP_IPC_EVENT_HANDLE_SIZE]; +}ihipIpcEventHandle_t; + +const char* ihipGetErrorName(hipError_t hip_error); + +static amd::Monitor g_hipInitlock{"hipInit lock"}; +#define HIP_INIT(noReturn) {\ + amd::ScopedLock lock(g_hipInitlock); \ + if (!amd::Runtime::initialized()) { \ + if (!hip::init() && !noReturn) { \ + HIP_RETURN(hipErrorInvalidDevice); \ + } \ + } \ + if (hip::tls.device_ == nullptr && g_devices.size() > 0) { \ + hip::tls.device_ = g_devices[0]; \ + amd::Os::setPreferredNumaNode(g_devices[0]->devices()[0]->getPreferredNumaNode()); \ + } \ + } + +#define HIP_INIT_VOID() {\ + amd::ScopedLock lock(g_hipInitlock); \ + if (!amd::Runtime::initialized()) { \ + if (hip::init()) {} \ + } \ + if (hip::tls.device_ == nullptr && g_devices.size() > 0) { \ + hip::tls.device_ = g_devices[0]; \ + amd::Os::setPreferredNumaNode(g_devices[0]->devices()[0]->getPreferredNumaNode()); \ + } \ + } + + +#define HIP_API_PRINT(...) \ + uint64_t startTimeUs=0; \ + HIPPrintDuration(amd::LOG_INFO, amd::LOG_API, &startTimeUs, \ + "%s %s ( %s ) %s", KGRN, \ + __func__, ToString( __VA_ARGS__ ).c_str(), KNRM); + +#define HIP_ERROR_PRINT(err, ...) 
\ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%s: Returned %s : %s", \ + __func__, ihipGetErrorName(err), ToString( __VA_ARGS__ ).c_str()); + +#define HIP_INIT_API_INTERNAL(noReturn, cid, ...) \ + HIP_API_PRINT(__VA_ARGS__) \ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread) && !noReturn) { \ + HIP_RETURN(hipErrorOutOfMemory); \ + } \ + HIP_INIT(noReturn) \ + HIP_CB_SPAWNER_OBJECT(cid); + +// This macro should be called at the beginning of every HIP API. +#define HIP_INIT_API(cid, ...) \ + HIP_INIT_API_INTERNAL(0, cid, __VA_ARGS__) \ + if (g_devices.size() == 0) { \ + HIP_RETURN(hipErrorNoDevice); \ + } + +#define HIP_INIT_API_NO_RETURN(cid, ...) \ + HIP_INIT_API_INTERNAL(1, cid, __VA_ARGS__) + +#define HIP_RETURN_DURATION(ret, ...) \ + hip::tls.last_error_ = ret; \ + HIPPrintDuration(amd::LOG_INFO, amd::LOG_API, &startTimeUs, \ + "%s: Returned %s : %s", \ + __func__, ihipGetErrorName(hip::tls.last_error_), \ + ToString( __VA_ARGS__ ).c_str()); \ + return hip::tls.last_error_; + +#define HIP_RETURN(ret, ...) \ + hip::tls.last_error_ = ret; \ + HIP_ERROR_PRINT(hip::tls.last_error_, __VA_ARGS__) \ + return hip::tls.last_error_; + +#define HIP_RETURN_ONFAIL(func) \ + do { \ + hipError_t herror = (func); \ + if (herror != hipSuccess) { \ + HIP_RETURN(herror); \ + } \ + } while (0); + +// Cannot be used in place of HIP_RETURN. 
+// Refrain from using for external HIP APIs +#define IHIP_RETURN_ONFAIL(func) \ + do { \ + hipError_t herror = (func); \ + if (herror != hipSuccess) { \ + return herror; \ + } \ + } while (0); + +#define CHECK_STREAM_CAPTURE_SUPPORTED() \ + if (hip::tls.stream_capture_mode_ == hipStreamCaptureModeThreadLocal) { \ + if (hip::tls.capture_streams_.size() != 0) { \ + HIP_RETURN(hipErrorStreamCaptureUnsupported); \ + } \ + } else if (hip::tls.stream_capture_mode_ == hipStreamCaptureModeGlobal) { \ + if (hip::tls.capture_streams_.size() != 0) { \ + HIP_RETURN(hipErrorStreamCaptureUnsupported); \ + } \ + amd::ScopedLock lock(g_captureStreamsLock); \ + if (g_captureStreams.size() != 0) { \ + HIP_RETURN(hipErrorStreamCaptureUnsupported); \ + } \ + } + +// Sync APIs cannot be called when stream capture is active +#define CHECK_STREAM_CAPTURING() \ + if (!g_captureStreams.empty()) { \ + return hipErrorStreamCaptureImplicit; \ + } + +#define STREAM_CAPTURE(name, stream, ...) \ + getStreamPerThread(stream); \ + if (stream != nullptr && \ + reinterpret_cast(stream)->GetCaptureStatus() == \ + hipStreamCaptureStatusActive) { \ + hipError_t status = capture##name(stream, ##__VA_ARGS__); \ + return status; \ + } + +#define EVENT_CAPTURE(name, event, ...) 
\ + if (event != nullptr && reinterpret_cast(event)->GetCaptureStatus() == true) { \ + hipError_t status = capture##name(event, ##__VA_ARGS__); \ + HIP_RETURN(status); \ + } + +#define PER_THREAD_DEFAULT_STREAM(stream) \ + if (stream == nullptr) { \ + stream = getPerThreadDefaultStream(); \ + } + +namespace hc { +class accelerator; +class accelerator_view; +}; + +struct ihipExec_t { + dim3 gridDim_; + dim3 blockDim_; + size_t sharedMem_; + hipStream_t hStream_; + std::vector arguments_; +}; + +class stream_per_thread { +private: + std::vector m_streams; +public: + stream_per_thread(); + stream_per_thread(const stream_per_thread& ) = delete; + void operator=(const stream_per_thread& ) = delete; + ~stream_per_thread(); + hipStream_t get(); +}; + +namespace hip { + class Device; + class MemoryPool; + class Stream : public amd::HostQueue { + public: + enum Priority : int { High = -1, Normal = 0, Low = 1 }; + + private: + mutable amd::Monitor lock_; + Device* device_; + Priority priority_; + unsigned int flags_; + bool null_; + const std::vector cuMask_; + + /// Stream capture related parameters + + /// Current capture status of the stream + hipStreamCaptureStatus captureStatus_; + /// Graph that is constructed with capture + hipGraph_t pCaptureGraph_; + /// Based on mode stream capture places restrictions on API calls that can be made within or + /// concurrently + hipStreamCaptureMode captureMode_{hipStreamCaptureModeGlobal}; + bool originStream_; + /// Origin stream has no parent. 
Parent stream for the derived captured streams with event + /// dependencies + hipStream_t parentStream_ = nullptr; + /// Last graph node captured in the stream + std::vector lastCapturedNodes_; + /// dependencies removed via API hipStreamUpdateCaptureDependencies + std::vector removedDependencies_; + /// Derived streams/Paralell branches from the origin stream + std::vector parallelCaptureStreams_; + /// Capture events + std::unordered_set captureEvents_; + unsigned long long captureID_; + + static inline CommandQueue::Priority convertToQueuePriority(Priority p){ + return p == Priority::High ? amd::CommandQueue::Priority::High : p == Priority::Low ? + amd::CommandQueue::Priority::Low : amd::CommandQueue::Priority::Normal; + } + + public: + Stream(Device* dev, Priority p = Priority::Normal, unsigned int f = 0, bool null_stream = false, + const std::vector& cuMask = {}, + hipStreamCaptureStatus captureStatus = hipStreamCaptureStatusNone); + + /// Creates the hip stream object, including AMD host queue + bool Create(); + virtual bool terminate() override; + /// Get device ID associated with the current stream; + int DeviceId() const; + /// Get HIP device associated with the stream + Device* GetDevice() const { return device_; } + /// Get device ID associated with a stream; + static int DeviceId(const hipStream_t hStream); + /// Returns if stream is null stream + bool Null() const { return null_; } + /// Returns the lock object for the current stream + amd::Monitor& Lock() const { return lock_; } + /// Returns the creation flags for the current stream + unsigned int Flags() const { return flags_; } + /// Returns the priority for the current stream + Priority GetPriority() const { return priority_; } + /// Returns the CU mask for the current stream + const std::vector GetCUMask() const { return cuMask_; } + + /// Sync all non-blocking streams + static void syncNonBlockingStreams(int deviceId); + + /// Check whether any blocking stream running + static bool 
StreamCaptureBlocking(); + + /// Destroy all streams on a given device + static void destroyAllStreams(int deviceId); + + /// Check Stream Capture status to make sure it is done + static bool StreamCaptureOngoing(void); + + /// Returns capture status of the current stream + hipStreamCaptureStatus GetCaptureStatus() const { return captureStatus_; } + /// Returns capture mode of the current stream + hipStreamCaptureMode GetCaptureMode() const { return captureMode_; } + /// Returns if stream is origin stream + bool IsOriginStream() const { return originStream_; } + void SetOriginStream() { originStream_ = true; } + /// Returns captured graph + hipGraph_t GetCaptureGraph() const { return pCaptureGraph_; } + /// Returns last captured graph node + const std::vector& GetLastCapturedNodes() const { return lastCapturedNodes_; } + /// Set last captured graph node + void SetLastCapturedNode(hipGraphNode_t graphNode) { + lastCapturedNodes_.clear(); + lastCapturedNodes_.push_back(graphNode); + } + /// returns updated dependencies removed + const std::vector& GetRemovedDependencies() { + return removedDependencies_; + } + /// Append captured node via the wait event cross stream + void AddCrossCapturedNode(std::vector graphNodes, bool replace = false) { + // replace dependencies as per flag hipStreamSetCaptureDependencies + if (replace == true) { + for (auto node : lastCapturedNodes_) { + removedDependencies_.push_back(node); + } + lastCapturedNodes_.clear(); + } + for (auto node : graphNodes) { + lastCapturedNodes_.push_back(node); + } + } + /// Set graph that is being captured + void SetCaptureGraph(hipGraph_t pGraph) { + pCaptureGraph_ = pGraph; + captureStatus_ = hipStreamCaptureStatusActive; + } + void SetCaptureId() { + // ID is generated in Begin Capture i.e.. 
when capture status is active + captureID_ = GenerateCaptureID(); + } + void SetCaptureId(unsigned long long captureId) { + // ID is given from parent stream + captureID_ = captureId; + } + /// reset capture parameters + hipError_t EndCapture(); + /// Set capture status + void SetCaptureStatus(hipStreamCaptureStatus captureStatus) { captureStatus_ = captureStatus; } + /// Set capture mode + void SetCaptureMode(hipStreamCaptureMode captureMode) { captureMode_ = captureMode; } + /// Set parent stream + void SetParentStream(hipStream_t parentStream) { parentStream_ = parentStream; } + /// Get parent stream + hipStream_t GetParentStream() const { return parentStream_; } + /// Generate ID for stream capture unique over the lifetime of the process + static unsigned long long GenerateCaptureID() { + static std::atomic uid(0); + return ++uid; + } + /// Get Capture ID + unsigned long long GetCaptureID() { return captureID_; } + void SetCaptureEvent(hipEvent_t e) { captureEvents_.emplace(e); } + void EraseCaptureEvent(hipEvent_t e) { + auto it = captureEvents_.find(e); + if (it != captureEvents_.end()) { + captureEvents_.erase(it); + } + } + void SetParallelCaptureStream(hipStream_t s) { parallelCaptureStreams_.push_back(s); } + void EraseParallelCaptureStream(hipStream_t s) { + auto it = std::find(parallelCaptureStreams_.begin(), parallelCaptureStreams_.end(), s); + if (it != parallelCaptureStreams_.end()) { + parallelCaptureStreams_.erase(it); + } + } + static bool existsActiveStreamForDevice(hip::Device* device); + + /// The stream should be destroyed via release() rather than delete + private: + ~Stream() {}; + }; + + /// HIP Device class + class Device { + amd::Monitor lock_{"Device lock", true}; + /// ROCclr context + amd::Context* context_; + /// Device's ID + /// Store it here so we don't have to loop through the device list every time + int deviceId_; + /// ROCclr host queue for default streams + Stream* null_stream_ = nullptr; + /// Store device flags + unsigned 
int flags_; + /// Maintain list of user enabled peers + std::list userEnabledPeers; + + /// True if this device is active + bool isActive_; + + + MemoryPool* default_mem_pool_; //!< Default memory pool for this device + MemoryPool* current_mem_pool_; + MemoryPool* graph_mem_pool_; //!< Memory pool, associated with graphs for this device + + std::set mem_pools_; + + public: + Device(amd::Context* ctx, int devId): context_(ctx), + deviceId_(devId), + flags_(hipDeviceScheduleSpin), + isActive_(false), + default_mem_pool_(nullptr), + current_mem_pool_(nullptr), + graph_mem_pool_(nullptr) + { assert(ctx != nullptr); } + ~Device(); + + bool Create(); + amd::Context* asContext() const { return context_; } + int deviceId() const { return deviceId_; } + void retain() const { context_->retain(); } + void release() const { context_->release(); } + const std::vector& devices() const { return context_->devices(); } + hipError_t EnablePeerAccess(int peerDeviceId){ + amd::ScopedLock lock(lock_); + bool found = (std::find(userEnabledPeers.begin(), userEnabledPeers.end(), peerDeviceId) != userEnabledPeers.end()); + if (found) { + return hipErrorPeerAccessAlreadyEnabled; + } + userEnabledPeers.push_back(peerDeviceId); + return hipSuccess; + } + hipError_t DisablePeerAccess(int peerDeviceId) { + amd::ScopedLock lock(lock_); + bool found = (std::find(userEnabledPeers.begin(), userEnabledPeers.end(), peerDeviceId) != userEnabledPeers.end()); + if (found) { + userEnabledPeers.remove(peerDeviceId); + return hipSuccess; + } else { + return hipErrorPeerAccessNotEnabled; + } + } + unsigned int getFlags() const { return flags_; } + void setFlags(unsigned int flags) { flags_ = flags; } + void Reset(); + + hip::Stream* NullStream(bool skip_alloc = false); + Stream* GetNullStream(); + + + bool GetActiveStatus() { + amd::ScopedLock lock(lock_); + if (isActive_) return true; + if (Stream::existsActiveStreamForDevice(this)) { + isActive_ = true; + return true; + } + return false; + } + + /// Set 
the current memory pool on the device + void SetCurrentMemoryPool(MemoryPool* pool = nullptr) { + current_mem_pool_ = (pool == nullptr) ? default_mem_pool_ : pool; + } + + /// Get the current memory pool on the device + MemoryPool* GetCurrentMemoryPool() const { return current_mem_pool_; } + + /// Get the default memory pool on the device + MemoryPool* GetDefaultMemoryPool() const { return default_mem_pool_; } + + /// Get the graph memory pool on the device + MemoryPool* GetGraphMemoryPool() const { return graph_mem_pool_; } + + /// Add memory pool to the device + void AddMemoryPool(MemoryPool* pool); + + /// Remove memory pool from the device + void RemoveMemoryPool(MemoryPool* pool); + + /// Free memory from the device + bool FreeMemory(amd::Memory* memory, Stream* stream); + + /// Release freed memory from all pools on the current device + void ReleaseFreedMemory(Stream* stream); + + /// Removes a destroyed stream from the safe list of memory pools + void RemoveStreamFromPools(Stream* stream); + + }; + + /// Thread Local Storage Variables Aggregator Class + class TlsAggregator { + public: + Device* device_; + std::stack ctxt_stack_; + hipError_t last_error_; + std::vector capture_streams_; + hipStreamCaptureMode stream_capture_mode_; + std::stack exec_stack_; + stream_per_thread stream_per_thread_obj_; + + TlsAggregator(): device_(nullptr), + last_error_(hipSuccess), + stream_capture_mode_(hipStreamCaptureModeGlobal) { + } + ~TlsAggregator() { + } + }; + extern thread_local TlsAggregator tls; + + /// Device representing the host - for pinned memory + extern amd::Context* host_context; + + extern bool init(); + + extern Device* getCurrentDevice(); + + extern void setCurrentDevice(unsigned int index); + + /// Get ROCclr queue associated with hipStream + /// Note: This follows the CUDA spec to sync with default streams + /// and Blocking streams + extern hip::Stream* getStream(hipStream_t stream); + /// Get default stream associated with the ROCclr context + extern 
hip::Stream* getNullStream(amd::Context&); + /// Get default stream of the thread + extern hip::Stream* getNullStream(); + /// Get device ID associated with the ROCclr context + int getDeviceID(amd::Context& ctx); + /// Check if stream is valid + extern bool isValid(hipStream_t& stream); + extern bool isValid(hipEvent_t event); + extern amd::Monitor hipArraySetLock; + extern std::unordered_set hipArraySet; +}; // namespace hip + +extern void WaitThenDecrementSignal(hipStream_t stream, hipError_t status, void* user_data); + +/// Wait all active streams on the blocking queue. The method enqueues a wait command and +/// doesn't stall the current thread +extern void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream = false); + +extern std::vector g_devices; +extern hipError_t ihipDeviceGetCount(int* count); +extern int ihipGetDevice(); + +extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags); +extern amd::Memory* getMemoryObject(const void* ptr, size_t& offset, size_t size = 0); +extern amd::Memory* getMemoryObjectWithOffset(const void* ptr, const size_t size = 0); +extern void getStreamPerThread(hipStream_t& stream); +extern hipStream_t getPerThreadDefaultStream(); +extern hipError_t ihipUnbindTexture(textureReference* texRef); +extern hipError_t ihipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); +extern hipError_t ihipHostUnregister(void* hostPtr); +extern hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device); + +extern hipError_t ihipDeviceGet(hipDevice_t* device, int deviceId); +extern hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void* ptr, + uint64_t value, uint64_t mask, unsigned int flags, size_t sizeBytes); +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hip::Stream& stream, bool isHostAsync = false, bool isGPUAsync = true); +constexpr bool kOptionChangeable = true; +constexpr bool kNewDevProg = 
false; + +constexpr bool kMarkerDisableFlush = true; //!< Avoids command batch flush in ROCclr + +extern std::vector g_captureStreams; +extern amd::Monitor g_captureStreamsLock; +extern std::unordered_set g_allCapturingStreams; +#endif // HIP_SRC_HIP_INTERNAL_H diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp new file mode 100644 index 0000000000..8b92754ae3 --- /dev/null +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -0,0 +1,4084 @@ +/* Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include "hip_internal.hpp" +#include "hip_platform.hpp" +#include "hip_conversions.hpp" +#include "platform/context.hpp" +#include "platform/command.hpp" +#include "platform/memory.hpp" +#include "amdocl/cl_vk_amd.hpp" + +amd::Monitor hip::hipArraySetLock{"Guards global hipArray set"}; +std::unordered_set hip::hipArraySet; + +// ================================================================================================ +amd::Memory* getMemoryObject(const void* ptr, size_t& offset, size_t size) { + auto memObj = amd::MemObjMap::FindMemObj(ptr, &offset); + if (memObj == nullptr) { + // If memObj not found, use arena_mem_obj. arena_mem_obj is null, if HMM and Xnack is disabled. + memObj = (hip::getCurrentDevice()->asContext()->svmDevices()[0])->GetArenaMemObj( + ptr, offset, size); + } + return memObj; +} + +// ================================================================================================ +amd::Memory* getMemoryObjectWithOffset(const void* ptr, const size_t size) { + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + + if (memObj != nullptr) { + if (size > (memObj->getSize() - offset)) { + return nullptr; + } + memObj = new (memObj->getContext()) amd::Buffer(*memObj, memObj->getMemFlags(), offset, size); + if (memObj == nullptr) {; + return nullptr; + } + + if (!memObj->create(nullptr)) { + memObj->release(); + return nullptr; + } + } + + return memObj; +} + +// ================================================================================================ +hipError_t ihipFree(void *ptr) { + if (ptr == nullptr) { + return hipSuccess; + } + + size_t offset = 0; + amd::Memory* memory_object = getMemoryObject(ptr, offset); + if (memory_object != nullptr) { + // Wait on the device, associated with the current memory object during allocation + auto device_id = memory_object->getUserData().deviceId; + auto dev = g_devices[device_id]; + // Skip stream allocation, since if it wasn't allocated until free, then 
the device wasn't used + constexpr bool SkipStreamAlloc = true; + hip::Stream* stream = dev->NullStream(SkipStreamAlloc); + if (stream != nullptr) { + stream->finish(); + } + hip::Stream::syncNonBlockingStreams(device_id); + // Find out if memory belongs to any memory pool + if (!g_devices[device_id]->FreeMemory(memory_object, nullptr)) { + // External mem is not svm. + if (memory_object->isInterop()) { + amd::MemObjMap::RemoveMemObj(ptr); + memory_object->release(); + } else { + amd::SvmBuffer::free(memory_object->getContext(), ptr); + } + } + return hipSuccess; + } + return hipErrorInvalidValue; +} + +// ================================================================================================ +hipError_t hipImportExternalMemory( + hipExternalMemory_t* extMem_out, + const hipExternalMemoryHandleDesc* memHandleDesc) { + HIP_INIT_API(hipImportExternalMemory, extMem_out, memHandleDesc); + if (extMem_out == nullptr || memHandleDesc == nullptr || + (memHandleDesc->flags != 0 && memHandleDesc->flags != hipExternalMemoryDedicated) || + memHandleDesc->size == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t sizeBytes = memHandleDesc->size; + amd::Context& amdContext = *hip::getCurrentDevice()->asContext(); + + amd::BufferVk* pBufferVk = nullptr; +#ifdef _WIN32 + pBufferVk = new (amdContext) amd::BufferVk(amdContext, sizeBytes, memHandleDesc->handle.win32.handle); +#else + pBufferVk = new (amdContext) amd::BufferVk(amdContext, sizeBytes, memHandleDesc->handle.fd); +#endif + + if (!pBufferVk) { + HIP_RETURN(hipErrorOutOfMemory); + } + + if (!pBufferVk->create()) { + pBufferVk->release(); + HIP_RETURN(hipErrorOutOfMemory); + } + *extMem_out = pBufferVk; + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipExternalMemoryGetMappedBuffer( + void **devPtr, + hipExternalMemory_t extMem, + const hipExternalMemoryBufferDesc *bufferDesc) { + 
HIP_INIT_API(hipExternalMemoryGetMappedBuffer, devPtr, extMem, bufferDesc); + + if (devPtr == nullptr || extMem == nullptr || bufferDesc == nullptr || bufferDesc->flags != 0) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::BufferVk *buf = reinterpret_cast(extMem); + const device::Memory* devMem = buf->getDeviceMemory(*hip::getCurrentDevice()->devices()[0]); + + if (devMem == nullptr || ((bufferDesc->offset + bufferDesc->size) > devMem->size())) { + HIP_RETURN(hipErrorInvalidValue); + } + *devPtr = reinterpret_cast(devMem->virtualAddress() + bufferDesc->offset); + amd::MemObjMap::AddMemObj(*devPtr, buf); + buf->retain(); + HIP_RETURN(hipSuccess); +} + +hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem) { + HIP_INIT_API(hipDestroyExternalMemory, extMem); + + if (extMem == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + reinterpret_cast(extMem)->release(); + + HIP_RETURN(hipSuccess); +} + + +hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t* extSem_out, + const hipExternalSemaphoreHandleDesc* semHandleDesc) +{ + HIP_INIT_API(hipImportExternalSemaphore, extSem_out, semHandleDesc); + if (extSem_out == nullptr || semHandleDesc == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + +#ifdef _WIN32 + if (device->importExtSemaphore(extSem_out, semHandleDesc->handle.win32.handle)) { +#else + if (device->importExtSemaphore( + extSem_out, semHandleDesc->handle.fd)) { +#endif + HIP_RETURN(hipSuccess); + } + HIP_RETURN(hipErrorNotSupported); +} + + +hipError_t hipSignalExternalSemaphoresAsync( + const hipExternalSemaphore_t* extSemArray, const hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream ) +{ + HIP_INIT_API(hipSignalExternalSemaphoresAsync, extSemArray, paramsArray, numExtSems, stream); + if (extSemArray == nullptr || paramsArray == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hip::Stream* hip_stream = hip::getStream(stream); + if 
(hip_stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + for (unsigned int i = 0; i < numExtSems; i++) { + if (extSemArray[i] != nullptr) { + amd::ExternalSemaphoreCmd* command = + new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value, + amd::ExternalSemaphoreCmd::COMMAND_SIGNAL_EXTSEMAPHORE); + if (command == nullptr) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + } else { + HIP_RETURN(hipErrorInvalidValue); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) +{ + HIP_INIT_API(hipWaitExternalSemaphoresAsync, extSemArray, paramsArray, numExtSems, + stream); + if (extSemArray == nullptr || paramsArray == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + for (unsigned int i = 0; i < numExtSems; i++) { + if (extSemArray[i] != nullptr) { + amd::ExternalSemaphoreCmd* command = + new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value, + amd::ExternalSemaphoreCmd::COMMAND_WAIT_EXTSEMAPHORE); + if (command == nullptr) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + } else { + HIP_RETURN(hipErrorInvalidValue); + } + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extSem) +{ + HIP_INIT_API(hipDestroyExternalSemaphore, extSem); + if (extSem == nullptr ) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + device->DestroyExtSemaphore(extSem); + HIP_RETURN(hipSuccess); +} + + +// ================================================================================================ +hipError_t ihipMalloc(void** ptr, size_t 
sizeBytes, unsigned int flags) +{ + if (ptr == nullptr) { + return hipErrorInvalidValue; + } + if (sizeBytes == 0) { + *ptr = nullptr; + return hipSuccess; + } + + bool useHostDevice = (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0; + amd::Context* curDevContext = hip::getCurrentDevice()->asContext(); + amd::Context* amdContext = useHostDevice ? hip::host_context : curDevContext; + + if (amdContext == nullptr) { + return hipErrorOutOfMemory; + } + + const auto& dev_info = amdContext->devices()[0]->info(); + + if ((!useHostDevice && (dev_info.maxMemAllocSize_ < sizeBytes)) || + (useHostDevice && (dev_info.maxPhysicalMemAllocSize_ < sizeBytes))) { + return hipErrorOutOfMemory; + } + + *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, dev_info.memBaseAddrAlign_, + useHostDevice ? curDevContext->svmDevices()[0] : nullptr); + + if (*ptr == nullptr) { + if (!useHostDevice) { + size_t free = 0, total =0; + hipError_t err = hipMemGetInfo(&free, &total); + if (err == hipSuccess) { + LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu | total :%zu \n", sizeBytes, free, total); + } + } else { + LogPrintfError("Allocation failed : Pinned Memory, size :%zu \n", sizeBytes); + } + return hipErrorOutOfMemory; + } + size_t offset = 0; //this is ignored + amd::Memory* memObj = getMemoryObject(*ptr, offset); + //saves the current device id so that it can be accessed later + memObj->getUserData().deviceId = hip::getCurrentDevice()->deviceId(); + return hipSuccess; +} +bool IsHtoHMemcpyValid(void* dst, const void* src, hipMemcpyKind kind) { + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + if (src && dst && srcMemory == nullptr && dstMemory == nullptr) { + if (kind != hipMemcpyHostToHost && kind != hipMemcpyDefault) { + return false; + } + } + return true; +} +hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, + 
hipMemcpyKind kind) { + if (dst == nullptr || src == nullptr) { + return hipErrorInvalidValue; + } + + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + // Return error if sizeBytes passed to memcpy is more than the actual size allocated + if ((dstMemory && sizeBytes > (dstMemory->getSize() - dOffset)) || + (srcMemory && sizeBytes > (srcMemory->getSize() - sOffset))) { + return hipErrorInvalidValue; + } + //If src and dst ptr are null then kind must be either h2h or def. + if (!IsHtoHMemcpyValid(dst, src, kind)) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hip::Stream& stream, bool isAsync) { + amd::Command::EventWaitList waitList; + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + amd::Device* queueDevice = &stream.device(); + amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); + if ((srcMemory == nullptr) && (dstMemory != nullptr)) { + hip::Stream* pStream = &stream; + if (queueDevice != dstMemory->getContext().devices()[0]) { + pStream = hip::getNullStream(dstMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } + } + command = new amd::WriteMemoryCommand(*pStream, CL_COMMAND_WRITE_BUFFER, waitList, + *dstMemory->asBuffer(), dOffset, sizeBytes, src, 0, 0, copyMetadata); + } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { + hip::Stream* pStream = &stream; + if (queueDevice != srcMemory->getContext().devices()[0]) { + pStream = hip::getNullStream(srcMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } 
+ } + command = new amd::ReadMemoryCommand(*pStream, CL_COMMAND_READ_BUFFER, waitList, + *srcMemory->asBuffer(), sOffset, sizeBytes, dst, 0, 0, copyMetadata); + } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { + // Check if the queue device doesn't match the device on any memory object. + // And any of them are not host allocation. + // Hence it's a P2P transfer, because the app has requested access to another GPU + if ((srcMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) && + ((srcMemory->getContext().devices().size() == 1) && + (dstMemory->getContext().devices().size() == 1))) { + command = new amd::CopyMemoryP2PCommand(stream, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); + if (command == nullptr) { + return hipErrorOutOfMemory; + } + // Make sure runtime has valid memory for the command execution. P2P access + // requires page table mapping on the current device to another GPU memory + if (!static_cast(command)->validateMemory()) { + delete command; + return hipErrorInvalidValue; + } + } else { + hip::Stream* pStream = &stream; + if ((srcMemory->getContext().devices()[0] == dstMemory->getContext().devices()[0]) && + (queueDevice != srcMemory->getContext().devices()[0])) { + copyMetadata.copyEnginePreference_ = amd::CopyMetadata::CopyEnginePreference::NONE; + pStream = hip::getNullStream(srcMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } + } else if (srcMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) { + // Scenarios such as DtoH where dst is pinned memory + if ((queueDevice != srcMemory->getContext().devices()[0]) && + (dstMemory->getContext().devices().size() != 1)) { + pStream = hip::getNullStream(srcMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } + // 
Scenarios such as HtoD where src is pinned memory + } else if ((queueDevice != dstMemory->getContext().devices()[0]) && + (srcMemory->getContext().devices().size() != 1)) { + pStream = hip::getNullStream(dstMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } + } + } + command = new amd::CopyMemoryCommand(*pStream, CL_COMMAND_COPY_BUFFER, waitList, + *srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes, + copyMetadata); + } + } + if (command == nullptr) { + return hipErrorOutOfMemory; + } + if (waitList.size() > 0) { + waitList[0]->release(); + } + return hipSuccess; +} +bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind) { + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + if (srcMemory == nullptr && dstMemory == nullptr) { + if (kind == hipMemcpyHostToHost || kind == hipMemcpyDefault) { + return true; + } + } + return false; +} +void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream) { + stream.finish(); + memcpy(dst, src, sizeBytes); +} +// ================================================================================================ +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hip::Stream& stream, bool isHostAsync, bool isGPUAsync) { + hipError_t status; + if (sizeBytes == 0) { + // Skip if nothing needs writing. 
+ return hipSuccess; + } + status = ihipMemcpy_validate(dst, src, sizeBytes, kind); + if (status != hipSuccess) { + return status; + } + if (src == dst && kind == hipMemcpyDefault) { + return hipSuccess; + } + size_t sOffset = 0; + amd::Memory* srcMemory = getMemoryObject(src, sOffset); + size_t dOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dst, dOffset); + if (srcMemory == nullptr && dstMemory == nullptr) { + ihipHtoHMemcpy(dst, src, sizeBytes, stream); + return hipSuccess; + } else if (((srcMemory == nullptr) && (dstMemory != nullptr)) || + ((srcMemory != nullptr) && (dstMemory == nullptr))) { + isHostAsync = false; + } else { + hipMemoryType srcMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + srcMemory->getMemFlags())? hipMemoryTypeHost : hipMemoryTypeDevice; + hipMemoryType dstMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + dstMemory->getMemFlags())? hipMemoryTypeHost : hipMemoryTypeDevice; + // Device to Device copies do not need to host side synchronization. 
+ if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { + isHostAsync = true; + } + } + + amd::Command* command = nullptr; + status = ihipMemcpyCommand(command, dst, src, sizeBytes, kind, stream, isHostAsync); + if (status != hipSuccess) { + return status; + } + command->enqueue(); + if (!isHostAsync) { + command->awaitCompletion(); + } else if (!isGPUAsync) { + hip::Stream* pStream = hip::getNullStream(dstMemory->getContext()); + amd::Command::EventWaitList waitList; + waitList.push_back(command); + amd::Command* depdentMarker = new amd::Marker(*pStream, false, waitList); + if (depdentMarker != nullptr) { + depdentMarker->enqueue(); + depdentMarker->release(); + } + } else { + amd::HostQueue* newQueue = command->queue(); + if (newQueue != &stream) { + amd::Command::EventWaitList waitList; + amd::Command* cmd = newQueue->getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + amd::Command* depdentMarker = new amd::Marker(stream, true, waitList); + if (depdentMarker != nullptr) { + depdentMarker->enqueue(); + depdentMarker->release(); + } + cmd->release(); + } + } + } + command->release(); + return hipSuccess; +} + +// ================================================================================================ +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); + + unsigned int ihipFlags = 0; + if (flags == hipDeviceMallocDefault) { + ihipFlags = 0; + } else if (flags == hipDeviceMallocFinegrained) { + ihipFlags = CL_MEM_SVM_ATOMICS; + } else if (flags == hipDeviceMallocUncached) { + ihipFlags = CL_MEM_SVM_ATOMICS | ROCCLR_MEM_HSA_UNCACHED; + } else if (flags == hipMallocSignalMemory) { + ihipFlags = CL_MEM_SVM_ATOMICS | CL_MEM_SVM_FINE_GRAIN_BUFFER | ROCCLR_MEM_HSA_SIGNAL_MEMORY; + if (sizeBytes != 8) { + HIP_RETURN(hipErrorInvalidValue); + } + } else { + HIP_RETURN(hipErrorInvalidValue); + } + + hipError_t 
status = ihipMalloc(ptr, sizeBytes, ihipFlags); + + if ((status == hipSuccess) && ((*ptr) != nullptr)) { + size_t offset = 0; // This is ignored + amd::Memory* svmMem = getMemoryObject(*ptr, offset); + // Save the HIP memory flags so that they can be accessed later + svmMem->getUserData().flags = flags; + } + HIP_RETURN(status, (ptr != nullptr)? *ptr : nullptr); +} + +hipError_t hipMalloc(void** ptr, size_t sizeBytes) { + HIP_INIT_API(hipMalloc, ptr, sizeBytes); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN_DURATION(ihipMalloc(ptr, sizeBytes, 0), (ptr != nullptr)? *ptr : nullptr); +} + +hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostMalloc, ptr, sizeBytes, flags); + CHECK_STREAM_CAPTURE_SUPPORTED(); + if (ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *ptr = nullptr; + + const unsigned int coherentFlags = hipHostMallocCoherent | hipHostMallocNonCoherent; + + // can't have both Coherent and NonCoherent flags set at the same time + if ((flags & coherentFlags) == coherentFlags) { + LogPrintfError( + "Cannot have both coherent and non-coherent flags " + "at the same time, flags: %u coherent flags: %u \n", + flags, coherentFlags); + HIP_RETURN(hipErrorInvalidValue); + } + + unsigned int ihipFlags = CL_MEM_SVM_FINE_GRAIN_BUFFER; + if (flags == 0 || + flags & (hipHostMallocCoherent | hipHostMallocMapped | hipHostMallocNumaUser) || + (!(flags & hipHostMallocNonCoherent) && HIP_HOST_COHERENT)) { + ihipFlags |= CL_MEM_SVM_ATOMICS; + } + + if (flags & hipHostMallocNumaUser) { + ihipFlags |= CL_MEM_FOLLOW_USER_NUMA_POLICY; + } + + if (flags & hipHostMallocNonCoherent) { + ihipFlags &= ~CL_MEM_SVM_ATOMICS; + } + + hipError_t status = ihipMalloc(ptr, sizeBytes, ihipFlags); + + if ((status == hipSuccess) && ((*ptr) != nullptr)) { + size_t offset = 0; // This is ignored + amd::Memory* svmMem = getMemoryObject(*ptr, offset); + // Save the HIP memory flags so that they can be accessed later + 
svmMem->getUserData().flags = flags; + } + + HIP_RETURN_DURATION(status, *ptr); +} + +hipError_t hipFree(void* ptr) { + HIP_INIT_API(hipFree, ptr); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipFree(ptr)); +} + +hipError_t hipMemcpy_common(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream = nullptr) { + CHECK_STREAM_CAPTURING(); + hip::Stream* hip_stream = nullptr; + + if (stream != nullptr) { + hip_stream = hip::getStream(stream); + } else { + hip_stream = hip::getNullStream(); + } + + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream); +} + +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); + HIP_RETURN_DURATION(hipMemcpy_common(dst, src, sizeBytes, kind)); +} + +hipError_t hipMemcpy_spt(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); + HIP_RETURN_DURATION(hipMemcpy_common(dst, src, sizeBytes, kind, getPerThreadDefaultStream())); +} + +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyWithStream, dst, src, sizeBytes, kind, stream); + STREAM_CAPTURE(hipMemcpyAsync, stream, dst, src, sizeBytes, kind); + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorContextIsDestroyed); + } + + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, false)); +} + +hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { + HIP_INIT_API(hipMemPtrGetInfo, ptr, size); + + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(ptr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + *size = svmMem->getSize(); + + HIP_RETURN(hipSuccess); +} + 
+hipError_t hipHostFree(void* ptr) { + HIP_INIT_API(hipHostFree, ptr); + CHECK_STREAM_CAPTURE_SUPPORTED(); + size_t offset = 0; + amd::Memory* memory_object = getMemoryObject(ptr, offset); + if (memory_object != nullptr) { + if (memory_object->getSvmPtr() == nullptr) { + return hipErrorInvalidValue; + } + } + HIP_RETURN(ihipFree(ptr)); +} + +hipError_t ihipArrayDestroy(hipArray* array) { + if (array == nullptr) { + return hipErrorInvalidValue; + } + { + amd::ScopedLock lock(hip::hipArraySetLock); + if (hip::hipArraySet.find(array) == hip::hipArraySet.end()) { + return hipErrorContextIsDestroyed; + } else { + hip::hipArraySet.erase(array); + } + } + cl_mem memObj = reinterpret_cast(array->data); + if (is_valid(memObj) == false) { + return hipErrorInvalidValue; + } + + for (auto& dev : g_devices) { + hip::Stream* stream = dev->NullStream(true); + if (stream != nullptr) { + stream->finish(); + } + } + + as_amd(memObj)->release(); + delete array; + return hipSuccess; +} + +hipError_t hipFreeArray(hipArray* array) { + HIP_INIT_API(hipFreeArray, array); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipArrayDestroy(array)); +} + +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr) { + HIP_INIT_API(hipMemGetAddressRange, pbase, psize, dptr); + + // Since we are using SVM buffer DevicePtr and HostPtr is the same + void* ptr = dptr; + amd::Memory* svmMem = getMemoryObjectWithOffset(ptr); + if (svmMem == nullptr) { + HIP_RETURN(hipErrorNotFound); + } + + *pbase = svmMem->getSvmPtr(); + *psize = svmMem->getSize(); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipMemGetInfo(size_t* free, size_t* total) { + HIP_INIT_API(hipMemGetInfo, free, total); + + if (free == nullptr && total == nullptr) { + HIP_RETURN(hipSuccess); + } + + size_t freeMemory[2]; + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + if (device == nullptr) { + HIP_RETURN(hipErrorInvalidDevice); + } + + if (!device->globalFreeMemory(freeMemory)) { + 
HIP_RETURN(hipErrorInvalidValue); + } + + if (free != nullptr) { + *free = freeMemory[0] * Ki; + } + + if (total != nullptr) { + *total = device->info().globalMemSize_; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) { + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + + if ((ptr == nullptr) || (pitch == nullptr)) { + return hipErrorInvalidValue; + } + + if ((width == 0) || (height == 0) || (depth == 0)) { + *ptr = nullptr; + return hipSuccess; + } + + if (device && !device->info().imageSupport_) { + LogPrintfError("Image is not supported on device %p \n", device); + return hipErrorInvalidValue; + } + + //avoid size_t overflow for pitch calculation + if (width > (std::numeric_limits::max() - device->info().imagePitchAlignment_)) { + return hipErrorInvalidValue; + } + + *pitch = amd::alignUp(width, device->info().imagePitchAlignment_); + + size_t sizeBytes = *pitch * height * depth; + + if (device->info().maxMemAllocSize_ < sizeBytes) { + return hipErrorOutOfMemory; + } + + *ptr = amd::SvmBuffer::malloc(*hip::getCurrentDevice()->asContext(), 0, sizeBytes, + device->info().memBaseAddrAlign_); + + if (*ptr == nullptr) { + return hipErrorOutOfMemory; + } + size_t offset = 0; //this is ignored + amd::Memory* memObj = getMemoryObject(*ptr, offset); + memObj->getUserData().pitch_ = *pitch; + memObj->getUserData().width_ = width; + memObj->getUserData().height_ = height; + memObj->getUserData().depth_ = depth; + //saves the current device id so that it can be accessed later + memObj->getUserData().deviceId = hip::getCurrentDevice()->deviceId(); + + return hipSuccess; +} + + +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { + HIP_INIT_API(hipMallocPitch, ptr, pitch, width, height); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipMallocPitch(ptr, pitch, width, height, 1), (ptr != nullptr)? 
*ptr : nullptr); +} + +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { + HIP_INIT_API(hipMalloc3D, pitchedDevPtr, extent); + CHECK_STREAM_CAPTURE_SUPPORTED(); + size_t pitch = 0; + + if (pitchedDevPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipError_t status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, + extent.depth); + + if (status == hipSuccess) { + pitchedDevPtr->pitch = pitch; + pitchedDevPtr->xsize = extent.width; + pitchedDevPtr->ysize = extent.height; + } + + HIP_RETURN(status, *pitchedDevPtr); +} + +amd::Image* ihipImageCreate(const cl_channel_order channelOrder, + const cl_channel_type channelType, + const cl_mem_object_type imageType, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const size_t imageArraySize, + const size_t imageRowPitch, + const size_t imageSlicePitch, + const uint32_t numMipLevels, + amd::Memory* buffer, + hipError_t& status) { + status = hipSuccess; + const amd::Image::Format imageFormat({channelOrder, channelType}); + if (!imageFormat.isValid()) { + LogPrintfError("Invalid Image format for channel Order:%u Type:%u \n", channelOrder, + channelType); + status = hipErrorInvalidValue; + return nullptr; + } + + amd::Context& context = *hip::getCurrentDevice()->asContext(); + if (!imageFormat.isSupported(context, imageType)) { + LogPrintfError("Image type: %u not supported \n", imageType); + status = hipErrorInvalidValue; + return nullptr; + } + + const std::vector& devices = context.devices(); + if (!devices[0]->info().imageSupport_) { + LogPrintfError("Device: 0x%x does not support image \n", devices[0]); + status = hipErrorInvalidValue; + return nullptr; + } + + if (!amd::Image::validateDimensions(devices, + imageType, + imageWidth, + imageHeight, + imageDepth, + imageArraySize)) { + DevLogError("Image does not have valid dimensions \n"); + status = hipErrorInvalidValue; + return nullptr; + } + + if (numMipLevels > 0) { + 
size_t max_dim = std::max(std::max(imageWidth, imageHeight), imageDepth); + size_t mip_levels = 0; + for (mip_levels = 0; max_dim > 0; max_dim >>=1, mip_levels++); + // empty for loop + + if (mip_levels < numMipLevels) { + LogPrintfError("Invalid Mip Levels: %d", numMipLevels); + status = hipErrorInvalidValue; + return nullptr; + } + } + + // TODO validate the image descriptor. + + amd::Image* image = nullptr; + if (buffer != nullptr) { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D_BUFFER: + case CL_MEM_OBJECT_IMAGE2D: + image = new (context) amd::Image(*buffer->asBuffer(), + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + (imageHeight == 0) ? 1 : imageHeight, + (imageDepth == 0) ? 1 : imageDepth, + imageRowPitch, + imageSlicePitch); + break; + default: + LogPrintfError("Cannot create image of imageType: 0x%x \n", imageType); + } + } else { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE3D: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + (imageHeight == 0) ? 1 : imageHeight, + (imageDepth == 0) ? 
1 : imageDepth, + imageWidth * imageFormat.getElementSize(), /* row pitch */ + imageWidth * imageHeight * imageFormat.getElementSize(), /* slice pitch */ + numMipLevels); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + imageArraySize, + 1, /* image depth */ + imageWidth * imageFormat.getElementSize(), + imageWidth * imageHeight * imageFormat.getElementSize(), + numMipLevels); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = new (context) amd::Image(context, + imageType, + CL_MEM_READ_WRITE, + imageFormat, + imageWidth, + imageHeight, + imageArraySize, + imageWidth * imageFormat.getElementSize(), + imageWidth * imageHeight * imageFormat.getElementSize(), + numMipLevels); + break; + default: + LogPrintfError("Cannot create image of imageType: 0x%x \n", imageType); + } + } + + if (image == nullptr) { + status = hipErrorOutOfMemory; + return nullptr; + } + + if (!image->create(nullptr)) { + LogPrintfError("Cannot create image: 0x%x \n", image); + status = hipErrorOutOfMemory; + delete image; + return nullptr; + } + + return image; +} + +hipError_t ihipArrayCreate(hipArray** array, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray, + unsigned int numMipmapLevels) { + if (array == nullptr) { + return hipErrorInvalidValue; + } + + // NumChannels specifies the number of packed components per HIP array element; it may be 1, 2, or 4; + if ((pAllocateArray->NumChannels != 1) && + (pAllocateArray->NumChannels != 2) && + (pAllocateArray->NumChannels != 4)) { + return hipErrorInvalidValue; + } + + if (pAllocateArray->Flags & hipArrayCubemap) { + return hipErrorInvalidValue; + } + + if ((pAllocateArray->Flags & hipArraySurfaceLoadStore) || + (pAllocateArray->Flags & hipArrayTextureGather)) { + return hipErrorNotSupported; + } + + const cl_channel_order channelOrder = hip::getCLChannelOrder(pAllocateArray->NumChannels, 0); + const cl_channel_type channelType = 
hip::getCLChannelType(pAllocateArray->Format, hipReadModeElementType); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pAllocateArray->Width, + pAllocateArray->Height, + pAllocateArray->Depth, + pAllocateArray->Flags); + hipError_t status = hipSuccess; + amd::Image* image = ihipImageCreate(channelOrder, + channelType, + imageType, + pAllocateArray->Width, + pAllocateArray->Height, + pAllocateArray->Depth, + // The number of layers is determined by the depth extent. + pAllocateArray->Depth, /* array size */ + 0, /* row pitch */ + 0, /* slice pitch */ + numMipmapLevels, + nullptr, /* buffer */ + status); + + if (image == nullptr) { + return status; + } + + cl_mem memObj = as_cl(image); + *array = new hipArray{reinterpret_cast(memObj)}; + + // It is UB to call hipGet*() on an array created via hipArrayCreate()/hipArray3DCreate(). + // This is due to hip not differentiating between runtime and driver types. + // TODO change the hipArray struct in driver_types.h. + (*array)->desc = hip::getChannelFormatDesc(pAllocateArray->NumChannels, pAllocateArray->Format); + (*array)->width = pAllocateArray->Width; + (*array)->height = pAllocateArray->Height; + (*array)->depth = pAllocateArray->Depth; + (*array)->Format = pAllocateArray->Format; + (*array)->NumChannels = pAllocateArray->NumChannels; + (*array)->flags = pAllocateArray->Flags; + { + amd::ScopedLock lock(hip::hipArraySetLock); + hip::hipArraySet.insert(*array); + } + return hipSuccess; +} + +hipError_t hipArrayCreate(hipArray** array, + const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { + HIP_INIT_API(hipArrayCreate, array, pAllocateArray); + if (pAllocateArray == nullptr) { + return hipErrorInvalidValue; + } + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_ARRAY3D_DESCRIPTOR desc = {pAllocateArray->Width, + pAllocateArray->Height, + 0, /* Depth */ + pAllocateArray->Format, + pAllocateArray->NumChannels, + hipArrayDefault /* Flags */}; + + HIP_RETURN(ihipArrayCreate(array, &desc, 0)); +} + + +hipError_t 
hipMallocArray(hipArray** array, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + unsigned int flags) { + HIP_INIT_API(hipMallocArray, array, desc, width, height, flags); + if (array == nullptr || desc == nullptr) { + return hipErrorInvalidValue; + } + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_ARRAY3D_DESCRIPTOR allocateArray = {width, + height, + 0, /* Depth */ + hip::getArrayFormat(*desc), + hip::getNumChannels(*desc), + flags}; + if(!hip::CheckArrayFormat(*desc)) { + return hipErrorInvalidValue; + } + HIP_RETURN(ihipArrayCreate(array, &allocateArray, 0 /* numMipLevels */)); +} + +hipError_t hipArray3DCreate(hipArray** array, + const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) { + HIP_INIT_API(hipArray3DCreate, array, pAllocateArray); + CHECK_STREAM_CAPTURE_SUPPORTED(); + if (pAllocateArray == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(ihipArrayCreate(array, pAllocateArray, 0 /* numMipLevels */)); +} + +hipError_t hipMalloc3DArray(hipArray_t* array, + const hipChannelFormatDesc* desc, + hipExtent extent, + unsigned int flags) { + HIP_INIT_API(hipMalloc3DArray, array, desc, extent, flags); + if (array == nullptr || desc == nullptr) { + return hipErrorInvalidValue; + } + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_ARRAY3D_DESCRIPTOR allocateArray = {extent.width, + extent.height, + extent.depth, + hip::getArrayFormat(*desc), + hip::getNumChannels(*desc), + flags}; + if(!hip::CheckArrayFormat(*desc)) { + return hipErrorInvalidValue; + } + + HIP_RETURN(ihipArrayCreate(array, &allocateArray, 0)); +} + +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { + HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr); + + if (flagsPtr == nullptr || hostPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t offset = 0; + amd::Memory* svmMem = getMemoryObject(hostPtr, offset); + + if (svmMem == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // To match with Nvidia behaviour validate that hostPtr passed was 
allocated using hipHostMalloc(), and not hipMalloc() + if (!(svmMem->getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Retrieve HIP memory flags + *flagsPtr = svmMem->getUserData().flags; + + HIP_RETURN(hipSuccess); +} + +hipError_t ihipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { + if (hostPtr == nullptr || sizeBytes == 0 || flags > 15) { + return hipErrorInvalidValue; + } else { + amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, + CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes); + + constexpr bool sysMemAlloc = false; + constexpr bool skipAlloc = false; + constexpr bool forceAlloc = true; + if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, forceAlloc)) { + mem->release(); + LogPrintfError("Cannot create memory for size: %u with flags: %d \n", sizeBytes, flags); + return hipErrorInvalidValue; + } + + amd::MemObjMap::AddMemObj(hostPtr, mem); + for (const auto& device : g_devices) { + // Since the amd::Memory object is shared between all devices + // it's fine to have multiple addresses mapped to it + const device::Memory* devMem = mem->getDeviceMemory(*device->devices()[0]); + void* vAddr = reinterpret_cast(devMem->virtualAddress()); + if ((hostPtr != vAddr) && (amd::MemObjMap::FindMemObj(vAddr) == nullptr)) { + amd::MemObjMap::AddMemObj(vAddr, mem); + } + } + + if (mem != nullptr) { + mem->getUserData().deviceId = hip::getCurrentDevice()->deviceId(); + // Save the HIP memory flags so that they can be accessed later + mem->getUserData().flags = flags; + } + return hipSuccess; + } +} + +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipHostRegister(hostPtr, sizeBytes,flags)); +} + +hipError_t ihipHostUnregister(void* hostPtr) { + if (hostPtr == nullptr) { + return hipErrorInvalidValue; + } + size_t offset = 0; + amd::Memory* 
mem = getMemoryObject(hostPtr, offset); + + if (mem != nullptr) { + // Wait on the device, associated with the current memory object during allocation + auto device_id = mem->getUserData().deviceId; + + hip::Stream* stream = g_devices[device_id]->NullStream(true); + if (stream != nullptr) { + stream->finish(); + } + + amd::MemObjMap::RemoveMemObj(hostPtr); + for (const auto& device: g_devices) { + const device::Memory* devMem = mem->getDeviceMemory(*device->devices()[0]); + if (devMem != nullptr) { + void* vAddr = reinterpret_cast(devMem->virtualAddress()); + if ((vAddr != hostPtr) && amd::MemObjMap::FindMemObj(vAddr)) { + amd::MemObjMap::RemoveMemObj(vAddr); + } + } + } + mem->release(); + return hipSuccess; + } + + LogPrintfError("Cannot unregister host_ptr: 0x%x \n", hostPtr); + return hipErrorHostMemoryNotRegistered; +} + + +hipError_t hipHostUnregister(void* hostPtr) { + HIP_INIT_API(hipHostUnregister, hostPtr); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipHostUnregister(hostPtr)); +} + +// Deprecated function: +hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { + HIP_INIT_API(hipHostAlloc, ptr, sizeBytes, flags); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipMalloc(ptr, sizeBytes, flags), (ptr != nullptr)? *ptr : nullptr); +}; + +inline hipError_t ihipMemcpySymbol_validate(const void* symbol, size_t sizeBytes, size_t offset, size_t &sym_size, hipDeviceptr_t &device_ptr) { + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, &sym_size)); + + /* Size Check to make sure offset is correct */ + if ((offset + sizeBytes) > sym_size) { + LogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); + HIP_RETURN(hipErrorInvalidValue); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + return hipSuccess; +} + +hipError_t hipMemcpyToSymbol_common(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream=nullptr) { + CHECK_STREAM_CAPTURING(); + + if (kind != hipMemcpyHostToDevice && kind != hipMemcpyDeviceToDevice) { + HIP_RETURN(hipErrorInvalidMemcpyDirection); + } + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + hipError_t status = ihipMemcpySymbol_validate(symbol, sizeBytes, offset, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + + /* Copy memory from source to destination address */ + return hipMemcpy_common(device_ptr, src, sizeBytes, kind, stream); +} + +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToSymbol, symbol, src, sizeBytes, offset, kind); + HIP_RETURN_DURATION(hipMemcpyToSymbol_common(symbol, src, sizeBytes, offset, kind)); +} + +hipError_t hipMemcpyToSymbol_spt(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToSymbol, symbol, src, sizeBytes, offset, kind); + HIP_RETURN_DURATION(hipMemcpyToSymbol_common(symbol, src, sizeBytes, offset, kind, + getPerThreadDefaultStream())); +} + +hipError_t hipMemcpyFromSymbol_common(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream=nullptr) { + CHECK_STREAM_CAPTURING(); + + if (kind != hipMemcpyDeviceToHost && kind != hipMemcpyDeviceToDevice) { + HIP_RETURN(hipErrorInvalidMemcpyDirection); + } + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + hipError_t status = ihipMemcpySymbol_validate(symbol, sizeBytes, offset, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + + /* Copy memory from source to destination address */ + return hipMemcpy_common(dst, device_ptr, sizeBytes, kind, stream); +} + +hipError_t 
hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, sizeBytes, offset, kind); + HIP_RETURN_DURATION(hipMemcpyFromSymbol_common(dst, symbol, sizeBytes, offset, kind)); +} + +hipError_t hipMemcpyFromSymbol_spt(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, sizeBytes, offset, kind); + HIP_RETURN_DURATION(hipMemcpyFromSymbol_common(dst, symbol, sizeBytes, offset, kind, + getPerThreadDefaultStream())); +} + +hipError_t hipMemcpyToSymbolAsync_common(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + STREAM_CAPTURE(hipMemcpyToSymbolAsync, stream, symbol, src, sizeBytes, offset, kind); + + if (kind != hipMemcpyHostToDevice && kind != hipMemcpyDeviceToDevice) { + return hipErrorInvalidMemcpyDirection; + } + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + hipError_t status = ihipMemcpySymbol_validate(symbol, sizeBytes, offset, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + /* Copy memory from source to destination address */ + return hipMemcpyAsync(device_ptr, src, sizeBytes, kind, stream); +} + +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, sizeBytes, offset, kind, stream); + HIP_RETURN_DURATION(hipMemcpyToSymbolAsync_common(symbol, src, sizeBytes, offset, kind, stream)); +} + +hipError_t hipMemcpyToSymbolAsync_spt(const void* symbol, const void* src, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, sizeBytes, offset, kind, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN_DURATION(hipMemcpyToSymbolAsync_common(symbol, src, sizeBytes, 
offset, kind, stream)); +} + +hipError_t hipMemcpyFromSymbolAsync_common(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + STREAM_CAPTURE(hipMemcpyFromSymbolAsync, stream, dst, symbol, sizeBytes, offset, kind); + + if (kind != hipMemcpyDeviceToHost && kind != hipMemcpyDeviceToDevice) { + return hipErrorInvalidMemcpyDirection; + } + + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; + + hipError_t status = ihipMemcpySymbol_validate(symbol, sizeBytes, offset, sym_size, device_ptr); + if (status != hipSuccess) { + return status; + } + + /* Copy memory from source to destination address */ + return hipMemcpyAsync(dst, device_ptr, sizeBytes, kind, stream); +} + +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyFromSymbolAsync, dst, symbol, sizeBytes, offset, kind, stream); + HIP_RETURN_DURATION(hipMemcpyFromSymbolAsync_common(dst, symbol, sizeBytes, offset, kind, stream)); +} + +hipError_t hipMemcpyFromSymbolAsync_spt(void* dst, const void* symbol, size_t sizeBytes, + size_t offset, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyFromSymbolAsync, dst, symbol, sizeBytes, offset, kind, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN_DURATION(hipMemcpyFromSymbolAsync_common(dst, symbol, sizeBytes, offset, kind, stream)); +} + +hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, + void* srcHost, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyHtoD, dstDevice, srcHost, ByteCount); + CHECK_STREAM_CAPTURING(); + hip::Stream* stream = hip::getStream(nullptr); + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *stream)); +} + +hipError_t hipMemcpyDtoH(void* dstHost, + hipDeviceptr_t srcDevice, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyDtoH, dstHost, srcDevice, 
ByteCount); + CHECK_STREAM_CAPTURING(); + hip::Stream* stream = hip::getStream(nullptr); + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN_DURATION(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *stream)); +} + +hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice, + hipDeviceptr_t srcDevice, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyDtoD, dstDevice, srcDevice, ByteCount); + CHECK_STREAM_CAPTURING(); + hip::Stream* stream = hip::getStream(nullptr); + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *stream)); +} + +hipError_t hipMemcpyAsync_common(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + STREAM_CAPTURE(hipMemcpyAsync, stream, dst, src, sizeBytes, kind); + + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, true); +} + +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream); + HIP_RETURN_DURATION(hipMemcpyAsync_common(dst, src, sizeBytes, kind, stream)); +} + +hipError_t hipMemcpyAsync_spt(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyAsync, dst, src, sizeBytes, kind, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN_DURATION(hipMemcpyAsync_common(dst, src, sizeBytes, kind, stream)); +} + +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, void* srcHost, size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyHtoDAsync, dstDevice, srcHost, ByteCount, stream); + hipMemcpyKind kind = hipMemcpyHostToDevice; + STREAM_CAPTURE(hipMemcpyHtoDAsync, stream, dstDevice, srcHost, ByteCount, kind); + hip::Stream* hip_stream = 
hip::getStream(stream); + if (hip_stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN_DURATION( + ihipMemcpy(dstDevice, srcHost, ByteCount, kind, *hip_stream, true)); +} + +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyDtoDAsync, dstDevice, srcDevice, ByteCount, stream); + hipMemcpyKind kind = hipMemcpyDeviceToDevice; + STREAM_CAPTURE(hipMemcpyDtoDAsync, stream, dstDevice, srcDevice, ByteCount, kind); + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN_DURATION( + ihipMemcpy(dstDevice, srcDevice, ByteCount, kind, *hip_stream, true)); +} + +hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyDtoHAsync, dstHost, srcDevice, ByteCount, stream); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + STREAM_CAPTURE(hipMemcpyDtoHAsync, stream, dstHost, srcDevice, ByteCount, kind); + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN_DURATION( + ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *hip_stream, true)); +} + +hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t dstRowPitch, size_t dstSlicePitch, + amd::Memory*& dstMemory, amd::Image*& srcImage, + amd::BufferRect& srcRect, amd::BufferRect& dstRect) { + size_t dstOffset = 0; + dstMemory = getMemoryObject(dstDevice, dstOffset); + if (srcArray == nullptr || (dstMemory == nullptr)) { + return hipErrorInvalidValue; + } + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + if (!is_valid(srcMemObj)) { + return hipErrorInvalidValue; + } + + srcImage = as_amd(srcMemObj)->asImage(); + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. 
+ const size_t elementSize = srcImage->getImageFormat().getElementSize(); + static_cast(srcOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), + srcImage->getRowPitch(), srcImage->getSlicePitch())) { + return hipErrorInvalidValue; + } + + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), + dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + const size_t copySizeInBytes = + copyRegion[0] * copyRegion[1] * copyRegion[2] * srcImage->getImageFormat().getElementSize(); + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !dstMemory->validateRegion(dstOrigin, {copySizeInBytes, 0, 0})) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, void* dstDevice, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, + hip::Stream* stream) { + amd::BufferRect srcRect; + amd::BufferRect dstRect; + amd::Memory* dstMemory; + amd::Image* srcImage; + hipError_t status = + ihipMemcpyAtoDValidate(srcArray, dstDevice, srcOrigin, dstOrigin, copyRegion, dstRowPitch, + dstSlicePitch, dstMemory, srcImage, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + + amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE_TO_BUFFER, + amd::Command::EventWaitList{}, *srcImage, *dstMemory, + srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); + + if (cpyMemCmd == nullptr) { + return hipErrorOutOfMemory; + } + + if (!cpyMemCmd->validatePeerMemory()) { + delete cpyMemCmd; + return hipErrorInvalidValue; + } + command = cpyMemCmd; + return hipSuccess; +} + +hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + 
size_t srcRowPitch, size_t srcSlicePitch, amd::Image*& dstImage, + amd::Memory*& srcMemory, amd::BufferRect& dstRect, + amd::BufferRect& srcRect) { + size_t srcOffset = 0; + srcMemory = getMemoryObject(srcDevice, srcOffset); + if ((srcMemory == nullptr) || dstArray == nullptr) { + return hipErrorInvalidValue; + } + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (!is_valid(dstMemObj)) { + return hipErrorInvalidValue; + } + + dstImage = as_amd(dstMemObj)->asImage(); + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. + const size_t elementSize = dstImage->getImageFormat().getElementSize(); + static_cast(dstOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), + srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), + dstImage->getRowPitch(), dstImage->getSlicePitch())) { + return hipErrorInvalidValue; + } + + const size_t copySizeInBytes = + copyRegion[0] * copyRegion[1] * copyRegion[2] * dstImage->getImageFormat().getElementSize(); + if (!srcMemory->validateRegion(srcOrigin, {copySizeInBytes, 0, 0}) || + !dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArray* dstArray, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, + hip::Stream* stream) { + amd::Image* dstImage; + amd::Memory* srcMemory; + amd::BufferRect dstRect; + amd::BufferRect srcRect; + hipError_t status = + ihipMemcpyDtoAValidate(srcDevice, dstArray, srcOrigin, dstOrigin, copyRegion, srcRowPitch, + srcSlicePitch, dstImage, srcMemory, dstRect, srcRect); + if (status != hipSuccess) { + return status; + } + amd::CopyMemoryCommand* 
cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_BUFFER_TO_IMAGE, + amd::Command::EventWaitList{}, *srcMemory, *dstImage, + srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); + + if (cpyMemCmd == nullptr) { + return hipErrorOutOfMemory; + } + + if (!cpyMemCmd->validatePeerMemory()) { + delete cpyMemCmd; + return hipErrorInvalidValue; + } + command = cpyMemCmd; + return hipSuccess; +} + +hipError_t ihipMemcpyDtoDValidate(void* srcDevice, void* dstDevice, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, + size_t dstSlicePitch, amd::Memory*& srcMemory, + amd::Memory*& dstMemory, amd::BufferRect& srcRect, + amd::BufferRect& dstRect) { + size_t srcOffset = 0; + srcMemory = getMemoryObject(srcDevice, srcOffset); + size_t dstOffset = 0; + dstMemory = getMemoryObject(dstDevice, dstOffset); + + if ((srcMemory == nullptr) || (dstMemory == nullptr)) { + return hipErrorInvalidValue; + } + + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), + srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { + return hipErrorInvalidValue; + } + + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), + dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void* dstDevice, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, 
size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream) { + amd::Memory* srcMemory; + amd::Memory* dstMemory; + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + hipError_t status = ihipMemcpyDtoDValidate(srcDevice, dstDevice, srcOrigin, dstOrigin, copyRegion, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + srcMemory, dstMemory, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::CopyMemoryCommand* copyCommand = new amd::CopyMemoryCommand( + *stream, CL_COMMAND_COPY_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, *dstMemory, + srcStart, dstStart, copyRegion, srcRect, dstRect); + + if (copyCommand == nullptr) { + return hipErrorOutOfMemory; + } + + if (!copyCommand->validatePeerMemory()) { + delete copyCommand; + return hipErrorInvalidValue; + } + command = copyCommand; + return hipSuccess; +} + +hipError_t ihipMemcpyDtoHValidate(void* srcDevice, void* dstHost, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, + size_t dstSlicePitch, amd::Memory*& srcMemory, + amd::BufferRect& srcRect, amd::BufferRect& dstRect) { + size_t srcOffset = 0; + srcMemory = getMemoryObject(srcDevice, srcOffset); + + if ((srcMemory == nullptr) || (dstHost == nullptr)) { + return hipErrorInvalidValue; + } + + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), + srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + srcRect.start_ += srcOffset; + srcRect.end_ += srcOffset; + + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { + return hipErrorInvalidValue; + } + + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), + dstRowPitch, dstSlicePitch)) { + 
return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void* dstHost, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, + bool isAsync = false) { + amd::Memory* srcMemory; + amd::BufferRect srcRect; + amd::BufferRect dstRect; + hipError_t status = ihipMemcpyDtoHValidate(srcDevice, dstHost, srcOrigin, dstOrigin, copyRegion, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + srcMemory, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); + amd::ReadMemoryCommand* readCommand = + new amd::ReadMemoryCommand(*stream, CL_COMMAND_READ_BUFFER_RECT, amd::Command::EventWaitList{}, + *srcMemory, srcStart, copyRegion, dstHost, srcRect, dstRect, + copyMetadata); + + if (readCommand == nullptr) { + return hipErrorOutOfMemory; + } + + if (!readCommand->validatePeerMemory()) { + delete readCommand; + return hipErrorInvalidValue; + } + command = readCommand; + return hipSuccess; +} + +hipError_t ihipMemcpyHtoDValidate(const void* srcHost, void* dstDevice, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, + size_t dstSlicePitch, amd::Memory*& dstMemory, + amd::BufferRect& srcRect, amd::BufferRect& dstRect) { + size_t dstOffset = 0; + dstMemory = getMemoryObject(dstDevice, dstOffset); + + if ((srcHost == nullptr) || (dstMemory == nullptr)) { + return hipErrorInvalidValue; + } + + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), + srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), + dstRowPitch, dstSlicePitch)) { + 
return hipErrorInvalidValue; + } + dstRect.start_ += dstOffset; + dstRect.end_ += dstOffset; + + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, void* dstDevice, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, + bool isAsync = false) { + amd::Memory* dstMemory; + amd::BufferRect srcRect; + amd::BufferRect dstRect; + + hipError_t status = ihipMemcpyHtoDValidate(srcHost, dstDevice, srcOrigin, dstOrigin, copyRegion, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + dstMemory, srcRect, dstRect); + if (status != hipSuccess) { + return status; + } + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); + amd::WriteMemoryCommand* writeCommand = new amd::WriteMemoryCommand( + *stream, CL_COMMAND_WRITE_BUFFER_RECT, amd::Command::EventWaitList{}, *dstMemory, dstStart, + copyRegion, srcHost, dstRect, srcRect, copyMetadata); + + if (writeCommand == nullptr) { + return hipErrorOutOfMemory; + } + + if (!writeCommand->validatePeerMemory()) { + delete writeCommand; + return hipErrorInvalidValue; + } + command = writeCommand; + return hipSuccess; +} + +hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + hip::Stream* stream) { + if ((srcHost == nullptr) || (dstHost == nullptr)) { + return hipErrorInvalidValue; + } + + amd::BufferRect srcRect; + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), + srcRowPitch, srcSlicePitch)) 
{ + return hipErrorInvalidValue; + } + + amd::BufferRect dstRect; + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), + dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + + if (stream) { + stream->finish(); + } + + for (size_t slice = 0; slice < copyRegion[2]; slice++) { + for (size_t row = 0; row < copyRegion[1]; row++) { + const void* srcRow = static_cast(srcHost) + srcRect.start_ + + row * srcRect.rowPitch_ + slice * srcRect.slicePitch_; + void* dstRow = static_cast(dstHost) + dstRect.start_ + row * dstRect.rowPitch_ + + slice * dstRect.slicePitch_; + std::memcpy(dstRow, srcRow, copyRegion[0]); + } + } + + return hipSuccess; +} + +hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + amd::Image*& srcImage, amd::Image*& dstImage) { + if (dstArray == nullptr || srcArray == nullptr) { + return hipErrorInvalidValue; + } + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (!is_valid(srcMemObj) || !is_valid(dstMemObj)) { + return hipErrorInvalidValue; + } + + srcImage = as_amd(srcMemObj)->asImage(); + dstImage = as_amd(dstMemObj)->asImage(); + + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. + // Note that src and dst should have the same element size. 
+ assert(srcImage->getImageFormat().getElementSize() == + dstImage->getImageFormat().getElementSize()); + const size_t elementSize = srcImage->getImageFormat().getElementSize(); + static_cast(srcOrigin)[0] /= elementSize; + static_cast(dstOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hipArray* dstArray, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, hip::Stream* stream) { + amd::Image* srcImage; + amd::Image* dstImage; + + hipError_t status = ihipMemcpyAtoAValidate(srcArray, dstArray, srcOrigin, dstOrigin, copyRegion, + srcImage, dstImage); + if (status != hipSuccess) { + return status; + } + + amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE, + amd::Command::EventWaitList{}, *srcImage, *dstImage, + srcOrigin, dstOrigin, copyRegion); + + if (cpyMemCmd == nullptr) { + return hipErrorOutOfMemory; + } + + if (!cpyMemCmd->validatePeerMemory()) { + delete cpyMemCmd; + return hipErrorInvalidValue; + } + command = cpyMemCmd; + return hipSuccess; +} + +hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray, + amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, + amd::Coord3D& copyRegion, size_t srcRowPitch, + size_t srcSlicePitch, amd::Image*& dstImage, + amd::BufferRect& srcRect) { + if ((srcHost == nullptr) || dstArray == nullptr) { + return hipErrorInvalidValue; + } + cl_mem dstMemObj = reinterpret_cast(dstArray->data); + if (!is_valid(dstMemObj)) { + return hipErrorInvalidValue; + } + + if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), + srcRowPitch, srcSlicePitch)) { + return hipErrorInvalidValue; + } + + dstImage = as_amd(dstMemObj)->asImage(); + // HIP assumes the width is in bytes, 
but OCL assumes it's in pixels. + const size_t elementSize = dstImage->getImageFormat().getElementSize(); + static_cast(dstOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!dstImage->validateRegion(dstOrigin, copyRegion)) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hipArray* dstArray, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, + hip::Stream* stream, bool isAsync = false) { + amd::Image* dstImage; + amd::BufferRect srcRect; + + hipError_t status = ihipMemcpyHtoAValidate(srcHost, dstArray, srcOrigin, dstOrigin, copyRegion, + srcRowPitch, srcSlicePitch, dstImage, srcRect); + if (status != hipSuccess) { + return status; + } + + amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); + amd::WriteMemoryCommand* writeMemCmd = new amd::WriteMemoryCommand( + *stream, CL_COMMAND_WRITE_IMAGE, amd::Command::EventWaitList{}, *dstImage, dstOrigin, + copyRegion, static_cast(srcHost) + srcRect.start_, srcRowPitch, srcSlicePitch, + copyMetadata); + + if (writeMemCmd == nullptr) { + return hipErrorOutOfMemory; + } + + if (!writeMemCmd->validatePeerMemory()) { + delete writeMemCmd; + return hipErrorInvalidValue; + } + command = writeMemCmd; + return hipSuccess; +} + +hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3D& srcOrigin, + amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, + size_t dstRowPitch, size_t dstSlicePitch, amd::Image*& srcImage, + amd::BufferRect& dstRect) { + if (srcArray == nullptr || (dstHost == nullptr)) { + return hipErrorInvalidValue; + } + cl_mem srcMemObj = reinterpret_cast(srcArray->data); + if (!is_valid(srcMemObj)) { + return hipErrorInvalidValue; + } + + if (!dstRect.create(static_cast(dstOrigin), static_cast(copyRegion), + dstRowPitch, dstSlicePitch)) { + return hipErrorInvalidValue; + } + 
+ srcImage = as_amd(srcMemObj)->asImage(); + // HIP assumes the width is in bytes, but OCL assumes it's in pixels. + const size_t elementSize = srcImage->getImageFormat().getElementSize(); + static_cast(srcOrigin)[0] /= elementSize; + static_cast(copyRegion)[0] /= elementSize; + + if (!srcImage->validateRegion(srcOrigin, copyRegion) || + !srcImage->isRowSliceValid(dstRowPitch, dstSlicePitch, copyRegion[0], copyRegion[1])) { + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, void* dstHost, + amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, + amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, + hip::Stream* stream, bool isAsync = false) { + amd::Image* srcImage; + amd::BufferRect dstRect; + amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); + + hipError_t status = ihipMemcpyAtoHValidate(srcArray, dstHost, srcOrigin, dstOrigin, copyRegion, + dstRowPitch, dstSlicePitch, srcImage, dstRect); + if (status != hipSuccess) { + return status; + } + + amd::ReadMemoryCommand* readMemCmd = new amd::ReadMemoryCommand( + *stream, CL_COMMAND_READ_IMAGE, amd::Command::EventWaitList{}, *srcImage, srcOrigin, + copyRegion, static_cast(dstHost) + dstRect.start_, dstRowPitch, dstSlicePitch, + copyMetadata); + + if (readMemCmd == nullptr) { + return hipErrorOutOfMemory; + } + + if (!readMemCmd->validatePeerMemory()) { + delete readMemCmd; + return hipErrorInvalidValue; + } + command = readMemCmd; + return hipSuccess; +} + +hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3D* pCopy, + hip::Stream* stream) { + size_t offset = 0; + // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the + // (unified virtual address space) base address of the source data and the bytes per row to + // apply. {src/dst}Array is ignored. 
+ hipMemoryType srcMemoryType = pCopy->srcMemoryType; + if (srcMemoryType == hipMemoryTypeUnified) { + amd::Memory* memObj = getMemoryObject(pCopy->srcDevice, offset); + if (memObj != nullptr) { + srcMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags()) ? hipMemoryTypeHost : hipMemoryTypeDevice; + } else { + srcMemoryType = hipMemoryTypeHost; + } + if (srcMemoryType == hipMemoryTypeHost) { + // {src/dst}Host may be unitialized. Copy over {src/dst}Device into it if we detect system + // memory. + const_cast(pCopy)->srcHost = pCopy->srcDevice; + const_cast(pCopy)->srcXInBytes += offset; + } + } + offset = 0; + hipMemoryType dstMemoryType = pCopy->dstMemoryType; + if (dstMemoryType == hipMemoryTypeUnified) { + amd::Memory* memObj = getMemoryObject(pCopy->dstDevice, offset); + if (memObj != nullptr) { + dstMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags()) ? hipMemoryTypeHost : hipMemoryTypeDevice; + } else { + dstMemoryType = hipMemoryTypeHost; + } + + if (dstMemoryType == hipMemoryTypeHost) { + const_cast(pCopy)->dstHost = pCopy->dstDevice; + const_cast(pCopy)->dstXInBytes += offset; + } + } + + // If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned. + // In that case upgrade the copy type to hipMemoryTypeDevice to avoid extra pinning. + offset = 0; + if (srcMemoryType == hipMemoryTypeHost) { + srcMemoryType = getMemoryObject(pCopy->srcHost, offset) ? hipMemoryTypeDevice : + hipMemoryTypeHost; + + if (srcMemoryType == hipMemoryTypeDevice) { + const_cast(pCopy)->srcDevice = const_cast(pCopy->srcHost); + const_cast(pCopy)->srcXInBytes += offset; + } + } + offset = 0; + if (dstMemoryType == hipMemoryTypeHost) { + dstMemoryType = getMemoryObject(pCopy->dstHost, offset) ? 
hipMemoryTypeDevice : + hipMemoryTypeHost; + + if (dstMemoryType == hipMemoryTypeDevice) { + const_cast(pCopy)->dstDevice = const_cast(pCopy->dstHost); + const_cast(pCopy)->dstXInBytes += offset; + } + } + + amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; + amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; + amd::Coord3D copyRegion = {pCopy->WidthInBytes, pCopy->Height, pCopy->Depth}; + + if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { + // Host to Device. + return ihipMemcpyHtoDCommand(command, pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin, + copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, + pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { + // Device to Host. + return ihipMemcpyDtoHCommand(command, pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin, + copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, + pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { + // Device to Device. + return ihipMemcpyDtoDCommand(command, pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin, + copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, + pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); + } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { + // Host to Image. + return ihipMemcpyHtoACommand(command, pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin, + copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, + stream); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { + // Image to Host. 
+ return ihipMemcpyAtoHCommand(command, pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin, + copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, + stream); + } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { + // Device to Image. + return ihipMemcpyDtoACommand(command, pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin, + copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, + stream); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { + // Image to Device. + return ihipMemcpyAtoDCommand(command, pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin, + copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, + stream); + } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { + // Image to Image. + return ihipMemcpyAtoACommand(command, pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin, + copyRegion, stream); + } else { + ShouldNotReachHere(); + } + + return hipSuccess; +} + +inline hipError_t ihipMemcpyCmdEnqueue(amd::Command* command, bool isAsync = false) { + hipError_t status = hipSuccess; + if (command == nullptr) { + return hipErrorOutOfMemory; + } + command->enqueue(); + if (!isAsync) { + if (!command->awaitCompletion()) { + status = hipErrorUnknown; + } + } + command->release(); + return status; +} + +hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, hipStream_t stream, bool isAsync = false) { + hipError_t status; + size_t offset = 0; + if (pCopy == nullptr) { + return hipErrorInvalidValue; + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + if (pCopy->WidthInBytes == 0 || pCopy->Height == 0 || pCopy->Depth == 0) { + LogPrintfInfo("Either Width :%d or Height: %d and Depth: %d is zero", pCopy->WidthInBytes, + pCopy->Height, pCopy->Depth); + return hipSuccess; + } + // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify 
the (unified virtual address space) + // base address of the source data and the bytes per row to apply. {src/dst}Array is ignored. + hipMemoryType srcMemoryType = pCopy->srcMemoryType; + if (srcMemoryType == hipMemoryTypeUnified) { + amd::Memory* memObj = getMemoryObject(pCopy->srcDevice, offset); + if (memObj != nullptr) { + srcMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags()) ? hipMemoryTypeHost : hipMemoryTypeDevice; + } else { + srcMemoryType = hipMemoryTypeHost; + } + + if (srcMemoryType == hipMemoryTypeHost) { + // {src/dst}Host may be unitialized. Copy over {src/dst}Device into it if we detect system memory. + const_cast(pCopy)->srcHost = pCopy->srcDevice; + const_cast(pCopy)->srcXInBytes += offset; + } + } + offset = 0; + hipMemoryType dstMemoryType = pCopy->dstMemoryType; + if (dstMemoryType == hipMemoryTypeUnified) { + amd::Memory* memObj = getMemoryObject(pCopy->dstDevice, offset); + if (memObj != nullptr) { + dstMemoryType = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags()) ? hipMemoryTypeHost : hipMemoryTypeDevice; + } else { + dstMemoryType = hipMemoryTypeHost; + } + + if (dstMemoryType == hipMemoryTypeHost) { + const_cast(pCopy)->dstHost = pCopy->dstDevice; + const_cast(pCopy)->dstXInBytes += offset; + } + } + // If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned. + // In that case upgrade the copy type to hipMemoryTypeDevice to avoid extra pinning. + offset = 0; + if (srcMemoryType == hipMemoryTypeHost) { + srcMemoryType = getMemoryObject(pCopy->srcHost, offset) ? hipMemoryTypeDevice : + hipMemoryTypeHost; + } + if (dstMemoryType == hipMemoryTypeHost) { + dstMemoryType = getMemoryObject(pCopy->dstHost, offset) ? 
hipMemoryTypeDevice : + hipMemoryTypeHost; + } + + if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeHost)) { + amd::Coord3D srcOrigin = {pCopy->srcXInBytes, pCopy->srcY, pCopy->srcZ}; + amd::Coord3D dstOrigin = {pCopy->dstXInBytes, pCopy->dstY, pCopy->dstZ}; + amd::Coord3D copyRegion = {pCopy->WidthInBytes, (pCopy->Height != 0) ? pCopy->Height : 1, + (pCopy->Depth != 0) ? pCopy->Depth : 1}; + + // Host to Host. + return ihipMemcpyHtoH(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, + pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, + pCopy->dstPitch * pCopy->dstHeight, hip::getStream(stream)); + } else { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + status = ihipGetMemcpyParam3DCommand(command, pCopy, hip_stream); + if (status != hipSuccess) return status; + + // Transfers from device memory to pageable host memory and transfers from any + // host memory to any host memory are synchronous with respect to the host. + // Device to Device copies do not need to host side synchronization. 
+ if (dstMemoryType == hipMemoryTypeHost || + ((pCopy->srcMemoryType == hipMemoryTypeHost) && + (pCopy->dstMemoryType == hipMemoryTypeHost))) { + isAsync = false; + } else if ((pCopy->srcMemoryType == hipMemoryTypeDevice) && + (pCopy->dstMemoryType == hipMemoryTypeDevice)) { + // Device to Device copies dont need to wait for host synchronization + isAsync = true; + } + return ihipMemcpyCmdEnqueue(command, isAsync); + } +} + +hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, + hipStream_t stream, + bool isAsync = false) { + HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*pCopy); + + return ihipMemcpyParam3D(&desc, stream, isAsync); +} + +hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + if ((width == 0) || (height == 0)) { + return hipSuccess; + } + if ((width > dpitch) || (width > spitch)) { + return hipErrorInvalidPitchValue; + } + + desc.srcXInBytes = 0; + desc.srcY = 0; + desc.srcMemoryType = std::get<0>(hip::getMemoryType(kind)); + desc.srcHost = src; + desc.srcDevice = const_cast(src); + desc.srcArray = nullptr; // Ignored. + desc.srcPitch = spitch; + + desc.dstXInBytes = 0; + desc.dstY = 0; + desc.dstMemoryType = std::get<1>(hip::getMemoryType(kind)); + desc.dstHost = dst; + desc.dstDevice = dst; + desc.dstArray = nullptr; // Ignored. 
+ desc.dstPitch = dpitch; + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + HIP_INIT_API(hipMemcpyParam2D, pCopy); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(ihipMemcpyParam2D(pCopy, nullptr)); +} + +hipError_t hipMemcpy2DValidateParams(hipMemcpyKind kind, hipStream_t stream = nullptr) { + + if (kind < hipMemcpyHostToHost || kind > hipMemcpyDefault) { + return hipErrorInvalidMemcpyDirection; + } + + if (!hip::isValid(stream)) { + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +hipError_t hipMemcpy2DValidateBuffer(const void* buf, size_t pitch, size_t width) { + + if (buf == nullptr) { + return hipErrorInvalidValue; + } + + if (pitch == 0 || pitch < width) { + return hipErrorInvalidPitchValue; + } + + return hipSuccess; +} + +hipError_t hipMemcpy2DValidateArray(hipArray_const_t arr, size_t wOffset, size_t hOffset, + size_t width, size_t height) { + + if (arr == nullptr) { + return hipErrorInvalidHandle; + } + + int FormatSize = hip::getElementSize(arr); + if ((width + wOffset) > (arr->width * FormatSize)) { + return hipErrorInvalidValue; + } + if (arr->height == 0) {//1D hipArray + if (height + hOffset > 1) { + return hipErrorInvalidValue; + } + } else if ((height + hOffset) > (arr->height)) {//2D hipArray + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +hipError_t hipMemcpy2D_common(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream = nullptr, + bool isAsync = false) { + + hipError_t validateParams = hipSuccess, validateSrc = hipSuccess, validateDst = hipSuccess; + if ((validateParams = hipMemcpy2DValidateParams(kind,stream)) != hipSuccess) { + return validateParams; + } + if ((validateSrc = hipMemcpy2DValidateBuffer(src,spitch,width)) != hipSuccess) { + return validateSrc; + } + if ((validateDst = 
hipMemcpy2DValidateBuffer(dst,dpitch, width)) != hipSuccess) { + return validateDst; + } + return ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind, stream, isAsync); +} + +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(hipMemcpy2D_common(dst, dpitch, src, spitch, width, height, kind)); +} + +hipError_t hipMemcpy2D_spt(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2D, dst, dpitch, src, spitch, width, height, kind); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(hipMemcpy2D_common(dst, dpitch, src, spitch, width, height, kind, + getPerThreadDefaultStream())); +} + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DAsync, dst, dpitch, src, spitch, width, height, kind, stream); + STREAM_CAPTURE(hipMemcpy2DAsync, stream, dst, dpitch, src, spitch, width, height, kind); + HIP_RETURN_DURATION(hipMemcpy2D_common(dst, dpitch, src, spitch, width, height, kind, stream, true)); +} + +hipError_t hipMemcpy2DAsync_spt(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DAsync, dst, dpitch, src, spitch, width, height, kind, stream); + PER_THREAD_DEFAULT_STREAM(stream); + STREAM_CAPTURE(hipMemcpy2DAsync, stream, dst, dpitch, src, spitch, width, height, kind); + HIP_RETURN_DURATION(hipMemcpy2D_common(dst, dpitch, src, spitch, width, height, kind, stream, true)); +} + +hipError_t ihipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t 
stream, bool isAsync = false) { + if (dst == nullptr) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = 0; + desc.srcY = 0; + desc.srcMemoryType = std::get<0>(hip::getMemoryType(kind)); + desc.srcHost = const_cast(src); + desc.srcDevice = const_cast(src); + desc.srcArray = nullptr; + desc.srcPitch = spitch; + + desc.dstXInBytes = wOffset; + desc.dstY = hOffset; + desc.dstMemoryType = hipMemoryTypeArray; + desc.dstHost = nullptr; + desc.dstDevice = nullptr; + desc.dstArray = dst; + desc.dstPitch = 0; // Ignored. + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpy2DToArray_common(hipArray* dst, size_t wOffset, size_t hOffset, + const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream=nullptr, + bool isAsync = false) { + + hipError_t validateParams = hipSuccess, validateSrc = hipSuccess, validateDst = hipSuccess; + if ((validateParams = hipMemcpy2DValidateParams(kind,stream)) != hipSuccess) { + return validateParams; + } + if ((validateSrc = hipMemcpy2DValidateBuffer(src,spitch,width)) != hipSuccess) { + return validateSrc; + } + if ((validateDst = hipMemcpy2DValidateArray(dst, wOffset, hOffset, width, height)) != hipSuccess) { + return validateDst; + } + return ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, isAsync); +} + +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind)); +} + +hipError_t hipMemcpy2DToArray_spt(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, 
hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, + width, height, kind, getPerThreadDefaultStream())); +} + +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyToArray, dst, wOffset, hOffset, src, count, kind); + CHECK_STREAM_CAPTURING(); + if (dst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (dst->height != 0) ? dst->height : 1; + const size_t witdthInBytes = count / arrayHeight; + + const size_t height = (count / dst->width) / hip::getElementSize(dst); + + HIP_RETURN_DURATION(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, 0 /* spitch */, witdthInBytes, height, kind, nullptr)); +} + +hipError_t ihipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + if (src == nullptr) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = wOffsetSrc; + desc.srcY = hOffsetSrc; + desc.srcMemoryType = hipMemoryTypeArray; + desc.srcHost = nullptr; + desc.srcDevice = nullptr; + desc.srcArray = const_cast(src); + desc.srcPitch = 0; // Ignored. 
+ + desc.dstXInBytes = 0; + desc.dstY = 0; + desc.dstMemoryType = std::get<1>(hip::getMemoryType(kind)); + desc.dstHost = dst; + desc.dstDevice = dst; + desc.dstArray = nullptr; + desc.dstPitch = dpitch; + + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpyFromArray_common(void* dst, hipArray_const_t src, size_t wOffsetSrc, + size_t hOffset, size_t count, hipMemcpyKind kind, hipStream_t stream) { + CHECK_STREAM_CAPTURING(); + if (src == nullptr) { + return hipErrorInvalidValue; + } + + const size_t arrayHeight = (src->height != 0) ? src->height : 1; + const size_t witdthInBytes = count / arrayHeight; + + const size_t height = (count / src->width) / hip::getElementSize(src); + + return ihipMemcpy2DFromArray(dst, 0 /* dpitch */, src, wOffsetSrc, hOffset, witdthInBytes, height, kind, stream); +} + +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromArray, dst, src, wOffsetSrc, hOffset, count, kind); + HIP_RETURN_DURATION(hipMemcpyFromArray_common(dst, src, wOffsetSrc, hOffset, count, kind, nullptr)); +} + +hipError_t hipMemcpyFromArray_spt(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t count, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyFromArray, dst, src, wOffsetSrc, hOffset, count, kind); + HIP_RETURN_DURATION(hipMemcpyFromArray_common(dst, src, wOffsetSrc, hOffset, count, kind, + getPerThreadDefaultStream())); +} + +hipError_t ihipMemcpyAtoD(hipArray* srcArray, void* dstDevice, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, + size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = + ihipMemcpyAtoDCommand(command, srcArray, 
dstDevice, srcOrigin, dstOrigin, copyRegion, + dstRowPitch, dstSlicePitch, hip_stream); + if (status != hipSuccess) return status; + return ihipMemcpyCmdEnqueue(command, isAsync); +} +hipError_t ihipMemcpyDtoA(void* srcDevice, hipArray* dstArray, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, + size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = + ihipMemcpyDtoACommand(command, srcDevice, dstArray, srcOrigin, dstOrigin, copyRegion, + srcRowPitch, srcSlicePitch, hip_stream); + if (status != hipSuccess) return status; + return ihipMemcpyCmdEnqueue(command, isAsync); +} +hipError_t ihipMemcpyDtoD(void* srcDevice, void* dstDevice, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + hipStream_t stream, bool isAsync = false) { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = ihipMemcpyDtoDCommand(command, srcDevice, dstDevice, srcOrigin, dstOrigin, + copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, + dstSlicePitch, hip_stream); + if (status != hipSuccess) return status; + return ihipMemcpyCmdEnqueue(command, isAsync); +} +hipError_t ihipMemcpyDtoH(void* srcDevice, void* dstHost, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + hipStream_t stream, bool isAsync = false) { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = ihipMemcpyDtoHCommand(command, srcDevice, dstHost, srcOrigin, dstOrigin, + 
copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, + dstSlicePitch, hip_stream, isAsync); + if (status != hipSuccess) return status; + return ihipMemcpyCmdEnqueue(command, isAsync); +} +hipError_t ihipMemcpyHtoD(const void* srcHost, void* dstDevice, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + hipStream_t stream, bool isAsync = false) { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = ihipMemcpyHtoDCommand(command, srcHost, dstDevice, srcOrigin, dstOrigin, + copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, + dstSlicePitch, hip_stream, isAsync); + if (status != hipSuccess) return status; + return ihipMemcpyCmdEnqueue(command, isAsync); +} +hipError_t ihipMemcpyAtoA(hipArray* srcArray, hipArray* dstArray, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, hipStream_t stream, + bool isAsync = false) { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = ihipMemcpyAtoACommand(command, srcArray, dstArray, srcOrigin, dstOrigin, + copyRegion, hip_stream); + if (status != hipSuccess) return status; + return ihipMemcpyCmdEnqueue(command, isAsync); +} +hipError_t ihipMemcpyHtoA(const void* srcHost, hipArray* dstArray, amd::Coord3D srcOrigin, + amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, + size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { + amd::Command* command; + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + hipError_t status = + ihipMemcpyHtoACommand(command, srcHost, dstArray, srcOrigin, dstOrigin, copyRegion, + srcRowPitch, srcSlicePitch, hip_stream, isAsync); + if (status != hipSuccess) 
return status;
+  return ihipMemcpyCmdEnqueue(command, isAsync);
+}
+hipError_t ihipMemcpyAtoH(hipArray* srcArray, void* dstHost, amd::Coord3D srcOrigin,
+                          amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch,
+                          size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) {
+  amd::Command* command;
+  hip::Stream* hip_stream = hip::getStream(stream);
+  if (hip_stream == nullptr) {
+    return hipErrorInvalidValue;
+  }
+  hipError_t status =
+      ihipMemcpyAtoHCommand(command, srcArray, dstHost, srcOrigin, dstOrigin, copyRegion,
+                            dstRowPitch, dstSlicePitch, hip_stream, isAsync);
+  if (status != hipSuccess) return status;
+  return ihipMemcpyCmdEnqueue(command, isAsync);
+}
+
+hipError_t hipMemcpyHtoA(hipArray* dstArray,
+                         size_t dstOffset,
+                         const void* srcHost,
+                         size_t ByteCount) {
+  HIP_INIT_API(hipMemcpyHtoA, dstArray, dstOffset, srcHost, ByteCount);
+  CHECK_STREAM_CAPTURING();
+  HIP_RETURN_DURATION(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr));
+}
+
+hipError_t hipMemcpyAtoH(void* dstHost,
+                         hipArray* srcArray,
+                         size_t srcOffset,
+                         size_t ByteCount) {
+  HIP_INIT_API(hipMemcpyAtoH, dstHost, srcArray, srcOffset, ByteCount);
+  CHECK_STREAM_CAPTURING();
+  HIP_RETURN_DURATION(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr));
+}
+
+// Validates a hipMemcpy3DParms descriptor before a 3D copy is issued.
+//
+// Returns:
+//   hipErrorInvalidValue           - p is null; both or neither of (array, ptr) are set for the
+//                                    source or for the destination; source and destination are
+//                                    both arrays with different element sizes; or
+//                                    IsHtoHMemcpyValid() rejects the pointer/kind combination.
+//   hipErrorInvalidPitchValue      - a pitch is smaller than the matching xsize.
+//   hipErrorInvalidMemcpyDirection - kind is outside [hipMemcpyHostToHost, hipMemcpyDefault].
+//   hipSuccess                     - otherwise.
+hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) {
+  // Passing more than one non-zero source or destination will cause hipMemcpy3D() to
+  // return an error.
+  if (p == nullptr || ((p->srcArray != nullptr) && (p->srcPtr.ptr != nullptr)) ||
+      ((p->dstArray != nullptr) && (p->dstPtr.ptr != nullptr))) {
+    return hipErrorInvalidValue;
+  }
+  // The struct passed to hipMemcpy3D() must specify one of srcArray or srcPtr and one of dstArray
+  // or dstPtr.
+  if (((p->srcArray == nullptr) && (p->srcPtr.ptr == nullptr)) ||
+      ((p->dstArray == nullptr) && (p->dstPtr.ptr == nullptr))) {
+    return hipErrorInvalidValue;
+  }
+
+  // If the source and destination are both arrays, hipMemcpy3D() will return an error if they do
+  // not have the same element size. The comparison must be source against destination; comparing
+  // the destination with itself would make this check unreachable.
+  if (((p->srcArray != nullptr) && (p->dstArray != nullptr)) &&
+      (hip::getElementSize(p->srcArray) != hip::getElementSize(p->dstArray))) {
+    return hipErrorInvalidValue;
+  }
+
+  // Pitch should not be less than width for both src and dst.
+  if (p->srcPtr.pitch < p->srcPtr.xsize || p->dstPtr.pitch < p->dstPtr.xsize) {
+    return hipErrorInvalidPitchValue;
+  }
+
+  if (p->kind < hipMemcpyHostToHost || p->kind > hipMemcpyDefault) {
+    return hipErrorInvalidMemcpyDirection;
+  }
+  //If src and dst ptr are null then kind must be either h2h or def.
+  if (!IsHtoHMemcpyValid(p->dstPtr.ptr, p->srcPtr.ptr, p->kind)) {
+    return hipErrorInvalidValue;
+  }
+  return hipSuccess;
+}
+
+hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p,
+                               hip::Stream* stream) {
+  const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p);
+  return ihipGetMemcpyParam3DCommand(command, &desc, stream);
+}
+
+hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
+  hipError_t status = ihipMemcpy3D_validate(p);
+  if (status != hipSuccess) {
+    return status;
+  }
+  const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p);
+
+  return ihipMemcpyParam3D(&desc, stream, isAsync);
+}
+
+hipError_t hipMemcpy3D_common(const hipMemcpy3DParms* p, hipStream_t stream = nullptr) {
+  CHECK_STREAM_CAPTURING();
+  return ihipMemcpy3D(p, stream);
+}
+
+hipError_t hipMemcpy3D(const hipMemcpy3DParms* p) {
+  HIP_INIT_API(hipMemcpy3D, p);
+  HIP_RETURN_DURATION(hipMemcpy3D_common(p));
+}
+
+hipError_t hipMemcpy3D_spt(const hipMemcpy3DParms* p) {
+  HIP_INIT_API(hipMemcpy3D, p);
+  HIP_RETURN_DURATION(hipMemcpy3D_common(p, getPerThreadDefaultStream()));
+}
+
+hipError_t
hipMemcpy3DAsync_common(const hipMemcpy3DParms* p, hipStream_t stream) { + STREAM_CAPTURE(hipMemcpy3DAsync, stream, p); + return ihipMemcpy3D(p, stream, true); +} + +hipError_t hipMemcpy3DAsync(const hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DAsync, p, stream); + HIP_RETURN_DURATION(hipMemcpy3DAsync_common(p, stream)); +} + +hipError_t hipMemcpy3DAsync_spt(const hipMemcpy3DParms* p, hipStream_t stream) { + HIP_INIT_API(hipMemcpy3DAsync, p, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN_DURATION(hipMemcpy3DAsync_common(p, stream)); +} + +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) { + HIP_INIT_API(hipDrvMemcpy3D, pCopy); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(ihipMemcpyParam3D(pCopy, nullptr)); +} + +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { + HIP_INIT_API(hipDrvMemcpy3DAsync, pCopy, stream); + + HIP_RETURN_DURATION(ihipMemcpyParam3D(pCopy, stream, true)); +} + +hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, size_t offset, + int64_t value, size_t valueSize, size_t sizeBytes, + hip::Stream* stream) { + if ((memory == nullptr) || (stream == nullptr)) { + return hipErrorInvalidValue; + } + + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + // surface=[pitch, width, height] + amd::Coord3D surface(sizeBytes, sizeBytes, 1); + amd::FillMemoryCommand* fillMemCommand = + new amd::FillMemoryCommand(*stream, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize, surface); + if (fillMemCommand == nullptr) { + return hipErrorOutOfMemory; + } + + if (!fillMemCommand->validatePeerMemory()) { + delete fillMemCommand; + return hipErrorInvalidValue; + } + command = fillMemCommand; + return hipSuccess; +} + +hipError_t ihipMemset_validate(void* dst, int64_t value, size_t valueSize, + size_t sizeBytes) { + if (sizeBytes == 0) { + // Skip if 
nothing needs filling. + return hipSuccess; + } + + if (dst == nullptr) { + return hipErrorInvalidValue; + } + + size_t offset = 0; + amd::Memory* memory = getMemoryObject(dst, offset); + if (memory == nullptr) { + // dst ptr is host ptr hence error + return hipErrorInvalidValue; + } + // Return error if sizeBytes passed to memcpy is more than the actual size allocated + if (sizeBytes > (memory->getSize() - offset)){ + return hipErrorInvalidValue; + } + return hipSuccess; +} + +hipError_t ihipGraphMemsetParams_validate(const hipMemsetParams* pNodeParams) { + if (pNodeParams == nullptr) { + return hipErrorInvalidValue; + } + + if (pNodeParams->width == 0) { + return hipErrorInvalidValue; + } + + if (pNodeParams->elementSize != 1 && pNodeParams->elementSize != 2 && pNodeParams->elementSize != 4) { + return hipErrorInvalidValue; + } + + if (pNodeParams->height <= 0) { + return hipErrorInvalidValue; + } + + size_t discardOffset = 0; + amd::Memory *memObj = getMemoryObject(pNodeParams->dst, discardOffset); + if (memObj != nullptr) { + if ((pNodeParams->pitch * pNodeParams->height) > memObj->getSize()) { + return hipErrorInvalidValue; + } + } + + return hipSuccess; +} + +hipError_t ihipMemsetCommand(std::vector& commands, void* dst, int64_t value, + size_t valueSize, size_t sizeBytes, hip::Stream* stream) { + hipError_t hip_error = hipSuccess; + auto aligned_dst = amd::alignUp(reinterpret_cast
(dst), sizeof(uint64_t)); + size_t offset = 0; + amd::Memory* memory = getMemoryObject(dst, offset); + size_t n_head_bytes = 0; + size_t n_tail_bytes = 0; + amd::Command* command; + + hip_error = packFillMemoryCommand(command, memory, offset, value, valueSize, sizeBytes, + stream); + commands.push_back(command); + + return hip_error; +} + +hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeBytes, + hipStream_t stream, bool isAsync = false) { + hipError_t hip_error = hipSuccess; + do { + // Nothing to do, fill size is 0. Returns hipSuccess. + if (sizeBytes == 0) { + break; + } + + // In case of validation failure stop processing. Returns hip_error. + hip_error = ihipMemset_validate(dst, value, valueSize, sizeBytes); + if (hip_error != hipSuccess) { + break; + } + // This is required to comply with the spec + // spec says hipMemset will be asynchronous when destination memory is device memory + // and pointer is non-offseted + if (isAsync == false) { + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(dst, offset); + auto flags = memObj->getMemFlags(); + if ((memObj->getUserData().sync_mem_ops_) + || (offset == 0 && !(flags & (CL_MEM_SVM_FINE_GRAIN_BUFFER + | CL_MEM_SVM_ATOMICS | CL_MEM_USE_HOST_PTR)))) { + isAsync = true; + } + } + std::vector commands; + hip::Stream* hip_stream = hip::getStream(stream); + hip_error = ihipMemsetCommand(commands, dst, value, valueSize, sizeBytes, hip_stream); + if (hip_error != hipSuccess) { + break; + } + + for (auto command : commands) { + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } + } while (0); + return hip_error; +} + +hipError_t hipMemset_common(void* dst, int value, size_t sizeBytes, hipStream_t stream=nullptr) { + CHECK_STREAM_CAPTURING(); + return ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream); +} + +hipError_t hipMemset_spt(void* dst, int value, size_t sizeBytes) { + HIP_INIT_API(hipMemset, dst, value, sizeBytes); + 
HIP_RETURN(hipMemset_common(dst, value, sizeBytes, getPerThreadDefaultStream())); +} + +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { + HIP_INIT_API(hipMemset, dst, value, sizeBytes); + HIP_RETURN(hipMemset_common(dst, value, sizeBytes)); +} + +hipError_t hipMemsetAsync_common(void* dst, int value, size_t sizeBytes, hipStream_t stream) { + size_t valueSize = sizeof(int8_t); + STREAM_CAPTURE(hipMemsetAsync, stream, dst, value, valueSize, sizeBytes); + return ihipMemset(dst, value, sizeof(int8_t), sizeBytes, stream, true); +} + +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); + HIP_RETURN(hipMemsetAsync_common(dst, value, sizeBytes, stream)); +} + +hipError_t hipMemsetAsync_spt(void* dst, int value, size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemsetAsync, dst, value, sizeBytes, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipMemsetAsync_common(dst, value, sizeBytes, stream)); +} + +hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) { + HIP_INIT_API(hipMemsetD8, dst, value, count); + CHECK_STREAM_CAPTURING(); + HIP_RETURN(ihipMemset(dst, value, sizeof(int8_t), count * sizeof(int8_t), nullptr)); +} + +hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD8Async, dst, value, count, stream); + int iValue = value; + size_t valueSize = sizeof(int8_t); + size_t sizeBytes = count * sizeof(int8_t); + STREAM_CAPTURE(hipMemsetAsync, stream, dst, iValue, valueSize, sizeBytes); + HIP_RETURN(ihipMemset(dst, value, valueSize, sizeBytes, stream, true)); +} + +hipError_t hipMemsetD16(hipDeviceptr_t dst, unsigned short value, size_t count) { + HIP_INIT_API(hipMemsetD16, dst, value, count); + CHECK_STREAM_CAPTURING(); + HIP_RETURN(ihipMemset(dst, value, sizeof(int16_t), count * sizeof(int16_t), nullptr)); +} + +hipError_t 
hipMemsetD16Async(hipDeviceptr_t dst, unsigned short value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD16Async, dst, value, count, stream); + int iValue = value; + size_t valueSize = sizeof(int16_t); + size_t sizeBytes = count * sizeof(int16_t); + STREAM_CAPTURE(hipMemsetAsync, stream, dst, iValue, valueSize, sizeBytes); + HIP_RETURN(ihipMemset(dst, value, valueSize, sizeBytes, stream, true)); +} + +hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { + HIP_INIT_API(hipMemsetD32, dst, value, count); + CHECK_STREAM_CAPTURING(); + HIP_RETURN(ihipMemset(dst, value, sizeof(int32_t), count * sizeof(int32_t), nullptr)); +} + +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream) { + HIP_INIT_API(hipMemsetD32Async, dst, value, count, stream); + int iValue = value; + size_t valueSize = sizeof(int32_t); + size_t sizeBytes = count * sizeof(int32_t); + STREAM_CAPTURE(hipMemsetAsync, stream, dst, iValue, valueSize, sizeBytes); + HIP_RETURN(ihipMemset(dst, value, valueSize, sizeBytes, stream, true)); +} + +hipError_t ihipMemset3D_validate(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + size_t sizeBytes) { + size_t offset = 0; + amd::Memory* memory = getMemoryObject(pitchedDevPtr.ptr, offset, sizeBytes); + + if (memory == nullptr) { + return hipErrorInvalidValue; + } + // Return error if sizeBytes passed to memcpy is more than the actual size allocated + if (sizeBytes > (memory->getSize() - offset)){ + return hipErrorInvalidValue; + } + if (pitchedDevPtr.pitch == memory->getUserData().pitch_) { + if (extent.height > memory->getUserData().height_) { + return hipErrorInvalidValue; + } + } + return hipSuccess; +} + +hipError_t ihipMemset3DCommand(std::vector &commands, hipPitchedPtr pitchedDevPtr, + int value, hipExtent extent, hip::Stream* stream, size_t elementSize = 1) { + size_t offset = 0; + auto sizeBytes = extent.width * extent.height * extent.depth; + amd::Memory* memory = 
getMemoryObject(pitchedDevPtr.ptr, offset); + if (pitchedDevPtr.pitch == extent.width) { + return ihipMemsetCommand(commands, pitchedDevPtr.ptr, value, elementSize, + static_cast(sizeBytes), stream); + } + // Workaround for cases when pitch > row until fill kernel will be updated to support pitch. + // Fall back to filling one row at a time. + amd::Coord3D origin(offset); + amd::Coord3D region(extent.width, extent.height, extent.depth); + amd::Coord3D surface(pitchedDevPtr.pitch, pitchedDevPtr.xsize, pitchedDevPtr.ysize); + amd::BufferRect rect; + if (pitchedDevPtr.pitch == 0 || + !rect.create(static_cast(origin), + static_cast(amd::Coord3D{pitchedDevPtr.xsize, pitchedDevPtr.ysize, extent.depth}), + pitchedDevPtr.pitch, 0)) { + return hipErrorInvalidValue; + } + amd::FillMemoryCommand* command; + command = new amd::FillMemoryCommand( + *stream, CL_COMMAND_FILL_BUFFER, amd::Command::EventWaitList{}, *memory->asBuffer(), + &value, elementSize, origin, region, surface); + commands.push_back(command); + return hipSuccess; +} + + +hipError_t ihipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, + hipStream_t stream, bool isAsync = false) { + auto sizeBytes = extent.width * extent.height * extent.depth; + + if (sizeBytes == 0) { + // sizeBytes is zero hence returning early as nothing to be set + return hipSuccess; + } + hipError_t status = ihipMemset3D_validate(pitchedDevPtr, value, extent, sizeBytes); + if (status != hipSuccess) { + return status; + } + // This is required to comply with the spec + // spec says hipMemset will be asynchronous when destination memory is device memory + // and pointer is non-offseted + if (isAsync == false) { + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(pitchedDevPtr.ptr, offset); + auto flags = memObj->getMemFlags(); + if (offset == 0 && + !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS | CL_MEM_SVM_FINE_GRAIN_BUFFER))) { + isAsync = true; + } + } + hip::Stream* hip_stream = hip::getStream(stream); + 
std::vector commands; + status = ihipMemset3DCommand(commands, pitchedDevPtr, value, extent, hip_stream); + if (status != hipSuccess) { + return status; + } + for (auto& command : commands) { + command->enqueue(); + if (!isAsync) { + command->awaitCompletion(); + } + command->release(); + } + return hipSuccess; +} + +hipError_t hipMemset2D_common(void* dst, size_t pitch, int value, size_t width, + size_t height, hipStream_t stream=nullptr) { + CHECK_STREAM_CAPTURING(); + return ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, stream); +} + +hipError_t hipMemset2D_spt(void* dst, size_t pitch, int value, size_t width, size_t height) { + HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height); + hipStream_t stream = getPerThreadDefaultStream(); + HIP_RETURN(hipMemset2D_common(dst, pitch, value, width, height, stream)); +} + +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { + HIP_INIT_API(hipMemset2D, dst, pitch, value, width, height); + HIP_RETURN(hipMemset2D_common(dst, pitch, value, width, height)); +} + +hipError_t hipMemset2DAsync_common(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream) { + STREAM_CAPTURE(hipMemset2DAsync, stream, dst, pitch, value, width, height); + + return ihipMemset3D({dst, pitch, width, height}, value, {width, height, 1}, stream, true); +} + +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream) { + HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream); + HIP_RETURN(hipMemset2DAsync_common(dst, pitch, value, width, height, stream)); +} + +hipError_t hipMemset2DAsync_spt(void* dst, size_t pitch, int value, + size_t width, size_t height, hipStream_t stream) { + HIP_INIT_API(hipMemset2DAsync, dst, pitch, value, width, height, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipMemset2DAsync_common(dst, pitch, value, width, height, stream)); +} + +hipError_t 
hipMemset3D_common(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream=nullptr) { + CHECK_STREAM_CAPTURING(); + return ihipMemset3D(pitchedDevPtr, value, extent, stream); +} + +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { + HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, extent); + HIP_RETURN(hipMemset3D_common(pitchedDevPtr, value, extent)); +} + +hipError_t hipMemset3D_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { + HIP_INIT_API(hipMemset3D, pitchedDevPtr, value, extent); + hipStream_t stream = getPerThreadDefaultStream(); + HIP_RETURN(hipMemset3D_common(pitchedDevPtr, value, extent,stream)); +} + +hipError_t hipMemset3DAsync_common(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) { + STREAM_CAPTURE(hipMemset3DAsync, stream, pitchedDevPtr, value, extent); + return ihipMemset3D(pitchedDevPtr, value, extent, stream, true); +} + +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) { + HIP_INIT_API(hipMemset3DAsync, pitchedDevPtr, value, extent, stream); + HIP_RETURN(hipMemset3DAsync_common(pitchedDevPtr, value, extent, stream)); +} + +hipError_t hipMemset3DAsync_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream) { + HIP_INIT_API(hipMemset3DAsync, pitchedDevPtr, value, extent, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipMemset3DAsync_common(pitchedDevPtr, value, extent, stream)); +} + +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, + size_t height, unsigned int elementSizeBytes) { + HIP_INIT_API(hipMemAllocPitch, dptr, pitch, widthInBytes, height, elementSizeBytes); + CHECK_STREAM_CAPTURE_SUPPORTED(); + if (widthInBytes == 0 || height == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + if (elementSizeBytes != 4 && elementSizeBytes != 8 && elementSizeBytes != 16) { + HIP_RETURN(hipErrorInvalidValue); + } + 
HIP_RETURN(hipMallocPitch(dptr, pitch, widthInBytes, height));
+}
+
+hipError_t hipMemAllocHost(void** ptr, size_t size) {
+  HIP_INIT_API(hipMemAllocHost, ptr, size);
+  CHECK_STREAM_CAPTURE_SUPPORTED();
+  HIP_RETURN_DURATION(hipHostMalloc(ptr, size, 0));
+}
+
+// Fills *handle with an IPC handle for the allocation containing dev_ptr.
+//
+// Returns hipErrorInvalidValue when handle or dev_ptr is null, or when the device's
+// IpcCreate() call fails; hipSuccess otherwise. On success the handle also records the
+// id of the creating process (owners_process_id).
+hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) {
+  HIP_INIT_API(hipIpcGetMemHandle, handle, dev_ptr);
+
+  amd::Device* device = nullptr;
+  ihipIpcMemHandle_t* ihandle = nullptr;
+
+  if ((handle == nullptr) || (dev_ptr == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  device = hip::getCurrentDevice()->devices()[0];
+  ihandle = reinterpret_cast<ihipIpcMemHandle_t*>(handle);
+
+  if(!device->IpcCreate(dev_ptr, &(ihandle->psize), &(ihandle->ipc_handle), &(ihandle->poffset))) {
+    // %p, not %x: dev_ptr is a pointer and may be wider than unsigned int.
+    LogPrintfError("IPC memory creation failed for memory: %p", dev_ptr);
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  ihandle->owners_process_id = amd::Os::getProcessId();
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Attaches to memory exported by another process through hipIpcGetMemHandle() and stores
+// the resulting pointer in *dev_ptr.
+//
+// Returns:
+//   hipErrorInvalidValue         - dev_ptr is null, flags is not hipIpcMemLazyEnablePeerAccess,
+//                                  or the handle records a size of 0.
+//   hipErrorInvalidContext       - the handle was created by the calling process.
+//   hipErrorInvalidDevicePointer - the device's IpcAttach() call fails.
+//   hipSuccess                   - otherwise.
+hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigned int flags) {
+  HIP_INIT_API(hipIpcOpenMemHandle, dev_ptr, &handle, flags);
+
+  amd::Device* device = nullptr;
+  ihipIpcMemHandle_t* ihandle = nullptr;
+
+  if (dev_ptr == nullptr || flags != hipIpcMemLazyEnablePeerAccess) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  /* Call the IPC Attach from Device class */
+  device = hip::getCurrentDevice()->devices()[0];
+  ihandle = reinterpret_cast<ihipIpcMemHandle_t*>(&handle);
+
+  if (ihandle->psize == 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  if (ihandle->owners_process_id == amd::Os::getProcessId()) {
+    HIP_RETURN(hipErrorInvalidContext);
+  }
+
+  if(!device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize,
+                        ihandle->poffset, flags, dev_ptr)) {
+    // One argument per conversion specifier; size and offset are cast so they match %zu
+    // regardless of how the handle struct declares them.
+    LogPrintfError("Cannot attach ipc_handle: with ipc_size: %zu "
+                   "ipc_offset: %zu flags: %u",
+                   static_cast<size_t>(ihandle->psize),
+                   static_cast<size_t>(ihandle->poffset), flags);
+    HIP_RETURN(hipErrorInvalidDevicePointer);
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+hipError_t hipIpcCloseMemHandle(void*
dev_ptr) { + HIP_INIT_API(hipIpcCloseMemHandle, dev_ptr); + + amd::Device* device = nullptr; + amd::Memory* amd_mem_obj = nullptr; + + hip::getNullStream()->finish(); + + if (dev_ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + /* Call IPC Detach from Device class */ + device = hip::getCurrentDevice()->devices()[0]; + if (device == nullptr) { + HIP_RETURN(hipErrorNoDevice); + } + + /* detach the memory */ + if (!device->IpcDetach(dev_ptr)){ + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + + +hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsigned flags) { + HIP_INIT_API(hipHostGetDevicePointer, devicePointer, hostPointer, flags); + + if (devicePointer == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t offset = 0; + + amd::Memory* memObj = getMemoryObject(hostPointer, offset); + if (!memObj) { + HIP_RETURN(hipErrorInvalidValue); + } +*devicePointer = reinterpret_cast(memObj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0])->virtualAddress() + offset); + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { + HIP_INIT_API(hipPointerGetAttributes, attributes, ptr); + + if (attributes == nullptr || ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + int device = 0; + device::Memory* devMem = nullptr; + memset(attributes, 0, sizeof(hipPointerAttribute_t)); + + if (memObj != nullptr) { + attributes->type = ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags())? 
hipMemoryTypeHost : hipMemoryTypeDevice; + if (attributes->type == hipMemoryTypeHost) { + if (memObj->getHostMem() != nullptr) { + attributes->hostPointer = static_cast(memObj->getHostMem()) + offset; + } + else { + attributes->hostPointer = static_cast(memObj->getSvmPtr()) + offset; + } + } + // the pointer that attribute is retrieved for might not be on the current device + for (const auto& device : g_devices) { + if(device->deviceId() == memObj->getUserData().deviceId) { + devMem = memObj->getDeviceMemory(*device->devices()[0]); + break; + } + } + //getDeviceMemory can fail, hence validate the sanity of the mem obtained + if (nullptr == devMem) { + DevLogPrintfError("getDeviceMemory for ptr failed : %p \n", ptr); + HIP_RETURN(hipErrorMemoryAllocation); + } + + attributes->devicePointer = reinterpret_cast(devMem->virtualAddress() + offset); + constexpr uint32_t kManagedAlloc = (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR); + attributes->isManaged = + ((memObj->getMemFlags() & kManagedAlloc) == kManagedAlloc) ? 
true : false; + attributes->allocationFlags = memObj->getUserData().flags; + attributes->device = memObj->getUserData().deviceId; + HIP_RETURN(hipSuccess); + } + + LogPrintfError("Cannot get amd_mem_obj for ptr: 0x%x \n", ptr); + HIP_RETURN(hipErrorInvalidValue); +} + +// ================================================================================================ +hipError_t ihipPointerSetAttribute(const void* value, hipPointer_attribute attribute, + hipDeviceptr_t ptr) { + if (attribute != HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS) { + return hipErrorInvalidValue; + } + + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + if (memObj == nullptr) { + return hipErrorInvalidDevicePointer; + } + + memObj->getUserData().sync_mem_ops_ + = static_cast(*(reinterpret_cast(value))); + + return hipSuccess; +} + +// ================================================================================================ +hipError_t ihipPointerGetAttributes(void* data, hipPointer_attribute attribute, + hipDeviceptr_t ptr) { + + size_t offset = 0; + amd::Memory* memObj = getMemoryObject(ptr, offset); + constexpr uint32_t kManagedAlloc = (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR); + + hipError_t status = hipSuccess; + + switch (attribute) { + case HIP_POINTER_ATTRIBUTE_CONTEXT : { + status = hipErrorNotSupported; + break; + } + case HIP_POINTER_ATTRIBUTE_MEMORY_TYPE : { + if (memObj) { // checks for host type or device type + *reinterpret_cast(data) = + ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memObj->getMemFlags())? 
hipMemoryTypeHost : hipMemoryTypeDevice; + } else { // checks for array type + cl_mem dstMemObj = reinterpret_cast((static_cast(ptr))->data); + if (!is_valid(dstMemObj)) { + *reinterpret_cast(data) = 0; + return hipErrorInvalidValue; + } + amd::Image* dstImage = as_amd(dstMemObj)->asImage(); + if (dstImage){ + *reinterpret_cast(data) = hipMemoryTypeArray; + } else { + *reinterpret_cast(data) = 0; + return hipErrorInvalidValue; + } + } + break; + } + case HIP_POINTER_ATTRIBUTE_DEVICE_POINTER : { + if (memObj) { + device::Memory* devMem = memObj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0]); + + //getDeviceMemory can fail, hence validate the sanity of the mem obtained + if (nullptr == devMem) { + DevLogPrintfError("getDeviceMemory for ptr failed : %p \n", ptr); + return hipErrorMemoryAllocation; + } + *reinterpret_cast(data) = + reinterpret_cast(devMem->virtualAddress() + offset); + } else { + *reinterpret_cast(data) = nullptr; + return hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_HOST_POINTER : { + if (memObj) { + if ((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & memObj->getMemFlags()) { + if (memObj->getHostMem() != nullptr) { + // Registered memory + *reinterpret_cast(data) = + static_cast(memObj->getHostMem()) + offset; + } else { + // Prepinned memory + *reinterpret_cast(data) = + static_cast(memObj->getSvmPtr()) + offset; + } + } else { + *reinterpret_cast(data) = nullptr; + status = hipErrorInvalidValue; + } + } else { // Host Memory + *reinterpret_cast(data) = nullptr; + status = hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_P2P_TOKENS : { + // Currently not supported, deprecated in cuda as well + status = hipErrorNotSupported; + break; + } + case HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS : { + // This attribute is ideally used in hipPointerSetAttribute, defaults to true + *reinterpret_cast(data) = true; + break; + } + case HIP_POINTER_ATTRIBUTE_BUFFER_ID : { + if (memObj) { + *reinterpret_cast(data) = 
memObj->getUniqueId(); + } else { // ptr passed must be allocated using HIP memory allocation API + *reinterpret_cast(data) = 0; + return hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_IS_MANAGED : { + if (memObj) { + *reinterpret_cast(data) = + ((memObj->getMemFlags() & kManagedAlloc) == kManagedAlloc) ? true : false; + } else { + *reinterpret_cast(data) = false; + return hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL : { + if (memObj) { + *reinterpret_cast(data) = memObj->getUserData().deviceId; + } else { + // for host memory, -2 is returned by default similar to cuda + *reinterpret_cast(data) = -2; + status = hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE : { + // TODO: Unclear what to be done for this attribute + status = hipErrorNotSupported; + break; + } + case HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR : { + if (memObj) { + if (memObj->getHostMem() != nullptr) { + *reinterpret_cast(data) = + static_cast(memObj->getHostMem()); + } else { + device::Memory* devMem = + memObj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0]); + + //getDeviceMemory can fail, hence validate the sanity of the mem obtained + if (nullptr == devMem) { + DevLogPrintfError("getDeviceMemory for ptr failed : %p \n", ptr); + return hipErrorMemoryAllocation; + } + *reinterpret_cast(data) = + reinterpret_cast(devMem->virtualAddress()); + } + } else { + // Input is host memory pointer, invalid for device. 
+ *reinterpret_cast(data) = nullptr; + status = hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_RANGE_SIZE : { + if (memObj) { + *reinterpret_cast(data) = memObj->getSize(); + } else { + *reinterpret_cast(data) = 0; + status = hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_MAPPED : { + if (memObj) { + *reinterpret_cast(data) = true; + } else { + *reinterpret_cast(data) = false; + status = hipErrorInvalidValue; + } + break; + } + case HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES : { + // hipMemAllocationHandleType is not yet supported + LogPrintfWarning("attribute %d is not supported.", attribute); + status = hipErrorNotSupported; + break; + } + case HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE : { + // GPUDirect RDMA API is not yet supported + LogPrintfWarning("attribute %d is not supported.", attribute); + status = hipErrorNotSupported; + break; + } + case HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS : { + if (memObj) { + *reinterpret_cast(data) = memObj->getUserData().flags; + } else { + *reinterpret_cast(data) = 0; + } + break; + } + case HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE : { + // allocations from mempool are not yet supported + LogPrintfWarning("attribute %d is not supported.", attribute); + status = hipErrorNotSupported; + break; + } + default: { + LogPrintfError("Invalid attribute: %d ", attribute); + status = hipErrorInvalidValue; + break; + } + } + return status; +} + +// ================================================================================================ +hipError_t hipPointerSetAttribute(const void* value, hipPointer_attribute attribute, + hipDeviceptr_t ptr) { + HIP_INIT_API(hipPointerSetAttribute, value, attribute, ptr); + + if (ptr == nullptr || value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipPointerSetAttribute(value, attribute, ptr)); +} + +// ================================================================================================ + +hipError_t 
hipPointerGetAttribute(void* data, hipPointer_attribute attribute, hipDeviceptr_t ptr) { + HIP_INIT_API(hipPointerGetAttribute, data, attribute, ptr); + + if (ptr == nullptr || data == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipPointerGetAttributes(data, attribute, ptr)); +} + +// ================================================================================================ +hipError_t hipDrvPointerGetAttributes(unsigned int numAttributes, hipPointer_attribute* attributes, + void** data, hipDeviceptr_t ptr) { + HIP_INIT_API(hipDrvPointerGetAttributes, numAttributes, attributes, data, ptr); + + if (numAttributes == 0 || attributes == nullptr || data == nullptr || ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Ignore the status, hipDrvPointerGetAttributes always returns success + // If the ptr is invalid, the queried attributes will be assigned default values + for (int i = 0; i < numAttributes; ++i) { + hipError_t status = ihipPointerGetAttributes(data[i], attributes[i], ptr); + } + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipArrayDestroy(hipArray* array) { + HIP_INIT_API(hipArrayDestroy, array); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipArrayDestroy(array)); +} + +hipError_t ihipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* desc, + hipArray* array) { + { + amd::ScopedLock lock(hip::hipArraySetLock); + if (hip::hipArraySet.find(array) == hip::hipArraySet.end()) { + return hipErrorInvalidHandle; + } + } + + desc->Width = array->width; + desc->Height = array->height; + desc->Depth = array->depth; + desc->Format = array->Format; + desc->NumChannels = array->NumChannels; + desc->Flags = array->flags; + + return hipSuccess; +} + +hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, + hipExtent* extent, + unsigned int* flags, + hipArray* array) { + HIP_INIT_API(hipArrayGetInfo, desc, extent, flags, array); + 
CHECK_STREAM_CAPTURE_SUPPORTED(); + + if (array == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + // If all output parameters are nullptr, then no need to proceed further + if ((desc == nullptr) && (extent == nullptr) && (flags == nullptr)) { + HIP_RETURN(hipSuccess); + } + + HIP_ARRAY3D_DESCRIPTOR array3DDescriptor; + hipError_t status = ihipArray3DGetDescriptor(&array3DDescriptor, array); + + // Fill each output parameter + if (status == hipSuccess) { + if (desc != nullptr) { + *desc = hip::getChannelFormatDesc(array3DDescriptor.NumChannels, array3DDescriptor.Format); + } + + if (extent != nullptr) { + extent->width = array3DDescriptor.Width; + extent->height = array3DDescriptor.Height; + extent->depth = array3DDescriptor.Depth; + } + + if (flags != nullptr) { + *flags = array3DDescriptor.Flags; + } + } + + HIP_RETURN(status); +} + +hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + HIP_INIT_API(hipArrayGetDescriptor, pArrayDescriptor, array); + CHECK_STREAM_CAPTURE_SUPPORTED(); + + if (array == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + if (pArrayDescriptor == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_ARRAY3D_DESCRIPTOR array3DDescriptor; + hipError_t status = ihipArray3DGetDescriptor(&array3DDescriptor, array); + + // Fill each output parameter + if (status == hipSuccess) { + pArrayDescriptor->Width = array3DDescriptor.Width; + pArrayDescriptor->Height = array3DDescriptor.Height; + pArrayDescriptor->Format = array3DDescriptor.Format; + pArrayDescriptor->NumChannels = array3DDescriptor.NumChannels; + } + + HIP_RETURN(status); +} + +hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, + hipArray* array) { + HIP_INIT_API(hipArray3DGetDescriptor, pArrayDescriptor, array); + CHECK_STREAM_CAPTURE_SUPPORTED(); + + if (array == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + if (pArrayDescriptor == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + 
HIP_RETURN(ihipArray3DGetDescriptor(pArrayDescriptor, array)); +} + +hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyParam2DAsync, pCopy); + STREAM_CAPTURE(hipMemcpyParam2DAsync, stream, pCopy); + HIP_RETURN(ihipMemcpyParam2D(pCopy, stream, true)); +} + +hipError_t ihipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream, bool isAsync = false) { + hip_Memcpy2D desc = {}; + + desc.srcXInBytes = wOffsetSrc; + desc.srcY = hOffsetSrc; + desc.srcMemoryType = hipMemoryTypeArray; + desc.srcHost = nullptr; + desc.srcDevice = nullptr; + desc.srcArray = const_cast(src); + desc.srcPitch = 0; // Ignored. + + desc.dstXInBytes = wOffsetDst; + desc.dstY = hOffsetDst; + desc.dstMemoryType = hipMemoryTypeArray; + desc.dstHost = nullptr; + desc.dstDevice = nullptr; + desc.dstArray = dst; + desc.dstPitch = 0; // Ignored. 
+ + desc.WidthInBytes = width; + desc.Height = height; + + return ihipMemcpyParam2D(&desc, stream, isAsync); +} + +hipError_t hipMemcpy2DArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DArrayToArray, dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind); + CHECK_STREAM_CAPTURING(); + hipError_t validateParam = hipSuccess, validateSrc = hipSuccess, validateDst = hipSuccess; + if ((validateParam = hipMemcpy2DValidateParams(kind)) != hipSuccess) { + HIP_RETURN(validateParam); + } + if ((validateSrc = hipMemcpy2DValidateArray(src, wOffsetSrc, hOffsetSrc, width, height)) != hipSuccess) { + HIP_RETURN(validateSrc); + } + if ((validateDst = hipMemcpy2DValidateArray(dst, wOffsetDst, hOffsetDst, width, height)) != hipSuccess) { + HIP_RETURN(validateDst); + } + HIP_RETURN_DURATION(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind, nullptr)); +} + +hipError_t hipMemcpyArrayToArray(hipArray_t dst, size_t wOffsetDst, size_t hOffsetDst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpyArrayToArray, dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(ihipMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind, nullptr)); +} + +hipError_t hipMemcpy2DFromArray_common(void* dst, size_t dpitch, hipArray_const_t src, + size_t wOffsetSrc, size_t hOffset, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream=nullptr, + bool isAsync=false) { + + hipError_t validateParam = hipSuccess, validateSrc = hipSuccess, validateDst = hipSuccess; + if ((validateParam = hipMemcpy2DValidateParams(kind,stream)) != hipSuccess) { + return validateParam; + } 
+ if ((validateSrc = hipMemcpy2DValidateArray(src,wOffsetSrc, hOffset, width, height)) != hipSuccess) { + return validateSrc; + } + if ((validateDst = hipMemcpy2DValidateBuffer(dst,dpitch,width)) != hipSuccess) { + return validateDst; + } + return ihipMemcpy2DFromArray(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind, stream, isAsync); +} + +hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch,hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DFromArray, dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(hipMemcpy2DFromArray_common(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind)); +} + +hipError_t hipMemcpy2DFromArray_spt(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind) { + HIP_INIT_API(hipMemcpy2DFromArray, dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind); + hipStream_t stream = getPerThreadDefaultStream(); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(hipMemcpy2DFromArray_common(dst, dpitch, src, wOffsetSrc, hOffset, width, height, kind, stream)); +} + +hipError_t hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DFromArrayAsync, dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream); + STREAM_CAPTURE(hipMemcpy2DFromArrayAsync, stream, dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, + height, kind); + HIP_RETURN_DURATION(hipMemcpy2DFromArray_common(dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream, true)); +} + +hipError_t hipMemcpy2DFromArrayAsync_spt(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + 
HIP_INIT_API(hipMemcpy2DFromArrayAsync, dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream); + PER_THREAD_DEFAULT_STREAM(stream); + STREAM_CAPTURE(hipMemcpy2DFromArrayAsync, stream, dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, + height, kind); + HIP_RETURN_DURATION(hipMemcpy2DFromArray_common(dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream, true)); +} + +hipError_t hipMemcpyFromArrayAsync(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyFromArrayAsync, dst, src, wOffsetSrc, hOffsetSrc, count, kind, stream); + STREAM_CAPTURE(hipMemcpyFromArrayAsync, stream, dst, src, wOffsetSrc, hOffsetSrc, count, kind); + + if (src == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (src->height != 0) ? src->height : 1; + const size_t widthInBytes = count / arrayHeight; + + const size_t height = (count / src->width) / hip::getElementSize(src); + + HIP_RETURN_DURATION(ihipMemcpy2DFromArray(dst, 0 /* dpitch */, src, wOffsetSrc, hOffsetSrc, widthInBytes, height, kind, stream, true)); +} + +hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DToArrayAsync, dst, wOffset, hOffset, src, spitch, width, height, kind, stream); + STREAM_CAPTURE(hipMemcpy2DToArrayAsync, stream, dst, wOffset, hOffset, src, spitch, width, height, + kind); + HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, true)); +} + +hipError_t hipMemcpy2DToArrayAsync_spt(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpy2DToArrayAsync, dst, wOffset, hOffset, src, spitch, width, height, kind, stream); + 
PER_THREAD_DEFAULT_STREAM(stream); + STREAM_CAPTURE(hipMemcpy2DToArrayAsync, stream, dst, wOffset, hOffset, src, spitch, width, height, + kind); + HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, true)); +} + +hipError_t hipMemcpyToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_API(hipMemcpyToArrayAsync, dst, wOffset, hOffset, src, count, kind); + STREAM_CAPTURE(hipMemcpyToArrayAsync, stream, dst, wOffset, hOffset, src, count, kind); + + if (dst == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const size_t arrayHeight = (dst->height != 0) ? dst->height : 1; + const size_t widthInBytes = count / arrayHeight; + + const size_t height = (count / dst->width) / hip::getElementSize(dst); + + HIP_RETURN_DURATION(ihipMemcpy2DToArray(dst, wOffset, hOffset, src, 0 /* spitch */, widthInBytes, height, kind, stream, true)); +} + +hipError_t hipMemcpyAtoA(hipArray* dstArray, + size_t dstOffset, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoA, dstArray, dstOffset, srcArray, srcOffset, ByteCount); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(ihipMemcpyAtoA(srcArray, dstArray, {srcOffset, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, nullptr)); +} + +hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyAtoD, dstDevice, srcArray, srcOffset, ByteCount); + + HIP_RETURN_DURATION(ihipMemcpyAtoD(srcArray, dstDevice, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t hipMemcpyAtoHAsync(void* dstHost, + hipArray* srcArray, + size_t srcOffset, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyAtoHAsync, dstHost, srcArray, srcOffset, ByteCount, stream); + STREAM_CAPTURE(hipMemcpyAtoHAsync, stream, dstHost, srcArray, srcOffset, ByteCount); + 
HIP_RETURN_DURATION(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); +} + +hipError_t hipMemcpyDtoA(hipArray* dstArray, + size_t dstOffset, + hipDeviceptr_t srcDevice, + size_t ByteCount) { + HIP_INIT_API(hipMemcpyDtoA, dstArray, dstOffset, srcDevice, ByteCount); + CHECK_STREAM_CAPTURING(); + HIP_RETURN_DURATION(ihipMemcpyDtoA(srcDevice, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); +} + +hipError_t hipMemcpyHtoAAsync(hipArray* dstArray, + size_t dstOffset, + const void* srcHost, + size_t ByteCount, + hipStream_t stream) { + HIP_INIT_API(hipMemcpyHtoAAsync, dstArray, dstOffset, srcHost, ByteCount, stream); + STREAM_CAPTURE(hipMemcpyHtoAAsync, stream, dstArray, dstOffset, srcHost, ByteCount); + HIP_RETURN_DURATION(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); +} + +hipError_t hipMallocHost(void** ptr, + size_t size) { + HIP_INIT_API(hipMallocHost, ptr, size); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN_DURATION(ihipMalloc(ptr, size, CL_MEM_SVM_FINE_GRAIN_BUFFER), (ptr != nullptr)? 
*ptr : nullptr); +} + +hipError_t hipFreeHost(void *ptr) { + HIP_INIT_API(hipFreeHost, ptr); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipFree(ptr)); +} + +hipError_t hipDrvMemcpy2DUnaligned(const hip_Memcpy2D* pCopy) { + HIP_INIT_API(hipDrvMemcpy2DUnaligned, pCopy); + + HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*pCopy); + + HIP_RETURN(ihipMemcpyParam3D(&desc, nullptr)); +} + +hipError_t hipMallocMipmappedArray(hipMipmappedArray_t *mipmappedArray, + const hipChannelFormatDesc* desc, + hipExtent extent, + unsigned int numLevels, + unsigned int flags) { + HIP_INIT_API(hipMallocMipmappedArray, mipmappedArray, desc, extent, numLevels, flags); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray) { + HIP_INIT_API(hipFreeMipmappedArray, mipmappedArray); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipGetMipmappedArrayLevel(hipArray_t *levelArray, + hipMipmappedArray_const_t mipmappedArray, + unsigned int level) { + HIP_INIT_API(hipGetMipmappedArrayLevel, levelArray, mipmappedArray, level); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t ihipMipmapArrayCreate(hipMipmappedArray_t* mipmapped_array_pptr, + HIP_ARRAY3D_DESCRIPTOR* mipmapped_array_desc_ptr, + unsigned int num_mipmap_levels) { + bool mipMapSupport = true; + amd::Context& context = *hip::getCurrentDevice()->asContext(); + const std::vector& devices = context.devices(); + for (auto& dev : devices) { + if (!dev->settings().checkExtension(ClKhrMipMapImage)) { + mipMapSupport = false; + } + } + if (mipMapSupport == false) { + LogPrintfError("Mipmap not supported on one of the devices, Mip Level: %d", num_mipmap_levels); + return hipErrorNotSupported; + } + const cl_channel_order channel_order = hip::getCLChannelOrder( + mipmapped_array_desc_ptr->NumChannels, 0); + const cl_channel_type channel_type = hip::getCLChannelType(mipmapped_array_desc_ptr->Format, + 
hipReadModeElementType); + const cl_mem_object_type image_type = hip::getCLMemObjectType(mipmapped_array_desc_ptr->Width, + mipmapped_array_desc_ptr->Height, + mipmapped_array_desc_ptr->Depth, + mipmapped_array_desc_ptr->Flags); + hipError_t status = hipSuccess; + // Create a new amd::Image with mipmap + amd::Image* image = ihipImageCreate(channel_order, + channel_type, + image_type, + mipmapped_array_desc_ptr->Width, + mipmapped_array_desc_ptr->Height, + mipmapped_array_desc_ptr->Depth, + mipmapped_array_desc_ptr->Depth, + 0 /* row pitch */, + 0 /* slice pitch */, + num_mipmap_levels, + nullptr, /* buffer */ + status); + + if (image == nullptr) { + return status; + } + + cl_mem cl_mem_obj = as_cl(image); + *mipmapped_array_pptr = new hipMipmappedArray(); + (*mipmapped_array_pptr)->data = reinterpret_cast(cl_mem_obj); + + (*mipmapped_array_pptr)->desc = hip::getChannelFormatDesc( + mipmapped_array_desc_ptr->NumChannels, + mipmapped_array_desc_ptr->Format); + (*mipmapped_array_pptr)->type = image_type; + (*mipmapped_array_pptr)->width = mipmapped_array_desc_ptr->Width; + (*mipmapped_array_pptr)->height = mipmapped_array_desc_ptr->Height; + (*mipmapped_array_pptr)->depth = mipmapped_array_desc_ptr->Depth; + (*mipmapped_array_pptr)->min_mipmap_level = 0; + (*mipmapped_array_pptr)->max_mipmap_level = num_mipmap_levels; + (*mipmapped_array_pptr)->flags = mipmapped_array_desc_ptr->Flags; + (*mipmapped_array_pptr)->format = mipmapped_array_desc_ptr->Format; + + return hipSuccess; +} + +hipError_t ihipMipmappedArrayDestroy(hipMipmappedArray_t mipmapped_array_ptr) { + + if (mipmapped_array_ptr == nullptr) { + return hipErrorInvalidValue; + } + + cl_mem mem_obj = reinterpret_cast(mipmapped_array_ptr->data); + if (is_valid(mem_obj) == false) { + return hipErrorInvalidValue; + } + + for (auto& dev : g_devices) { + hip::Stream* stream = dev->NullStream(true); + if (stream != nullptr) { + stream->finish(); + } + } + + as_amd(mem_obj)->release(); + + delete mipmapped_array_ptr; + 
+ return hipSuccess; +} + +hipError_t ihipMipmappedArrayGetLevel(hipArray_t* level_array_pptr, + hipMipmappedArray_t mipmapped_array_ptr, + unsigned int mip_level) { + + if (level_array_pptr == nullptr || mipmapped_array_ptr == nullptr) { + return hipErrorInvalidValue; + } + + // Convert the raw data to amd::Image + cl_mem cl_mem_obj = reinterpret_cast(mipmapped_array_ptr->data); + if (is_valid(cl_mem_obj) == false) { + return hipErrorInvalidValue; + } + + amd::Image* image = as_amd(cl_mem_obj)->asImage(); + if (image == nullptr) { + return hipErrorInvalidValue; + } + + // Create new hip Array parameter and create an image view with new mip level. + (*level_array_pptr) = new hipArray(); + (*level_array_pptr)->data = as_cl(image->createView(image->getContext(), + image->getImageFormat(), + NULL, mip_level, 0)); + + // Copy the new width, height & depth details of the flag to hipArray. + cl_mem cl_mip_mem_obj = reinterpret_cast((*level_array_pptr)->data); + if (is_valid(cl_mem_obj) == false) { + return hipErrorInvalidValue; + } + + // Fill the hip_array info from newly created amd::Image's view + amd::Image* mipmap_image = as_amd(cl_mip_mem_obj)->asImage(); + (*level_array_pptr)->width = mipmap_image->getWidth(); + (*level_array_pptr)->height = mipmap_image->getHeight(); + (*level_array_pptr)->depth = mipmap_image->getDepth(); + + const cl_mem_object_type image_type = hip::getCLMemObjectType((*level_array_pptr)->width, + (*level_array_pptr)->height, + (*level_array_pptr)->depth, + mipmapped_array_ptr->flags); + (*level_array_pptr)->type = image_type; + (*level_array_pptr)->Format = mipmapped_array_ptr->format; + (*level_array_pptr)->desc = mipmapped_array_ptr->desc; + (*level_array_pptr)->NumChannels = hip::getNumChannels((*level_array_pptr)->desc); + (*level_array_pptr)->isDrv = 0; + (*level_array_pptr)->textureType = 0; + + return hipSuccess; +} + +hipError_t hipMipmappedArrayCreate(hipMipmappedArray_t* mipmapped_array_pptr, + HIP_ARRAY3D_DESCRIPTOR* 
mipmapped_array_desc_ptr, + unsigned int num_mipmap_levels) { + HIP_INIT_API(hipMipmappedArrayCreate, mipmapped_array_pptr, mipmapped_array_desc_ptr, + num_mipmap_levels); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipMipmapArrayCreate(mipmapped_array_pptr, mipmapped_array_desc_ptr, + num_mipmap_levels)); +} + +hipError_t hipMipmappedArrayDestroy(hipMipmappedArray_t mipmapped_array_ptr) { + HIP_INIT_API(hipMipmappedArrayDestroy, mipmapped_array_ptr); + CHECK_STREAM_CAPTURE_SUPPORTED(); + HIP_RETURN(ihipMipmappedArrayDestroy(mipmapped_array_ptr)); +} + +hipError_t hipMipmappedArrayGetLevel(hipArray_t* level_array_pptr, + hipMipmappedArray_t mipmapped_array_ptr, + unsigned int mip_level) { + HIP_INIT_API(hipMipmappedArrayGetLevel, level_array_pptr, mipmapped_array_ptr, mip_level); + + HIP_RETURN(ihipMipmappedArrayGetLevel(level_array_pptr, mipmapped_array_ptr, mip_level)); +} + diff --git a/projects/clr/hipamd/src/hip_mempool.cpp b/projects/clr/hipamd/src/hip_mempool.cpp new file mode 100644 index 0000000000..f798f8c813 --- /dev/null +++ b/projects/clr/hipamd/src/hip_mempool.cpp @@ -0,0 +1,296 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include "hip_mempool_impl.hpp" + +/** + * API interfaces + */ +extern hipError_t ihipFree(void* ptr); + +// ================================================================================================ +hipError_t hipDeviceGetDefaultMemPool(hipMemPool_t* mem_pool, int device) { + HIP_INIT_API(hipDeviceGetDefaultMemPool, mem_pool, device); + if (mem_pool == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (device < 0 || device >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + *mem_pool = reinterpret_cast(g_devices[device]->GetDefaultMemoryPool()); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipDeviceSetMemPool(int device, hipMemPool_t mem_pool) { + HIP_INIT_API(hipDeviceSetMemPool, device, mem_pool); + if ((mem_pool == nullptr) || (device >= g_devices.size())) { + HIP_RETURN(hipErrorInvalidValue); + } + + auto poolDevice = reinterpret_cast(mem_pool)->Device(); + if (poolDevice->deviceId() != device) { + HIP_RETURN(hipErrorInvalidDevice); + } + + g_devices[device]->SetCurrentMemoryPool(reinterpret_cast(mem_pool)); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool, int device) { + HIP_INIT_API(hipDeviceGetMemPool, mem_pool, device); + if ((mem_pool == nullptr) || (device >= g_devices.size())) { + HIP_RETURN(hipErrorInvalidValue); + } + *mem_pool = reinterpret_cast(g_devices[device]->GetCurrentMemoryPool()); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ 
+hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipStream_t stream) { + HIP_INIT_API(hipMallocAsync, dev_ptr, size, stream); + if ((dev_ptr == nullptr) || (size == 0) || (!hip::isValid(stream))) { + HIP_RETURN(hipErrorInvalidValue); + } + auto hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : + reinterpret_cast(stream); + auto device = hip_stream->GetDevice(); + auto mem_pool = device->GetCurrentMemoryPool(); + + STREAM_CAPTURE(hipMallocAsync, stream, reinterpret_cast(mem_pool), size, dev_ptr); + + *dev_ptr = mem_pool->AllocateMemory(size, hip_stream); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipFreeAsync(void* dev_ptr, hipStream_t stream) { + HIP_INIT_API(hipFreeAsync, dev_ptr, stream); + if ((dev_ptr == nullptr) || (!hip::isValid(stream))) { + HIP_RETURN(hipErrorInvalidValue); + } + STREAM_CAPTURE(hipFreeAsync, stream, dev_ptr); + size_t offset = 0; + auto memory = getMemoryObject(dev_ptr, offset); + if (memory != nullptr) { + auto id = memory->getUserData().deviceId; + auto hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : + reinterpret_cast(stream); + if (!g_devices[id]->FreeMemory(memory, hip_stream)) { + //! @todo It's not the most optimal logic. 
The current implementation has unconditional waits + HIP_RETURN(ihipFree(dev_ptr)); + } + } + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemPoolTrimTo(hipMemPool_t mem_pool, size_t min_bytes_to_hold) { + HIP_INIT_API(hipMemPoolTrimTo, mem_pool, min_bytes_to_hold); + if (mem_pool == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hip::MemoryPool* hip_mem_pool = reinterpret_cast(mem_pool); + hip_mem_pool->TrimTo(min_bytes_to_hold); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemPoolSetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value) { + HIP_INIT_API(hipMemPoolSetAttribute, mem_pool, attr, value); + if (mem_pool == nullptr || value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + auto hip_mem_pool = reinterpret_cast(mem_pool); + HIP_RETURN(hip_mem_pool->SetAttribute(attr, value)); +} + +// ================================================================================================ +hipError_t hipMemPoolGetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value) { + HIP_INIT_API(hipMemPoolGetAttribute, mem_pool, attr, value); + if (mem_pool == nullptr || value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + auto hip_mem_pool = reinterpret_cast(mem_pool); + HIP_RETURN(hip_mem_pool->GetAttribute(attr, value)); +} + +// ================================================================================================ +hipError_t hipMemPoolSetAccess( + hipMemPool_t mem_pool, + const hipMemAccessDesc* desc_list, + size_t count) { + HIP_INIT_API(hipMemPoolSetAccess, mem_pool, desc_list, count); + if ((mem_pool == nullptr) || (desc_list == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + auto hip_mem_pool = reinterpret_cast(mem_pool); + for (int i = 0; i < count; ++i) { + if (desc_list[i].location.type == 
hipMemLocationTypeDevice) { + if (desc_list[i].location.id >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidValue); + } + if (desc_list[i].flags > hipMemAccessFlagsProtReadWrite) { + HIP_RETURN(hipErrorInvalidValue); + } + auto device = g_devices[desc_list[i].location.id]; + hip_mem_pool->SetAccess(device, desc_list[i].flags); + } else { + HIP_RETURN(hipErrorInvalidValue); + } + } + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemPoolGetAccess( + hipMemAccessFlags* flags, + hipMemPool_t mem_pool, + hipMemLocation* location) { + HIP_INIT_API(hipMemPoolGetAccess, flags, mem_pool, location); + if ((mem_pool == nullptr) || (location == nullptr) || (flags == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + auto hip_mem_pool = reinterpret_cast(mem_pool); + if (location->type == hipMemLocationTypeDevice) { + if (location->id >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidValue); + } + auto device = g_devices[location->id]; + hip_mem_pool->GetAccess(device, flags); + } else { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_props) { + HIP_INIT_API(hipMemPoolCreate, mem_pool, pool_props); + if (mem_pool == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + // validate hipMemAllocationType value + if (pool_props->allocType != hipMemAllocationTypePinned) { + HIP_RETURN(hipErrorInvalidValue); + } + // Make sure the pool creation occurs on a valid device + if ((pool_props->location.type != hipMemLocationTypeDevice) || + (pool_props->location.id >= g_devices.size())) { + HIP_RETURN(hipErrorInvalidValue); + } + auto device = g_devices[pool_props->location.id]; + auto pool = new hip::MemoryPool(device); + if (pool == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + 
*mem_pool = reinterpret_cast(pool); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemPoolDestroy(hipMemPool_t mem_pool) { + HIP_INIT_API(hipMemPoolDestroy, mem_pool); + if (mem_pool == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hip::MemoryPool* hip_mem_pool = reinterpret_cast(mem_pool); + hip_mem_pool->ReleaseFreedMemory(); + + auto device = hip_mem_pool->Device(); + + // Force default pool if the current one is destroyed + if (hip_mem_pool == device->GetCurrentMemoryPool()) { + device->SetCurrentMemoryPool(device->GetDefaultMemoryPool()); + } + + hip_mem_pool->release(); + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMallocFromPoolAsync( + void** dev_ptr, + size_t size, + hipMemPool_t mem_pool, + hipStream_t stream) { + HIP_INIT_API(hipMallocFromPoolAsync, dev_ptr, size, mem_pool, stream); + if ((dev_ptr == nullptr) || (size == 0) || (mem_pool == nullptr) || (!hip::isValid(stream))) { + HIP_RETURN(hipErrorInvalidValue); + } + STREAM_CAPTURE(hipMallocAsync, stream, mem_pool, size, dev_ptr); + + auto mpool = reinterpret_cast(mem_pool); + auto hip_stream = (stream == nullptr) ? 
hip::getCurrentDevice()->NullStream() : + reinterpret_cast(stream); + *dev_ptr = mpool->AllocateMemory(size, hip_stream); + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipMemPoolExportToShareableHandle( + void* shared_handle, + hipMemPool_t mem_pool, + hipMemAllocationHandleType handle_type, + unsigned int flags) { + HIP_INIT_API(hipMemPoolExportToShareableHandle, shared_handle, mem_pool, handle_type, flags); + if (mem_pool == nullptr || shared_handle == nullptr || flags == -1) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(hipErrorNotSupported); +} + +// ================================================================================================ +hipError_t hipMemPoolImportFromShareableHandle( + hipMemPool_t* mem_pool, + void* shared_handle, + hipMemAllocationHandleType handle_type, + unsigned int flags) { + HIP_INIT_API(hipMemPoolImportFromShareableHandle, mem_pool, shared_handle, handle_type, flags); + if (mem_pool == nullptr || shared_handle == nullptr || flags == -1) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(hipErrorNotSupported); +} + +// ================================================================================================ +hipError_t hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* ptr) { + HIP_INIT_API(hipMemPoolExportPointer, export_data, ptr); + if (export_data == nullptr || ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + HIP_RETURN(hipErrorNotSupported); +} + +// ================================================================================================ +hipError_t hipMemPoolImportPointer( + void** ptr, + hipMemPool_t mem_pool, + hipMemPoolPtrExportData* export_data) { + HIP_INIT_API(hipMemPoolImportPointer, ptr, mem_pool, export_data); + if (mem_pool == nullptr || export_data == nullptr || ptr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipErrorNotSupported); +} 
diff --git a/projects/clr/hipamd/src/hip_mempool_impl.cpp b/projects/clr/hipamd/src/hip_mempool_impl.cpp new file mode 100644 index 0000000000..2606688ff0 --- /dev/null +++ b/projects/clr/hipamd/src/hip_mempool_impl.cpp @@ -0,0 +1,406 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include "hip_mempool_impl.hpp" + +namespace hip { + +// ================================================================================================ +void Heap::AddMemory(amd::Memory* memory, hip::Stream* stream) { + allocations_.insert({memory, {stream, nullptr}}); + total_size_ += memory->getSize(); + max_total_size_ = std::max(max_total_size_, total_size_); +} + +// ================================================================================================ +void Heap::AddMemory(amd::Memory* memory, const MemoryTimestamp& ts) { + allocations_.insert({memory, ts}); + total_size_ += memory->getSize(); + max_total_size_ = std::max(max_total_size_, total_size_); +} + +// ================================================================================================ +amd::Memory* Heap::FindMemory(size_t size, hip::Stream* stream, bool opportunistic, void* dptr) { + amd::Memory* memory = nullptr; + for (auto it = allocations_.begin(); it != allocations_.end();) { + bool check_address = (dptr == nullptr) || (it->first->getSvmPtr() == dptr); + // Check if size can match and it's safe to use this resource + if ((it->first->getSize() >= size) && check_address && + (it->second.IsSafeFind(stream, opportunistic))) { + memory = it->first; + total_size_ -= memory->getSize(); + // Remove found allocation from the map + it = allocations_.erase(it); + break; + } else { + ++it; + } + } + return memory; +} + +// ================================================================================================ +bool Heap::RemoveMemory(amd::Memory* memory, MemoryTimestamp* ts) { + if (auto it = allocations_.find(memory); it != allocations_.end()) { + if (ts != nullptr) { + // Preserve timestamp info for possible reuse later + *ts = it->second; + } else { + // Runtime will delete the timestamp object, hence make sure HIP event is released + it->second.Wait(); + it->second.SetEvent(nullptr); + } + total_size_ -= memory->getSize(); + allocations_.erase(it); + return true; 
+ } + return false; +} + +// ================================================================================================ +std::unordered_map::iterator +Heap::EraseAllocaton(std::unordered_map::iterator& it) { + const device::Memory* dev_mem = it->first->getDeviceMemory(*device_->devices()[0]); + amd::SvmBuffer::free(it->first->getContext(), reinterpret_cast(dev_mem->virtualAddress())); + total_size_ -= it->first->getSize(); + // Clear HIP event + it->second.SetEvent(nullptr); + // Remove the allocation from the map + return allocations_.erase(it); +} + +// ================================================================================================ +bool Heap::ReleaseAllMemory(size_t min_bytes_to_hold, bool safe_release) { + for (auto it = allocations_.begin(); it != allocations_.end();) { + // Make sure the heap is smaller than the minimum value to hold + if (total_size_ <= min_bytes_to_hold) { + return true; + } + // Safe release forces unconditional wait for memory + if (safe_release) { + it->second.Wait(); + } + if (it->second.IsSafeRelease()) { + it = EraseAllocaton(it); + } else { + ++it; + } + } + return true; +} + +// ================================================================================================ +bool Heap::ReleaseAllMemory(hip::Stream* stream) { + for (auto it = allocations_.begin(); it != allocations_.end();) { + // Make sure the heap holds the minimum number of bytes + if (total_size_ <= release_threshold_) { + return true; + } + if (it->second.IsSafeRelease()) { + it = EraseAllocaton(it); + } else { + ++it; + } + } + return true; +} + +// ================================================================================================ +// Drops the given stream from the safe-stream set of every allocation held by this heap. +void Heap::RemoveStream(hip::Stream* stream) { + // Iterate by reference: a by-value element is a copy, so the erase would never reach the map + for (auto& it : allocations_) { + it.second.safe_streams_.erase(stream); + } +} + +// ================================================================================================ +void Heap::SetAccess(hip::Device* device, bool enable) { + for
(const auto& it : allocations_) { + auto peer_device = device->asContext()->devices()[0]; + device::Memory* mem = it.first->getDeviceMemory(*peer_device); + if (mem != nullptr) { + if (!mem->getAllowedPeerAccess() && enable) { + // Enable p2p access for the specified device + peer_device->allowPeerAccess(mem); + mem->setAllowedPeerAccess(true); + } else if (mem->getAllowedPeerAccess() && !enable) { + mem->setAllowedPeerAccess(false); + } + } else { + LogError("Couldn't find device memory for P2P access"); + } + } +} + +// ================================================================================================ +void* MemoryPool::AllocateMemory(size_t size, hip::Stream* stream, void* dptr) { + amd::ScopedLock lock(lock_pool_ops_); + + void* dev_ptr = nullptr; + amd::Memory* memory = free_heap_.FindMemory(size, stream, Opportunistic(), dptr); + if (memory == nullptr) { + amd::Context* context = device_->asContext(); + const auto& dev_info = context->devices()[0]->info(); + if (dev_info.maxMemAllocSize_ < size) { + return nullptr; + } + + dev_ptr = amd::SvmBuffer::malloc(*context, 0, size, dev_info.memBaseAddrAlign_, nullptr); + if (dev_ptr == nullptr) { + size_t free = 0, total =0; + hipError_t err = hipMemGetInfo(&free, &total); + if (err == hipSuccess) { + LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu | total :%zu \n", + size, free, total); + } + return nullptr; + } + + size_t offset = 0; + memory = getMemoryObject(dev_ptr, offset); + // Saves the current device id so that it can be accessed later + memory->getUserData().deviceId = device_->deviceId(); + + // Update access for the new allocation from other devices + for (const auto& it : access_map_) { + auto vdi_device = it.first->asContext()->devices()[0]; + device::Memory* mem = memory->getDeviceMemory(*vdi_device); + if ((mem != nullptr) && (it.second != hipMemAccessFlagsProtNone)) { + vdi_device->allowPeerAccess(mem); + mem->setAllowedPeerAccess(true); + } + } + } else { 
+ free_heap_.RemoveMemory(memory); + const device::Memory* dev_mem = memory->getDeviceMemory(*device_->devices()[0]); + dev_ptr = reinterpret_cast(dev_mem->virtualAddress()); + } + // Place the allocated memory into the busy heap + busy_heap_.AddMemory(memory, stream); + + // Increment the reference counter on the pool + retain(); + + return dev_ptr; +} + +// ================================================================================================ +// Moves a memory object from the busy heap to the free heap. Returns false if the busy heap does +// not hold the object. With a non-null stream, a marker event is recorded on that stream and +// attached to the allocation's timestamp; with a null stream the event is cleared. +bool MemoryPool::FreeMemory(amd::Memory* memory, hip::Stream* stream) { + amd::ScopedLock lock(lock_pool_ops_); + + MemoryTimestamp ts; + // Remove memory object from the busy pool + if (!busy_heap_.RemoveMemory(memory, &ts)) { + // This pool doesn't contain memory + return false; + } + + if (stream != nullptr) { + // The stream of destruction is a safe stream, because the app must handle sync + ts.AddSafeStream(stream); + + // Add a marker to the stream to trace availability of this memory + Event* e = new hip::Event(0); + if (e != nullptr) { + if (hipSuccess == e->addMarker(reinterpret_cast(stream), nullptr, true)) { + ts.SetEvent(e); + } else { + // The marker was not recorded, so the timestamp never takes ownership; free the event here + delete e; + } + } + } else { + // Assume a safe release from hipFree() if stream is nullptr + ts.SetEvent(nullptr); + } + free_heap_.AddMemory(memory, ts); + + // Decrement the reference counter on the pool + release(); + + return true; +} + +// ================================================================================================ +void MemoryPool::ReleaseAllMemory() { + constexpr bool kSafeRelease = true; + free_heap_.ReleaseAllMemory(0, kSafeRelease); + busy_heap_.ReleaseAllMemory(0, kSafeRelease); +} + +// ================================================================================================ +void MemoryPool::ReleaseFreedMemory(hip::Stream* stream) { + amd::ScopedLock lock(lock_pool_ops_); + + free_heap_.ReleaseAllMemory(stream); +} + +// ================================================================================================ +void
MemoryPool::RemoveStream(hip::Stream* stream) { + amd::ScopedLock lock(lock_pool_ops_); + + free_heap_.RemoveStream(stream); +} + +// ================================================================================================ +void MemoryPool::TrimTo(size_t min_bytes_to_hold) { + amd::ScopedLock lock(lock_pool_ops_); + + free_heap_.ReleaseAllMemory(min_bytes_to_hold); +} + +// ================================================================================================ +// Applies one pool attribute read from *value. The "current" counters are read-only and the +// "high" counters only accept 0 (a reset); anything else returns hipErrorInvalidValue. +hipError_t MemoryPool::SetAttribute(hipMemPoolAttr attr, void* value) { + amd::ScopedLock lock(lock_pool_ops_); + uint64_t reset; + + switch (attr) { + case hipMemPoolReuseFollowEventDependencies: + // Enable/disable HIP events tracking from the app's dependencies + state_.event_dependencies_ = *reinterpret_cast(value); + break; + case hipMemPoolReuseAllowOpportunistic: + // Enable/disable HIP event check for freed memory + state_.opportunistic_ = *reinterpret_cast(value); + break; + case hipMemPoolReuseAllowInternalDependencies: + // Enable/disable internal extra dependencies introduced in runtime + state_.internal_dependencies_ = *reinterpret_cast(value); + break; + case hipMemPoolAttrReleaseThreshold: + free_heap_.SetReleaseThreshold(*reinterpret_cast(value)); + break; + case hipMemPoolAttrReservedMemCurrent: + // Should be GetAttribute only + return hipErrorInvalidValue; + break; + case hipMemPoolAttrReservedMemHigh: + reset = *reinterpret_cast(value); + // Only 0 is accepted + if (reset != 0) { + return hipErrorInvalidValue; + } + free_heap_.SetMaxTotalSize(reset); + // Without this break a successful reset fell into the read-only case below and failed + break; + case hipMemPoolAttrUsedMemCurrent: + // Should be GetAttribute only + return hipErrorInvalidValue; + break; + case hipMemPoolAttrUsedMemHigh: + reset = *reinterpret_cast(value); + // Only 0 is accepted + if (reset != 0) { + return hipErrorInvalidValue; + } + busy_heap_.SetMaxTotalSize(reset); + break; + default: + return hipErrorInvalidValue; + } + return hipSuccess; +} + +// 
================================================================================================ +hipError_t MemoryPool::GetAttribute(hipMemPoolAttr attr, void* value) { + amd::ScopedLock lock(lock_pool_ops_); + + switch (attr) { + case hipMemPoolReuseFollowEventDependencies: + // Enable/disable HIP events tracking from the app's dependencies + *reinterpret_cast(value) = EventDependencies(); + break; + case hipMemPoolReuseAllowOpportunistic: + // Enable/disable HIP event check for freed memory + *reinterpret_cast(value) = Opportunistic(); + break; + case hipMemPoolReuseAllowInternalDependencies: + // Enable/disable internal extra dependencies introduced in runtime + *reinterpret_cast(value) = InternalDependencies(); + break; + case hipMemPoolAttrReleaseThreshold: + *reinterpret_cast(value) = free_heap_.GetReleaseThreshold(); + break; + case hipMemPoolAttrReservedMemCurrent: + // All allocate memory by the pool in OS + *reinterpret_cast(value) = busy_heap_.GetTotalSize() + free_heap_.GetTotalSize(); + break; + case hipMemPoolAttrReservedMemHigh: + // High watermark of all allocated memory in OS, since the last reset + *reinterpret_cast(value) = busy_heap_.GetTotalSize() + free_heap_.GetMaxTotalSize(); + break; + case hipMemPoolAttrUsedMemCurrent: + // Total currently used memory by the pool + *reinterpret_cast(value) = busy_heap_.GetTotalSize(); + break; + case hipMemPoolAttrUsedMemHigh: + // High watermark of all used memoryS, since the last reset + *reinterpret_cast(value) = busy_heap_.GetMaxTotalSize(); + break; + default: + return hipErrorInvalidValue; + } + return hipSuccess; +} + +// ================================================================================================ +void MemoryPool::SetAccess(hip::Device* device, hipMemAccessFlags flags) { + amd::ScopedLock lock(lock_pool_ops_); + + // Check if the requested device is the pool device where memory was allocated + if (device == device_) { + return; + } + + hipMemAccessFlags current_flags = 
hipMemAccessFlagsProtNone; + + // Check if access was enabled before + if (access_map_.find(device) != access_map_.end()) { + current_flags = access_map_[device]; + } + + if (current_flags != flags) { + bool enable_access = false; + // Save the access state in the device map + access_map_[device] = flags; + // Check if access is enabled + if ((flags == hipMemAccessFlagsProtRead) || (flags == hipMemAccessFlagsProtReadWrite)) { + enable_access = true; + } + // Update device access on the both pools + busy_heap_.SetAccess(device, enable_access); + free_heap_.SetAccess(device, enable_access); + } +} + +// ================================================================================================ +void MemoryPool::GetAccess(hip::Device* device, hipMemAccessFlags* flags) { + amd::ScopedLock lock(lock_pool_ops_); + + // Current pool device has full access to memory allocation + *flags = (device == device_) ? hipMemAccessFlagsProtReadWrite : hipMemAccessFlagsProtNone; + + // Check if access was enabled before + if (access_map_.find(device) != access_map_.end()) { + *flags = access_map_[device]; + } +} + +void MemoryPool::FreeAllMemory(hip::Stream* stream) { + while (!busy_heap_.Allocations().empty()) { + FreeMemory(busy_heap_.Allocations().begin()->first, stream); + } +} + +} diff --git a/projects/clr/hipamd/src/hip_mempool_impl.hpp b/projects/clr/hipamd/src/hip_mempool_impl.hpp new file mode 100644 index 0000000000..5e18cb3599 --- /dev/null +++ b/projects/clr/hipamd/src/hip_mempool_impl.hpp @@ -0,0 +1,242 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#pragma once + +#include +#include "hip_event.hpp" +#include "hip_internal.hpp" +#include +#include + +namespace hip { + +class Device; +class Stream; + +/// Tracks which streams may reuse a memory object and the last HIP event recorded for it +struct MemoryTimestamp { + MemoryTimestamp(hip::Stream* stream, hip::Event* event = nullptr): event_(event) { + safe_streams_.insert(stream); + } + MemoryTimestamp(): event_(nullptr) {} + + /// Adds a safe stream to the set of streams for possible reuse + void AddSafeStream(hip::Stream* stream) { + // Insert only when the stream is absent; the previous "!=" test made this a no-op + if (safe_streams_.find(stream) == safe_streams_.end()) { + safe_streams_.insert(stream); + } + } + /// Changes last known valid event associated with memory; the previous event is deleted + void SetEvent(hip::Event* event) { + delete event_; + event_ = event; + } + /// Wait for memory to be available + void Wait() { + if (event_ != nullptr) { + auto hip_error = event_->synchronize(); + } + } + /// Returns if memory object is safe for reuse + bool IsSafeFind(hip::Stream* stream = nullptr, bool opportunistic = true) { + bool result = false; + if (safe_streams_.find(stream) != safe_streams_.end()) { + // A safe stream doesn't require TS validation + result = true; + } else if (opportunistic && (event_ != nullptr)) { + // Check HIP event for a retired status + result = (event_->query() == hipSuccess) ? true : false; + } + return result; + } + /// Returns if memory object is safe for release + bool IsSafeRelease() { + bool result = true; + if (event_ != nullptr) { + // Check HIP event for a retired status + result = (event_->query() == hipSuccess) ? 
true : false; + } + return result; + } + + std::unordered_set safe_streams_; //!< Safe streams for memory reuse + hip::Event* event_; //!< Last known HIP event, associated with the memory object +}; + +class Heap : public amd::EmbeddedObject { +public: + Heap(hip::Device* device): + total_size_(0), max_total_size_(0), release_threshold_(0), device_(device) {} + ~Heap() {} + + /// Adds allocation into the heap on a specific stream + void AddMemory(amd::Memory* memory, hip::Stream* stream); + + /// Adds allocation into the heap with specific TS + void AddMemory(amd::Memory* memory, const MemoryTimestamp& ts); + + /// Finds memory object with the specified size + amd::Memory* FindMemory(size_t size, hip::Stream* stream, bool opportunistic, void* dptr = nullptr); + + /// Removes allocation from the map + bool RemoveMemory(amd::Memory* memory, MemoryTimestamp* ts = nullptr); + + /// Releases all memory, until the threshold value is met + bool ReleaseAllMemory(size_t min_bytes_to_hold = std::numeric_limits::max(), bool safe_release = false); + + /// Releases all memory, safe to the provided stream, until the threshold value is met + bool ReleaseAllMemory(hip::Stream* stream); + + /// Remove the provided stream from the safe list + void RemoveStream(hip::Stream* stream); + + /// Enables P2P access to the provided device + void SetAccess(hip::Device* device, bool enable); + + /// Heap doesn't have any allocations + bool IsEmpty() const { return (allocations_.size() == 0) ? 
true : false; } + + /// Set the memory release threshold + void SetReleaseThreshold(uint64_t value) { release_threshold_ = value; } + + /// Get the memory release threshold + uint64_t GetReleaseThreshold() const { return release_threshold_; } + + /// Get the size of all allocations in the heap + uint64_t GetTotalSize() const { return total_size_; } + + /// Get the recorded maximum of the heap's total allocation size + uint64_t GetMaxTotalSize() const { return max_total_size_; } + + /// Overwrite the recorded maximum of the heap's total allocation size + void SetMaxTotalSize(uint64_t value) { max_total_size_ = value; } + + /// Erases a single allocation from the heap's map + std::unordered_map::iterator EraseAllocaton( + std::unordered_map::iterator& it); + + /// Checks if memory belongs to this heap + bool IsActiveMemory(amd::Memory* memory) const { + return (allocations_.find(memory) != allocations_.end()); + } + /// Read-only access to the heap's allocation map + const auto& Allocations() { return allocations_; } +private: + Heap() = delete; + Heap(const Heap&) = delete; + Heap& operator=(const Heap&) = delete; + + std::unordered_map allocations_; //!< Map of allocations on a specific stream + uint64_t total_size_; //!< Size of all allocations in the heap + uint64_t max_total_size_; //!< Maximum heap allocation size + uint64_t release_threshold_; //!< Threshold size in bytes for memory release from heap, default 0 + + hip::Device* device_; //!< HIP device on which the allocations reside +}; + +/// Allocates memory in the pool on the specified stream and places the allocation into busy_heap_ +/// @note: the logic also will look in free_heap for possible reuse. +/// hipMemPoolReuseAllowOpportunistic option will validate if HIP event, +/// associated with memory is done, then reuse can be performed. 
+class MemoryPool : public amd::ReferenceCountedObject { +public: + MemoryPool(hip::Device* device): + busy_heap_(device), + free_heap_(device), + lock_pool_ops_("Pool operations", true), device_(device) { + device_->AddMemoryPool(this); + state_.event_dependencies_ = 1; + state_.opportunistic_ = 1; + state_.internal_dependencies_ = 1; + } + virtual ~MemoryPool() { + if (!busy_heap_.IsEmpty()) { + LogError("Shouldn't destroy pool with busy allocations!"); + } + ReleaseAllMemory(); + // Remove memory pool from the list of all pool on the current device + device_->RemoveMemoryPool(this); + } + + /// The same stream can reuse memory without HIP event validation + void* AllocateMemory(size_t size, hip::Stream* stream, void* dptr = nullptr); + + /// Frees memory by placing memory object with HIP event into free_heap_ + bool FreeMemory(amd::Memory* memory, hip::Stream* stream); + + /// Check if memory is active and belongs to the busy heap + bool IsBusyMemory(amd::Memory* memory) const { + return busy_heap_.IsActiveMemory(memory); + } + + /// Releases all allocations from free_heap_. 
It can be called on Stream or Device synchronization + /// @note The caller must make sure it's safe to release memory + void ReleaseFreedMemory(hip::Stream* stream = nullptr); + + /// Removes a stream from tracking + void RemoveStream(hip::Stream* stream); + + /// Releases all allocations in MemoryPool + void ReleaseAllMemory(); + + /// Trims the pool until it has only min_bytes_to_hold + void TrimTo(size_t min_bytes_to_hold); + + /// Returns the HIP device this pool was created with + hip::Device* Device() const { return device_; } + + /// Set memory pool control attributes + hipError_t SetAttribute(hipMemPoolAttr attr, void* value); + + /// Get memory pool control attributes + hipError_t GetAttribute(hipMemPoolAttr attr, void* value); + + /// Set memory pool access by different devices + void SetAccess(hip::Device* device, hipMemAccessFlags flags); + + /// Get the memory pool access flags for the given device + void GetAccess(hip::Device* device, hipMemAccessFlags* flags); + + /// Frees all busy memory + void FreeAllMemory(hip::Stream* stream = nullptr); + + /// Accessors for the pool state + bool EventDependencies() const { return (state_.event_dependencies_) ? true : false; } + bool Opportunistic() const { return (state_.opportunistic_) ? true : false; } + bool InternalDependencies() const { return (state_.internal_dependencies_) ? 
true : false; } + +private: + MemoryPool() = delete; + MemoryPool(const MemoryPool&) = delete; + MemoryPool& operator=(const MemoryPool&) = delete; + + Heap busy_heap_; //!< Heap of busy allocations + Heap free_heap_; //!< Heap of freed allocations + struct { + uint32_t event_dependencies_ : 1; //!< Event dependencies tracking is enabled + uint32_t opportunistic_ : 1; //!< HIP event check is enabled + uint32_t internal_dependencies_ : 1; //!< Runtime adds internal events to handle memory dependencies + } state_; + + amd::Monitor lock_pool_ops_; //!< Access to the pool must be lock protected + std::map access_map_; //!< Map of access to the pool from devices + hip::Device* device_; //!< HIP device on which the pool's heaps reside +}; + +} // namespace hip diff --git a/projects/clr/hipamd/src/hip_module.cpp b/projects/clr/hipamd/src/hip_module.cpp new file mode 100644 index 0000000000..83bd08be50 --- /dev/null +++ b/projects/clr/hipamd/src/hip_module.cpp @@ -0,0 +1,836 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include +#include + +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "hip_event.hpp" +#include "hip_platform.hpp" + +hipError_t ihipModuleLoadData(hipModule_t* module, const void* mmap_ptr, size_t mmap_size); + +extern hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags); + +const std::string& FunctionName(const hipFunction_t f) { + return hip::DeviceFunc::asFunction(f)->kernel()->name(); +} + +static uint64_t ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } + +hipError_t hipModuleUnload(hipModule_t hmod) { + HIP_INIT_API(hipModuleUnload, hmod); + if (hmod == nullptr) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + HIP_RETURN(PlatformState::instance().unloadModule(hmod)); +} + +hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { + HIP_INIT_API(hipModuleLoad, module, fname); + + HIP_RETURN(PlatformState::instance().loadModule(module, fname)); +} + +hipError_t hipModuleLoadData(hipModule_t* module, const void* image) { + HIP_INIT_API(hipModuleLoadData, module, image); + HIP_RETURN(PlatformState::instance().loadModule(module, 0, image)); +} + +hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, + hipJitOption* options, void** optionsValues) { + /* TODO: Pass options to Program */ + HIP_INIT_API(hipModuleLoadDataEx, module, image); + HIP_RETURN(PlatformState::instance().loadModule(module, 0, image)); +} + +extern hipError_t __hipExtractCodeObjectFromFatBinary( + const void* data, const std::vector& devices, + std::vector>& code_objs); + +hipError_t 
hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { + HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); + + if (hfunc == nullptr || name == nullptr || strlen(name) == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + if (hmod == nullptr) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + if (hipSuccess != PlatformState::instance().getDynFunc(hfunc, hmod, name)) { + LogPrintfError("Cannot find the function: %s for module: 0x%x \n", name, hmod); + HIP_RETURN(hipErrorNotFound); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name) { + HIP_INIT_API(hipModuleGetGlobal, dptr, bytes, hmod, name); + + if (dptr == nullptr || bytes == nullptr) { + // If either is nullptr, ignore it + HIP_RETURN(hipSuccess); + } + if ((dptr == nullptr && bytes == nullptr) || name == nullptr || strlen(name) == 0) { + HIP_RETURN(hipErrorInvalidValue); + } + if (hmod == nullptr) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + /* Get address and size for the global symbol */ + if (hipSuccess != PlatformState::instance().getDynGlobalVar(name, hmod, dptr, bytes)) { + LogPrintfError("Cannot find global Var: %s for module: 0x%x at device: %d \n", name, hmod, + ihipGetDevice()); + HIP_RETURN(hipErrorNotFound); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc) { + HIP_INIT_API(hipFuncGetAttribute, value, attrib, hfunc); + + if ((value == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(hfunc); + if (function == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::Kernel* kernel = function->kernel(); + if (kernel == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + const device::Kernel::WorkGroupInfo* wrkGrpInfo = + kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo(); + if 
(wrkGrpInfo == nullptr) { + HIP_RETURN(hipErrorMissingConfiguration); + } + + switch (attrib) { + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->localMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + *value = static_cast(wrkGrpInfo->size_); + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->privateMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + *value = static_cast(wrkGrpInfo->usedVGPRs_); + break; + case HIP_FUNC_ATTRIBUTE_PTX_VERSION: + *value = 30; // Defaults to 3.0 as HCC + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + *value = static_cast(kernel->signature().version()); + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->availableLDSSize_ - wrkGrpInfo->localMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + *value = 0; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { + HIP_INIT_API(hipFuncGetAttributes, attr, func); + + HIP_RETURN_ONFAIL(PlatformState::instance().getStatFuncAttr(attr, func, ihipGetDevice())); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) { + HIP_INIT_API(hipFuncSetAttribute, func, attr, value); + + // No way to set function attribute yet. + + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { + HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig); + + // No way to set cache config yet. 
+ + HIP_RETURN(hipSuccess); +} + +hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) { + HIP_INIT_API(hipFuncSetSharedMemConfig, func, config); + + // No way to set Shared Memory config function yet. + + HIP_RETURN(hipSuccess); +} + +hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, void** kernelParams, void** extra, + int deviceId, uint32_t params = 0) { + if (f == nullptr) { + LogPrintfError("%s", "Function passed is null"); + return hipErrorInvalidImage; + } + if ((kernelParams != nullptr) && (extra != nullptr)) { + LogPrintfError("%s", + "Both, kernelParams and extra Params are provided, only one should be provided"); + return hipErrorInvalidValue; + } + if (globalWorkSizeX == 0 || globalWorkSizeY == 0 || globalWorkSizeZ == 0 || blockDimX == 0 || + blockDimY == 0 || blockDimZ == 0) { + return hipErrorInvalidValue; + } + + const amd::Device* device = g_devices[deviceId]->devices()[0]; + const auto& info = device->info(); + if (sharedMemBytes > info.localMemSizePerCU_) { //sharedMemPerBlock + return hipErrorInvalidValue; + } + // Make sure dispatch doesn't exceed max workgroup size limit + if (blockDimX * blockDimY * blockDimZ > device->info().maxWorkGroupSize_) { + return hipErrorInvalidValue; + } + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(f); + amd::Kernel* kernel = function->kernel(); + if (!kernel->getDeviceKernel(*device)) { + return hipErrorInvalidDevice; + } + // Make sure the launch params are not larger than if specified launch_bounds + // If it exceeds, then return a failure + if (blockDimX * blockDimY * blockDimZ > + kernel->getDeviceKernel(*device)->workGroupInfo()->size_) { + LogPrintfError("Launch params (%u, %u, %u) are larger than launch bounds (%lu) for kernel %s", + blockDimX, blockDimY, blockDimZ, + 
kernel->getDeviceKernel(*device)->workGroupInfo()->size_, + function->name().c_str()); + return hipErrorLaunchFailure; + } + + if (params & amd::NDRangeKernelCommand::CooperativeGroups) { + if (!device->info().cooperativeGroups_) { + return hipErrorLaunchFailure; + } + int num_blocks = 0; + int max_blocks_per_grid = 0; + int best_block_size = 0; + int block_size = blockDimX * blockDimY * blockDimZ; + hipError_t err = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, *device, f, block_size, sharedMemBytes, + true); + if (err != hipSuccess) { + return err; + } + if (((globalWorkSizeX * globalWorkSizeY * globalWorkSizeZ) / block_size) > + unsigned(max_blocks_per_grid)) { + return hipErrorCooperativeLaunchTooLarge; + } + } + if (params & amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups) { + if (!device->info().cooperativeMultiDeviceGroups_) { + return hipErrorLaunchFailure; + } + } + address kernargs = nullptr; + // 'extra' is a struct that contains the following info: { + // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, + // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, + // HIP_LAUNCH_PARAM_END } + if (extra != nullptr) { + if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE || + extra[4] != HIP_LAUNCH_PARAM_END) { + return hipErrorInvalidValue; + } + kernargs = reinterpret_cast
(extra[1]); + } + + const amd::KernelSignature& signature = kernel->signature(); + for (size_t i = 0; i < signature.numParameters(); ++i) { + const amd::KernelParameterDescriptor& desc = signature.at(i); + if (kernelParams == nullptr) { + assert(kernargs != nullptr); + kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, + desc.type_ == T_POINTER /*svmBound*/); + } else { + assert(extra == nullptr); + kernel->parameters().set(i, desc.size_, kernelParams[i], + desc.type_ == T_POINTER /*svmBound*/); + } + } + return hipSuccess; +} + +hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, + uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, + uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, uint32_t sharedMemBytes, + hip::Stream* stream, void** kernelParams, void** extra, + hipEvent_t startEvent = nullptr, hipEvent_t stopEvent = nullptr, + uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, + uint32_t numGrids = 0, uint64_t prevGridSum = 0, + uint64_t allGridSum = 0, uint32_t firstDevice = 0) { + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(f); + amd::Kernel* kernel = function->kernel(); + + size_t globalWorkOffset[3] = {0}; + size_t globalWorkSize[3] = {globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ}; + size_t localWorkSize[3] = {blockDimX, blockDimY, blockDimZ}; + amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize); + amd::Command::EventWaitList waitList; + address kernargs = nullptr; + + bool profileNDRange = (startEvent != nullptr || stopEvent != nullptr); + + // Flag set to 1 signifies that kernel can be launched in anyorder + if (flags & hipExtAnyOrderLaunch) { + params |= amd::NDRangeKernelCommand::AnyOrderLaunch; + } + + amd::NDRangeKernelCommand* kernelCommand = new amd::NDRangeKernelCommand( + *stream, waitList, *kernel, ndrange, sharedMemBytes, params, gridId, numGrids, prevGridSum, + allGridSum, firstDevice, profileNDRange); + if 
(!kernelCommand) { + return hipErrorOutOfMemory; + } + + // Capture the kernel arguments + if (CL_SUCCESS != kernelCommand->captureAndValidate()) { + kernelCommand->release(); + return hipErrorOutOfMemory; + } + + command = kernelCommand; + + return hipSuccess; +} + +hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + uint32_t sharedMemBytes, hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, + uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, + uint32_t numGrids = 0, uint64_t prevGridSum = 0, + uint64_t allGridSum = 0, uint32_t firstDevice = 0) { + int deviceId = hip::Stream::DeviceId(hStream); + HIP_RETURN_ONFAIL(PlatformState::instance().initStatManagedVarDevicePtr(deviceId)); + + if (f == nullptr) { + LogPrintfError("%s", "Function passed is null"); + return hipErrorInvalidResourceHandle; + } + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(f); + amd::Kernel* kernel = function->kernel(); + amd::ScopedLock lock(function->dflock_); + + hipError_t status = ihipLaunchKernel_validate( + f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, blockDimY, blockDimZ, + sharedMemBytes, kernelParams, extra, deviceId, params); + if (status != hipSuccess) { + return status; + } + amd::Command* command = nullptr; + hip::Stream* hip_stream = hip::getStream(hStream); + status = ihipLaunchKernelCommand(command, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + blockDimX, blockDimY, blockDimZ, sharedMemBytes, hip_stream, + kernelParams, extra, startEvent, stopEvent, flags, params, + gridId, numGrids, prevGridSum, allGridSum, firstDevice); + if (status != hipSuccess) { + return status; + } + + if (startEvent != nullptr) { + hip::Event* eStart = reinterpret_cast(startEvent); + status = eStart->addMarker(hStream, nullptr, false); + if (status != 
hipSuccess) { + return status; + } + } + + if (stopEvent != nullptr) { + hip::Event* eStop = reinterpret_cast(stopEvent); + if (eStop->flags & hipEventDisableSystemFence) { + command->setEventScope(amd::Device::kCacheStateIgnore); + } else { + command->setEventScope(amd::Device::kCacheStateSystem); + } + // Enqueue Dispatch and bind the stop event + command->enqueue(); + eStop->BindCommand(*command, false); + } else { + command->enqueue(); + } + + if (command->status() == CL_INVALID_OPERATION) { + command->release(); + return hipErrorIllegalState; + } + + command->release(); + + return hipSuccess; +} + +hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, + uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, + uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, + void** kernelParams, void** extra) { + HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, + blockDimZ, sharedMemBytes, hStream, kernelParams, extra); + + if (!hip::isValid(hStream)) { + HIP_RETURN(hipErrorInvalidValue); + } + + STREAM_CAPTURE(hipModuleLaunchKernel, hStream, f, gridDimX, gridDimY, gridDimZ, blockDimX, + blockDimY, blockDimZ, sharedMemBytes, kernelParams, extra); + + size_t globalWorkSizeX = static_cast(gridDimX) * blockDimX; + size_t globalWorkSizeY = static_cast(gridDimY) * blockDimY; + size_t globalWorkSizeZ = static_cast(gridDimZ) * blockDimZ; + if (globalWorkSizeX > std::numeric_limits::max() || + globalWorkSizeY > std::numeric_limits::max() || + globalWorkSizeZ > std::numeric_limits::max()) { + HIP_RETURN(hipErrorInvalidConfiguration); + } + HIP_RETURN(ihipModuleLaunchKernel( + f, static_cast(globalWorkSizeX), static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), blockDimX, blockDimY, blockDimZ, sharedMemBytes, + hStream, kernelParams, extra, nullptr, nullptr)); +} + +hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t 
globalWorkSizeZ, + uint32_t localWorkSizeX, uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, void** extra, + hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) { + HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, + kernelParams, extra, startEvent, stopEvent, flags); + + if (!hip::isValid(hStream)) { + HIP_RETURN(hipErrorInvalidValue); + } + + STREAM_CAPTURE(hipExtModuleLaunchKernel, hStream, f, globalWorkSizeX, globalWorkSizeY, + globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, + kernelParams, extra, startEvent, stopEvent, flags); + + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, + hStream, kernelParams, extra, startEvent, stopEvent, flags)); +} + + +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) { + HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, + startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, + blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, + extra, startEvent, stopEvent)); +} + +hipError_t hipModuleLaunchKernelExt(hipFunction_t f, uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, + uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, + size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, + 
void** extra, hipEvent_t startEvent, hipEvent_t stopEvent) { + HIP_INIT_API(hipModuleLaunchKernelExt, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, + blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, + startEvent, stopEvent); + + HIP_RETURN(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, blockDimX, + blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, + extra, startEvent, stopEvent)); +} + +hipError_t hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, + hipStream_t stream, void** kernelParams) { + HIP_INIT_API(hipModuleLaunchCooperativeKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, + blockDimY, blockDimZ, sharedMemBytes, stream, kernelParams); + + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorInvalidValue); + } + + size_t globalWorkSizeX = static_cast(gridDimX) * blockDimX; + size_t globalWorkSizeY = static_cast(gridDimY) * blockDimY; + size_t globalWorkSizeZ = static_cast(gridDimZ) * blockDimZ; + if (globalWorkSizeX > std::numeric_limits::max() || + globalWorkSizeY > std::numeric_limits::max() || + globalWorkSizeZ > std::numeric_limits::max()) { + HIP_RETURN(hipErrorInvalidConfiguration); + } + HIP_RETURN(ihipModuleLaunchKernel(f, static_cast(globalWorkSizeX), + static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), blockDimX, blockDimY, + blockDimZ, sharedMemBytes, stream, kernelParams, nullptr, nullptr, + nullptr, 0, amd::NDRangeKernelCommand::CooperativeGroups)); +} + +hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags, + uint32_t extFlags) { + int numActiveGPUs = 0; + hipError_t result = hipSuccess; + result = ihipDeviceGetCount(&numActiveGPUs); + + if ((numDevices == 0) || (numDevices > numActiveGPUs)) { 
+ return hipErrorInvalidValue; + } + + if (flags > (hipCooperativeLaunchMultiDeviceNoPostSync + + hipCooperativeLaunchMultiDeviceNoPreSync)) { + return hipErrorInvalidValue; + } + + uint64_t allGridSize = 0; + std::vector mgpu_list(numDevices); + + for (int i = 0; i < numDevices; ++i) { + uint32_t blockDims = 0; + const hipFunctionLaunchParams& launch = launchParamsList[i]; + blockDims = launch.blockDimX * launch.blockDimY * launch.blockDimZ; + allGridSize += launch.gridDimX * launch.gridDimY * launch.gridDimZ * blockDims; + + // Make sure block dimensions are valid + if (0 == blockDims) { + return hipErrorInvalidConfiguration; + } + if (launch.hStream != nullptr) { + // Validate devices to make sure it dosn't have duplicates + hip::Stream* hip_stream = reinterpret_cast(launch.hStream); + auto device = &hip_stream->vdev()->device(); + for (int j = 0; j < numDevices; ++j) { + if (mgpu_list[j] == device) { + return hipErrorInvalidDevice; + } + } + mgpu_list[i] = device; + } else { + return hipErrorInvalidResourceHandle; + } + } + uint64_t prevGridSize = 0; + uint32_t firstDevice = 0; + + // Sync the execution streams on all devices + if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + hip::Stream* hip_stream = + reinterpret_cast(launchParamsList[i].hStream); + hip_stream->finish(); + } + } + + for (int i = 0; i < numDevices; ++i) { + const hipFunctionLaunchParams& launch = launchParamsList[i]; + hip::Stream* hip_stream = reinterpret_cast(launch.hStream); + + if (i == 0) { + // The order of devices in the launch may not match the order in the global array + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + // Find the matching device + if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) { + // Save ROCclr index of the first device in the launch + firstDevice = hip_stream->vdev()->device().index(); + break; + } + } + } + + size_t globalWorkSizeX = static_cast(launch.gridDimX) * launch.blockDimX; 
+ size_t globalWorkSizeY = static_cast(launch.gridDimY) * launch.blockDimY; + size_t globalWorkSizeZ = static_cast(launch.gridDimZ) * launch.blockDimZ; + if (globalWorkSizeX > std::numeric_limits::max() || + globalWorkSizeY > std::numeric_limits::max() || + globalWorkSizeZ > std::numeric_limits::max()) { + return hipErrorInvalidConfiguration; + } + result = ihipModuleLaunchKernel( + launch.function, static_cast(globalWorkSizeX), + static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), launch.blockDimX, launch.blockDimY, + launch.blockDimZ, launch.sharedMemBytes, launch.hStream, launch.kernelParams, + nullptr, nullptr, nullptr, flags, extFlags, + i, numDevices, prevGridSize, allGridSize, firstDevice); + if (result != hipSuccess) { + break; + } + prevGridSize += globalWorkSizeX * globalWorkSizeY * globalWorkSizeZ; + } + + // Sync the execution streams on all devices + if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + hip::Stream* hip_stream = + reinterpret_cast(launchParamsList[i].hStream); + hip_stream->finish(); + } + } + + return result; +} + +hipError_t hipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList, + unsigned int numDevices, + unsigned int flags) { + HIP_INIT_API(hipModuleLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags); + + if (launchParamsList == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + // Validate all streams passed by user + for (int i = 0; i < numDevices; ++i) { + if (!hip::isValid(launchParamsList[i].hStream)) { + HIP_RETURN(hipErrorInvalidValue); + } + } + + HIP_RETURN(ihipModuleLaunchCooperativeKernelMultiDevice( + launchParamsList, + numDevices, + flags, + (amd::NDRangeKernelCommand::CooperativeGroups | + amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups))); + +} + +extern "C" hipError_t hipLaunchKernel_common(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, + 
hipStream_t stream) { + STREAM_CAPTURE(hipLaunchKernel, stream, hostFunction, gridDim, blockDim, args, sharedMemBytes); + return ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, nullptr, + nullptr, 0); +} + +extern "C" hipError_t hipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream) { + HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); + HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream)); +} + +extern "C" hipError_t hipLaunchKernel_spt(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream) { + HIP_INIT_API(hipLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipLaunchKernel_common(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream)); +} + +extern "C" hipError_t hipExtLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags) { + HIP_INIT_API(hipExtLaunchKernel, hostFunction, gridDim, blockDim, args, sharedMemBytes, + stream, startEvent, stopEvent, flags); + + if (!hip::isValid(stream) || !hip::isValid(startEvent) || !hip::isValid(stopEvent)) { + HIP_RETURN(hipErrorInvalidValue); + } + + STREAM_CAPTURE(hipExtLaunchKernel, stream, hostFunction, gridDim, blockDim, args, sharedMemBytes, + startEvent, stopEvent, flags); + HIP_RETURN(ihipLaunchKernel(hostFunction, gridDim, blockDim, args, sharedMemBytes, stream, + startEvent, stopEvent, flags)); +} + +hipError_t hipLaunchCooperativeKernel_common(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream) { + if (!hip::isValid(hStream)) { + return hipErrorInvalidValue; + } + + hipFunction_t func = nullptr; + 
int deviceId = hip::Stream::DeviceId(hStream); + hipError_t getStatFuncError = PlatformState::instance().getStatFunc(&func, f, deviceId); + if (getStatFuncError != hipSuccess) { + return getStatFuncError; + } + const amd::Device* device = g_devices[deviceId]->devices()[0]; + size_t globalWorkSizeX = static_cast(gridDim.x) * blockDim.x; + size_t globalWorkSizeY = static_cast(gridDim.y) * blockDim.y; + size_t globalWorkSizeZ = static_cast(gridDim.z) * blockDim.z; + if (globalWorkSizeX > std::numeric_limits::max() || + globalWorkSizeY > std::numeric_limits::max() || + globalWorkSizeZ > std::numeric_limits::max() || + (blockDim.x * blockDim.y * blockDim.z > device->info().maxWorkGroupSize_)) { + return hipErrorInvalidConfiguration; + } + + return ihipModuleLaunchKernel(func, static_cast(globalWorkSizeX), + static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), blockDim.x, blockDim.y, + blockDim.z, sharedMemBytes, hStream, kernelParams, nullptr, nullptr, + nullptr, 0, amd::NDRangeKernelCommand::CooperativeGroups); +} + +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream) { + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream); + HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, + hStream)); +} + +hipError_t hipLaunchCooperativeKernel_spt(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, uint32_t sharedMemBytes, + hipStream_t hStream) { + HIP_INIT_API(hipLaunchCooperativeKernel, f, gridDim, blockDim, sharedMemBytes, hStream); + PER_THREAD_DEFAULT_STREAM(hStream); + HIP_RETURN(hipLaunchCooperativeKernel_common(f, gridDim, blockDim, kernelParams, sharedMemBytes, + hStream)); +} + +hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags, uint32_t extFlags) { + if (launchParamsList == nullptr) { + return 
hipErrorInvalidValue; + } + + std::vector functionLaunchParamsList(numDevices); + // Convert hipLaunchParams to hipFunctionLaunchParams + for (int i = 0; i < numDevices; ++i) { + hipLaunchParams& launch = launchParamsList[i]; + // Validate stream passed by user + if (!hip::isValid(launch.stream)) { + return hipErrorInvalidValue; + } + + hip::Stream* hip_stream = hip::getStream(launch.stream); + hipFunction_t func = nullptr; + // The order of devices in the launch may not match the order in the global array + for (size_t dev = 0; dev < g_devices.size(); ++dev) { + // Find the matching device and request the kernel function + if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) { + IHIP_RETURN_ONFAIL(PlatformState::instance().getStatFunc(&func, launch.func, dev)); + break; + } + } + if (func == nullptr) { + return hipErrorInvalidDeviceFunction; + } + + functionLaunchParamsList[i].function = func; + functionLaunchParamsList[i].gridDimX = launch.gridDim.x; + functionLaunchParamsList[i].gridDimY = launch.gridDim.y; + functionLaunchParamsList[i].gridDimZ = launch.gridDim.z; + functionLaunchParamsList[i].blockDimX = launch.blockDim.x; + functionLaunchParamsList[i].blockDimY = launch.blockDim.y; + functionLaunchParamsList[i].blockDimZ = launch.blockDim.z; + functionLaunchParamsList[i].sharedMemBytes = launch.sharedMem; + functionLaunchParamsList[i].hStream = launch.stream; + functionLaunchParamsList[i].kernelParams = launch.args; + } + + return ihipModuleLaunchCooperativeKernelMultiDevice(functionLaunchParamsList.data(), + functionLaunchParamsList.size(), + flags, + extFlags); +} + +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags) { + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, numDevices, flags); + + HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice( + launchParamsList, numDevices, flags, + (amd::NDRangeKernelCommand::CooperativeGroups | + 
amd::NDRangeKernelCommand::CooperativeMultiDeviceGroups))); +} + +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags) { + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); + + HIP_RETURN(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, 0)); +} + +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { + HIP_INIT_API(hipModuleGetTexRef, texRef, hmod, name); + + /* input args check */ + if ((texRef == nullptr) || (name == nullptr) || (strlen(name) == 0)) { + HIP_RETURN(hipErrorInvalidValue); + } + if (hmod == nullptr) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + /* Get address and size for the global symbol */ + if (hipSuccess != PlatformState::instance().getDynTexRef(name, hmod, texRef)) { + LogPrintfError("Cannot get texRef for name: %s at module:0x%x \n", name, hmod); + HIP_RETURN(hipErrorNotFound); + } + + // Texture references created by HIP driver API + // have the default read mode set to normalized float. + // have format set to format float + // set num of channels to 1 + (*texRef)->readMode = hipReadModeNormalizedFloat; + (*texRef)->format = HIP_AD_FORMAT_FLOAT; + (*texRef)->numChannels = 1; + + hipError_t err = PlatformState::instance().registerTexRef(*texRef, hmod, std::string(name)); + + HIP_RETURN(err); +} diff --git a/projects/clr/hipamd/src/hip_peer.cpp b/projects/clr/hipamd/src/hip_peer.cpp new file mode 100644 index 0000000000..17dc65da05 --- /dev/null +++ b/projects/clr/hipamd/src/hip_peer.cpp @@ -0,0 +1,256 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) { + HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx, + size_t sizeBytes) { + HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + +hipError_t canAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId){ + amd::Device* device = nullptr; + amd::Device* peer_device = nullptr; + if (canAccessPeer == nullptr) { + return hipErrorInvalidValue; + } + /* Peer cannot be self */ + if (deviceId == peerDeviceId) { + *canAccessPeer = 0; + return hipSuccess; + } + /* Cannot exceed the max number of devices */ + if (static_cast(deviceId) >= g_devices.size() + || static_cast(peerDeviceId) >= g_devices.size()) { + return hipErrorInvalidDevice; + } + device = g_devices[deviceId]->devices()[0]; + peer_device = g_devices[peerDeviceId]->devices()[0]; + *canAccessPeer = static_cast(std::find(device->p2pDevices_.begin(), + device->p2pDevices_.end(), as_cl(peer_device)) + != device->p2pDevices_.end()); + return hipSuccess; +} + +hipError_t findLinkInfo(int device1, int device2, + std::vector* link_attrs) { + + amd::Device* amd_dev_obj1 = nullptr; + amd::Device* amd_dev_obj2 = nullptr; + const int numDevices = static_cast(g_devices.size()); + + if ((device1 < 0) || (device1 >= numDevices) || (device2 < 0) || (device2 >= numDevices)) { + return hipErrorInvalidDevice; + } + + amd_dev_obj1 = 
g_devices[device1]->devices()[0]; + amd_dev_obj2 = g_devices[device2]->devices()[0]; + + if (!amd_dev_obj1->findLinkInfo(*amd_dev_obj2, link_attrs)) { + return hipErrorInvalidHandle; + } + + return hipSuccess; +} + +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, + uint32_t* linktype, uint32_t* hopcount) { + HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount); + + if (linktype == nullptr || hopcount == nullptr || + device1 == device2 || device1 < 0 || device2 < 0) { + HIP_RETURN(hipErrorInvalidValue); + } + // Fill out the list of LinkAttributes + std::vector link_attrs; + link_attrs.push_back(std::make_pair(amd::Device::LinkAttribute::kLinkLinkType, 0)); + link_attrs.push_back(std::make_pair(amd::Device::LinkAttribute::kLinkHopCount, 0)); + + HIP_RETURN_ONFAIL(findLinkInfo(device1, device2, &link_attrs)); + + *linktype = static_cast(link_attrs[0].second); + *hopcount = static_cast(link_attrs[1].second); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, + int srcDevice, int dstDevice) { + HIP_INIT_API(hipDeviceGetP2PAttribute, value, attr, srcDevice, dstDevice); + + if (value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (srcDevice == dstDevice || srcDevice >= static_cast(g_devices.size()) + || dstDevice >= static_cast(g_devices.size())) { + HIP_RETURN(hipErrorInvalidDevice); + } + + std::vector link_attrs; + + switch (attr) { + case hipDevP2PAttrPerformanceRank : { + link_attrs.push_back(std::make_pair(amd::Device::LinkAttribute::kLinkLinkType, 0)); + break; + } + case hipDevP2PAttrAccessSupported : { + HIP_RETURN_ONFAIL(canAccessPeer(value, srcDevice, dstDevice)); + break; + } + case hipDevP2PAttrNativeAtomicSupported : { + link_attrs.push_back(std::make_pair(amd::Device::LinkAttribute::kLinkAtomicSupport, 0)); + break; + } + case hipDevP2PAttrHipArrayAccessSupported : { + hipDeviceProp_t srcDeviceProp; + hipDeviceProp_t dstDeviceProp; + 
HIP_RETURN_ONFAIL(hipGetDeviceProperties(&srcDeviceProp, srcDevice)); + HIP_RETURN_ONFAIL(hipGetDeviceProperties(&dstDeviceProp, dstDevice)); + + // Linear layout access is supported if P2P is enabled + // Opaque Images are supported only on homogeneous systems + // Might have more conditions to check, in future. + if (srcDeviceProp.gcnArch == dstDeviceProp.gcnArch) { + HIP_RETURN_ONFAIL(canAccessPeer(value, srcDevice, dstDevice)); + } else { + *value = 0; + } + break; + } + default : { + LogPrintfError("Invalid attribute attr: %d ", attr); + HIP_RETURN(hipErrorInvalidValue); + } + } + + if ((attr != hipDevP2PAttrAccessSupported) && (attr != hipDevP2PAttrHipArrayAccessSupported)) { + HIP_RETURN_ONFAIL(findLinkInfo(srcDevice, dstDevice, &link_attrs)); + *value = static_cast(link_attrs[0].second); + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipDeviceCanAccessPeer(int* canAccess, int deviceId, int peerDeviceId) { + HIP_INIT_API(hipDeviceCanAccessPeer, canAccess, deviceId, peerDeviceId); + HIP_RETURN(canAccessPeer(canAccess, deviceId, peerDeviceId)); +} + +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { + HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId); + int deviceId = hip::getCurrentDevice()->deviceId(); + int canAccess = 0; + if ((hipSuccess != canAccessPeer(&canAccess, deviceId, peerDeviceId)) || (canAccess == 0)) { + HIP_RETURN(hipErrorInvalidDevice); + } + + amd::Device* device = g_devices[deviceId]->devices()[0]; + amd::Device* peer_device = g_devices[peerDeviceId]->devices()[0]; + peer_device->disableP2P(device); + + HIP_RETURN(hip::getCurrentDevice()->DisablePeerAccess(peerDeviceId)); +} + +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { + HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags); + int deviceId = hip::getCurrentDevice()->deviceId(); + int canAccess = 0; + if (flags != 0) { + HIP_RETURN(hipErrorInvalidValue); + } + if ((hipSuccess != canAccessPeer(&canAccess, deviceId, peerDeviceId)) || 
(canAccess == 0)) { + HIP_RETURN(hipErrorInvalidDevice); + } + + amd::Device* device = g_devices[deviceId]->asContext()->devices()[0]; + amd::Device* peer_device = g_devices[peerDeviceId]->asContext()->devices()[0]; + peer_device->enableP2P(device); + + HIP_RETURN(hip::getCurrentDevice()->EnablePeerAccess(peerDeviceId)); +} + +hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes) { + HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes); + + if (srcDevice >= static_cast(g_devices.size()) || + dstDevice >= static_cast(g_devices.size()) || + srcDevice < 0 || dstDevice < 0) { + HIP_RETURN(hipErrorInvalidDevice); + } + + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice, *hip::getNullStream(), + true, false)); +} + +hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream) { + HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream); + + if (srcDevice >= static_cast(g_devices.size()) || + dstDevice >= static_cast(g_devices.size()) || + srcDevice < 0 || dstDevice < 0) { + HIP_RETURN(hipErrorInvalidDevice); + } + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { + return hipErrorInvalidValue; + } + HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, hipMemcpyDeviceToDevice, *hip_stream, true, true)); +} + +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { + HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { + HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx); + + HIP_RETURN(hipSuccess); +} diff --git a/projects/clr/hipamd/src/hip_platform.cpp b/projects/clr/hipamd/src/hip_platform.cpp new file mode 100644 index 0000000000..1e12cfd4fa --- /dev/null +++ 
b/projects/clr/hipamd/src/hip_platform.cpp @@ -0,0 +1,895 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include +#include "hip_platform.hpp" +#include "hip_internal.hpp" +#include "platform/program.hpp" +#include "platform/runtime.hpp" + +#include + +constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" + +PlatformState* PlatformState::platform_; // Initialized as nullptr by default + +// forward declaration of methods required for __hipRegisterManagedVar +hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); + +// Wrapper handed to __hipRegisterFatBinary, which checks magic and version before using binary. +struct __CudaFatBinaryWrapper { + unsigned int magic; + unsigned int version; + void* binary; + void* dummy1; +}; + +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, + const char* name); + +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes); + +extern hipError_t ihipModuleLaunchKernel( + hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, + uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, + void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, + uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, + uint64_t prevGridSum = 0, uint64_t allGridSum = 0, uint32_t firstDevice = 0); +static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const char* device_name) { + // Workaround for device name mismatch. + // Device name may contain feature strings delimited by '+', e.g. + // gfx900+xnack. Currently HIP-Clang does not include feature strings + // in code object target id in fat binary. Therefore drop the feature + // strings from device name before comparing it with code object target id. 
+ std::string short_name(device_name); + auto feature_loc = short_name.find('+'); + if (feature_loc != std::string::npos) { + short_name.erase(feature_loc); + } + return codeobj_target_id == short_name; +} + +extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data) { + const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); + if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { + LogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", fbwrapper->magic, + fbwrapper->version); + return nullptr; + } + return PlatformState::instance().addFatBinary(fbwrapper->binary); +} + +extern "C" void __hipRegisterFunction(hip::FatBinaryInfo** modules, const void* hostFunction, + char* deviceFunction, const char* deviceName, + unsigned int threadLimit, uint3* tid, uint3* bid, + dim3* blockDim, dim3* gridDim, int* wSize) { + static int enable_deferred_loading{[]() { + char* var = getenv("HIP_ENABLE_DEFERRED_LOADING"); + return var ? atoi(var) : 1; + }()}; + hipError_t hip_error = hipSuccess; + hip::Function* func = new hip::Function(std::string(deviceName), modules); + hip_error = PlatformState::instance().registerStatFunction(hostFunction, func); + guarantee((hip_error == hipSuccess), "Cannot register Static function"); + + if (!enable_deferred_loading) { + HIP_INIT_VOID(); + hipFunction_t hfunc = nullptr; + + for (size_t dev_idx = 0; dev_idx < g_devices.size(); ++dev_idx) { + hip_error = PlatformState::instance().getStatFunc(&hfunc, hostFunction, dev_idx); + guarantee((hip_error == hipSuccess), "Cannot retrieve Static function"); + } + } +} + +// Registers a device-side global variable. +// For each global variable in device code, there is a corresponding shadow +// global variable in host code. The shadow host variable is used to keep +// track of the value of the device side global variable between kernel +// executions. 
+extern "C" void __hipRegisterVar( + hip::FatBinaryInfo** modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int ext, // Whether this variable is external + size_t size, // Size of the variable + int constant, // Whether this variable is constant + int global) // Unknown, always 0 +{ + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable, + size, 0, 0, modules); + hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr); + guarantee((err == hipSuccess), "Cannot register Static Global Var"); +} + +extern "C" void __hipRegisterSurface( + hip::FatBinaryInfo** modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int ext) { + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface, + sizeof(surfaceReference), 0, 0, modules); + hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr); + guarantee((err == hipSuccess), "Cannot register Static Glbal Var"); +} + +extern "C" void __hipRegisterManagedVar( + void* hipModule, // Pointer to hip module returned from __hipRegisterFatbinary + void** pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p + // align HIP runtime allocates such managed memory and assign it to \p pointer + void* init_value, // Initial value to be copied into \p pointer + const char* name, // Name of the variable in code object + size_t size, unsigned align) { + HIP_INIT_VOID(); + hipError_t status = ihipMallocManaged(pointer, size, align); + if (status == hipSuccess) { + hip::Stream* stream = hip::getNullStream(); + if (stream != nullptr) { + status = ihipMemcpy(*pointer, init_value, size, 
hipMemcpyHostToDevice, *stream); + guarantee((status == hipSuccess), "Error during memcpy to managed memory!"); + } else { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); + } + } else { + guarantee(false, "Error during allocation of managed memory!"); + } + hip::Var* var_ptr = new hip::Var(std::string(name), hip::Var::DeviceVarKind::DVK_Managed, pointer, + size, align, reinterpret_cast(hipModule)); + status = PlatformState::instance().registerStatManagedVar(var_ptr); + guarantee((status == hipSuccess), "Cannot register Static Managed Var"); +} + +extern "C" void __hipRegisterTexture( + hip::FatBinaryInfo** modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int norm, int ext) { + hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture, + sizeof(textureReference), 0, 0, modules); + hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr); + guarantee((err == hipSuccess), "Cannot register Static Global Var"); +} + +extern "C" void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules) { + hipError_t err = PlatformState::instance().removeFatBinary(modules); + guarantee((err == hipSuccess), "Cannot Unregister Fat Binary"); +} + +extern "C" hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream); + + PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream); + + HIP_RETURN(hipSuccess); +} + 
+extern "C" hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem, + hipStream_t* stream) { + HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + *gridDim = exec.gridDim_; + *blockDim = exec.blockDim_; + *sharedMem = exec.sharedMem_; + *stream = exec.hStream_; + + HIP_RETURN(hipSuccess); +} + +extern "C" hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) { + HIP_INIT_API(hipSetupArgument, arg, size, offset); + + PlatformState::instance().setupArgument(arg, size, offset); + + HIP_RETURN(hipSuccess); +} + +// Launches hostFunction with the launch configuration and argument buffer obtained from popExec. +// The device is taken from the configured stream, or from ihipGetDevice() when the stream is null. +extern "C" hipError_t hipLaunchByPtr(const void* hostFunction) { + HIP_INIT_API(hipLaunchByPtr, hostFunction); + + ihipExec_t exec; + PlatformState::instance().popExec(exec); + + hip::Stream* stream = reinterpret_cast(exec.hStream_); + int deviceId = (stream != nullptr) ? stream->DeviceId() : ihipGetDevice(); + if (deviceId == -1) { + LogPrintfError("Wrong DeviceId: %d \n", deviceId); + HIP_RETURN(hipErrorNoDevice); + } + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId); + if ((hip_error != hipSuccess) || (func == nullptr)) { + LogPrintfError("Could not retrieve hostFunction: 0x%x \n", hostFunction); + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + size_t size = exec.arguments_.size(); + // NOTE(review): assumes arguments_ is a std::vector; data() is safe when no argument was set up, + // whereas indexing element 0 of an empty buffer is not. + void* extra[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, exec.arguments_.data(), + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; + + HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z, + exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z, + exec.sharedMem_, exec.hStream_, nullptr, extra)); +} + +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { + HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol); + + hipError_t hip_error = hipSuccess; + if (devPtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + 
} + size_t sym_size = 0; + + HIP_RETURN_ONFAIL( + PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size)); + + HIP_RETURN(hipSuccess, *devPtr); +} + +hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) { + HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol); + + if (sizePtr == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + hipDeviceptr_t device_ptr = nullptr; + HIP_RETURN_ONFAIL( + PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr)); + + HIP_RETURN(hipSuccess, *sizePtr); +} + +hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj, + hipDeviceptr_t* dptr, size_t* bytes) { + + /* Get Device Program pointer*/ + amd::Program* program = as_amd(reinterpret_cast(hmod)); + device::Program* dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); + + if (dev_program == nullptr) { + LogPrintfError("Cannot get Device Function for module: 0x%x \n", hmod); + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + /* Find the global Symbols */ + if (!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { + LogPrintfError("Cannot create Global Var obj for symbol: %s \n", name); + HIP_RETURN(hipErrorInvalidSymbol); + } + + HIP_RETURN(hipSuccess); +} + + +namespace hip_impl { +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device, + hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz) { + hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func); + const amd::Kernel& kernel = *function->kernel(); + + const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel.getDeviceKernel(device)->workGroupInfo(); + if (bCalcPotentialBlkSz == false) { + if (inputBlockSize <= 0) { + return hipErrorInvalidValue; + } + *bestBlockSize = 0; + // Make sure the requested block size is smaller than max supported + if 
(inputBlockSize > int(device.info().maxWorkGroupSize_)) { + *maxBlocksPerCU = 0; + *numBlocksPerGrid = 0; + return hipSuccess; + } + } else { + if (inputBlockSize > int(device.info().maxWorkGroupSize_) || inputBlockSize <= 0) { + // The user wrote the kernel to work with a workgroup size + // bigger than this hardware can support. Or they do not care + // about the size. So just assume its maximum size is + // constrained by hardware + inputBlockSize = device.info().maxWorkGroupSize_; + } + } + // Find wave occupancy per CU => simd_per_cu * GPR usage + size_t MaxWavesPerSimd; + + if (device.isa().versionMajor() <= 9) { + MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD + } else { + MaxWavesPerSimd = 16; + } + size_t VgprWaves = MaxWavesPerSimd; + uint32_t VgprGranularity = device.info().vgprAllocGranularity_; + size_t maxVGPRs = device.info().vgprsPerSimd_; + size_t wavefrontSize = wrkGrpInfo->wavefrontSize_; + if (device.isa().versionMajor() >= 10) { + if (wavefrontSize == 64) { + maxVGPRs = maxVGPRs >> 1; + VgprGranularity = VgprGranularity >> 1; + } + } + // VGPR-limited waves: guard on the VGPR count, since its aligned value is the divisor. + // A kernel using no VGPRs keeps the MaxWavesPerSimd default instead of dividing by zero. + if (wrkGrpInfo->usedVGPRs_ > 0) { + VgprWaves = maxVGPRs / amd::alignUp(wrkGrpInfo->usedVGPRs_, VgprGranularity); + } + + size_t GprWaves = VgprWaves; + if (wrkGrpInfo->usedSGPRs_ > 0) { + size_t maxSGPRs = device.info().sgprsPerSimd_; + const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); + GprWaves = std::min(VgprWaves, SgprWaves); + } + uint32_t simdPerCU = (device.isa().versionMajor() <= 9) ? device.info().simdPerCU_ + : (wrkGrpInfo->isWGPMode_ ? 
4 : 2); + const size_t alu_occupancy = simdPerCU * std::min(MaxWavesPerSimd, GprWaves); + const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_; + + int lds_occupancy_wgs = INT_MAX; + const size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize; + if (total_used_lds != 0) { + lds_occupancy_wgs = static_cast(device.info().localMemSize_ / total_used_lds); + } + // Calculate how many blocks of inputBlockSize we can fit per CU + // Need to align with hardware wavefront size. If they want 65 threads, but + // waves are 64, then we need 128 threads per block. + // So this calculates how many blocks we can fit. + *maxBlocksPerCU = alu_limited_threads / amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_); + // Unless those blocks are further constrained by LDS size. + *maxBlocksPerCU = std::min(*maxBlocksPerCU, lds_occupancy_wgs); + + // Some callers of this function want to return the block size, in threads, that + // leads to the maximum occupancy. In that case, inputBlockSize is the maximum + // workgroup size the user wants to allow, or that the hardware can allow. + // It is either the number of threads that we are limited to due to occupancy, or + // the maximum available block size for this kernel, which could have come from the + // user. e.g., if the user indicates the maximum block size is 64 threads, but we + // calculate that 128 threads can fit in each CU, we have to give up and return 64. + *bestBlockSize = + std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_)); + // If the best block size is smaller than the block size used to fit the maximum, + // then we need to make the grid bigger for full occupancy. + const int bestBlocksPerCU = alu_limited_threads / (*bestBlockSize); + // Unless those blocks are further constrained by LDS size. 
+ *numBlocksPerGrid = device.info().maxComputeUnits_ * std::min(bestBlocksPerCU, lds_occupancy_wgs); + + return hipSuccess; +} +} // namespace hip_impl + +extern "C" { +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f, + size_t dynSharedMemPerBlk, int blockSizeLimit) { + HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); + if ((gridSize == nullptr) || (blockSize == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice()); + if ((hip_error != hipSuccess) || (func == nullptr)) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int max_blocks_per_grid = 0; + int num_blocks = 0; + int best_block_size = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit, + dynSharedMemPerBlk, true); + if (ret == hipSuccess) { + *blockSize = best_block_size; + *gridSize = max_blocks_per_grid; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, + size_t dynSharedMemPerBlk, int blockSizeLimit) { + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); + if ((gridSize == nullptr) || (blockSize == nullptr) || (f == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int max_blocks_per_grid = 0; + int num_blocks = 0; + int best_block_size = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, + dynSharedMemPerBlk, true); + if (ret == hipSuccess) { + *blockSize = best_block_size; + *gridSize = max_blocks_per_grid; + } + 
HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, + size_t dynSharedMemPerBlk, + int blockSizeLimit, + unsigned int flags) { + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, + blockSizeLimit, flags); + if ((gridSize == nullptr) || (blockSize == nullptr) || (f == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + if (flags != hipOccupancyDefault && flags != hipOccupancyDisableCachingOverride) { + HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int max_blocks_per_grid = 0; + int num_blocks = 0; + int best_block_size = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit, + dynSharedMemPerBlk, true); + if (ret == hipSuccess) { + *blockSize = best_block_size; + *gridSize = max_blocks_per_grid; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f, + int blockSize, + size_t dynSharedMemPerBlk) { + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, + dynSharedMemPerBlk); + if (numBlocks == nullptr || (f == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + int max_blocks_per_grid = 0; + int best_block_size = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, + false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, 
blockSize, + dynSharedMemPerBlk, flags); + if (numBlocks == nullptr || (f == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + if (flags != hipOccupancyDefault && flags != hipOccupancyDisableCachingOverride) { + HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + int max_blocks_per_grid = 0; + int best_block_size = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk, + false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f, + int blockSize, size_t dynamicSMemSize) { + HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize); + if (numBlocks == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice()); + if ((hip_error != hipSuccess) || (func == nullptr)) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + int max_blocks_per_grid = 0; + int best_block_size = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, + false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* f, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags) { + HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, + flags); + if (numBlocks == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + if (flags != hipOccupancyDefault && flags != hipOccupancyDisableCachingOverride) { + 
HIP_RETURN(hipErrorInvalidValue); + } + hipFunction_t func = nullptr; + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice()); + if ((hip_error != hipSuccess) || (func == nullptr)) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + int max_blocks_per_grid = 0; + int best_block_size = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize, + false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} +} + +hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, void** args, + size_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent, + hipEvent_t stopEvent, int flags) { + hipFunction_t func = nullptr; + int deviceId = hip::Stream::DeviceId(stream); + hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId); + if ((hip_error != hipSuccess) || (func == nullptr)) { + if (hip_error == hipErrorSharedObjectInitFailed) { + return hip_error; + } else { + return hipErrorInvalidDeviceFunction; + } + } + size_t globalWorkSizeX = static_cast(gridDim.x) * blockDim.x; + size_t globalWorkSizeY = static_cast(gridDim.y) * blockDim.y; + size_t globalWorkSizeZ = static_cast(gridDim.z) * blockDim.z; + if (globalWorkSizeX > std::numeric_limits::max() || + globalWorkSizeY > std::numeric_limits::max() || + globalWorkSizeZ > std::numeric_limits::max()) { + return hipErrorInvalidConfiguration; + } + return ihipModuleLaunchKernel( + func, static_cast(globalWorkSizeX), static_cast(globalWorkSizeY), + static_cast(globalWorkSizeZ), blockDim.x, blockDim.y, blockDim.z, sharedMemBytes, + stream, args, nullptr, startEvent, stopEvent, flags); +} + +// conversion routines between float and half precision + +static inline std::uint32_t f32_as_u32(float f) { + union { + float f; + 
std::uint32_t u; + } v; + v.f = f; + return v.u; +} + +static inline float u32_as_f32(std::uint32_t u) { + union { + float f; + std::uint32_t u; + } v; + v.u = u; + return v.f; +} + +static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } + + +// half float, the f16 is in the low 16 bits of the input argument + +static inline float __convert_half_to_float(std::uint32_t a) noexcept { + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + + std::uint32_t v = + f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U) /*0x1.0p+112f*/) + 0x38000000U; + + u = (a & 0x7fff) != 0 ? v : u; + + return u32_as_f32(u) * u32_as_f32(0x07800000U) /*0x1.0p-112f*/; +} + +// float half with nearest even rounding +// The lower 16 bits of the result is the bit pattern for the f16 +static inline std::uint32_t __convert_float_to_half(float a) noexcept { + std::uint32_t u = f32_as_u32(a); + int e = static_cast((u >> 23) & 0xff) - 127 + 15; + std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0); + std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0); + std::uint32_t n = ((std::uint32_t)e << 12) | m; + std::uint32_t s = (u >> 16) & 0x8000; + int b = clamp_int(1 - e, 0, 13); + std::uint32_t d = (0x1000 | m) >> b; + d |= (d << b) != (0x1000 | m); + std::uint32_t v = e < 1 ? d : n; + v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5)); + v = e > 30 ? 0x7c00 : v; + v = e == 143 ? 
i : v; + return s | v; +} + +extern "C" +#if !defined(_MSC_VER) + __attribute__((weak)) +#endif + float + __gnu_h2f_ieee(unsigned short h) { + return __convert_half_to_float((std::uint32_t)h); +} + +extern "C" +#if !defined(_MSC_VER) + __attribute__((weak)) +#endif + unsigned short + __gnu_f2h_ieee(float f) { + return (unsigned short)__convert_float_to_half(f); +} + +void PlatformState::init() { + amd::ScopedLock lock(lock_); + if (initialized_ || g_devices.empty()) { + return; + } + initialized_ = true; + for (auto& it : statCO_.modules_) { + hipError_t err = digestFatBinary(it.first, it.second); + if (err != hipSuccess) { + HIP_ERROR_PRINT(err); + return; + } + } + for (auto& it : statCO_.vars_) { + it.second->resize_dVar(g_devices.size()); + } + for (auto& it : statCO_.functions_) { + it.second->resize_dFunc(g_devices.size()); + } +} + +hipError_t PlatformState::loadModule(hipModule_t* module, const char* fname, const void* image) { + if (module == nullptr) { + return hipErrorInvalidValue; + } + + hip::DynCO* dynCo = new hip::DynCO(); + hipError_t hip_error = dynCo->loadCodeObject(fname, image); + if (hip_error != hipSuccess) { + delete dynCo; + return hip_error; + } + + *module = dynCo->module(); + assert(*module != nullptr); + + amd::ScopedLock lock(lock_); + if (dynCO_map_.find(*module) != dynCO_map_.end()) { + delete dynCo; + return hipErrorAlreadyMapped; + } + dynCO_map_.insert(std::make_pair(*module, dynCo)); + + return hipSuccess; +} + +hipError_t PlatformState::unloadModule(hipModule_t hmod) { + amd::ScopedLock lock(lock_); + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + return hipErrorNotFound; + } + + delete it->second; + dynCO_map_.erase(hmod); + + auto tex_it = texRef_map_.begin(); + while (tex_it != texRef_map_.end()) { + if (tex_it->second.first == hmod) { + tex_it = texRef_map_.erase(tex_it); + } else { + ++tex_it; + } + } + + return hipSuccess; +} + +hipError_t PlatformState::getDynFunc(hipFunction_t* hfunc, hipModule_t 
hmod, + const char* func_name) { + amd::ScopedLock lock(lock_); + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + LogPrintfError("Cannot find the module: 0x%x", hmod); + return hipErrorNotFound; + } + if (0 == strlen(func_name)) { + return hipErrorNotFound; + } + + return it->second->getDynFunc(hfunc, func_name); +} + +hipError_t PlatformState::getDynGlobalVar(const char* hostVar, hipModule_t hmod, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(lock_); + + if (hostVar == nullptr || dev_ptr == nullptr || size_ptr == nullptr) { + return hipErrorInvalidValue; + } + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + LogPrintfError("Cannot find the module: 0x%x", hmod); + return hipErrorNotFound; + } + *dev_ptr = nullptr; + IHIP_RETURN_ONFAIL(it->second->getManagedVarPointer(hostVar, dev_ptr, size_ptr)); + // if dev_ptr is nullptr, hostvar is not in managed variable list + if (*dev_ptr == nullptr) { + hip::DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, hostVar)); + *dev_ptr = dvar->device_ptr(); + *size_ptr = dvar->size(); + } + return hipSuccess; +} + +hipError_t PlatformState::registerTexRef(textureReference* texRef, hipModule_t hmod, + std::string name) { + amd::ScopedLock lock(lock_); + texRef_map_.insert(std::make_pair(texRef, std::make_pair(hmod, name))); + return hipSuccess; +} + +hipError_t PlatformState::getDynTexGlobalVar(textureReference* texRef, hipDeviceptr_t* dev_ptr, + size_t* size_ptr) { + amd::ScopedLock lock(lock_); + + auto tex_it = texRef_map_.find(texRef); + if (tex_it == texRef_map_.end()) { + LogPrintfError("Cannot find the texRef Entry: 0x%x", texRef); + return hipErrorNotFound; + } + + auto it = dynCO_map_.find(tex_it->second.first); + if (it == dynCO_map_.end()) { + LogPrintfError("Cannot find the module: 0x%x", tex_it->second.first); + return hipErrorNotFound; + } + + hip::DeviceVar* dvar = nullptr; + 
IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, tex_it->second.second)); + *dev_ptr = dvar->device_ptr(); + *size_ptr = dvar->size(); + + return hipSuccess; +} + +hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod, + textureReference** texRef) { + amd::ScopedLock lock(lock_); + + auto it = dynCO_map_.find(hmod); + if (it == dynCO_map_.end()) { + LogPrintfError("Cannot find the module: 0x%x", hmod); + return hipErrorNotFound; + } + + hip::DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, hostVar)); + + if (dvar->size() != sizeof(textureReference)) { + return hipErrorNotFound; // Any better way to verify texture type? + } + + dvar->shadowVptr = new texture(); + *texRef = reinterpret_cast(dvar->shadowVptr); + return hipSuccess; +} + +hipError_t PlatformState::digestFatBinary(const void* data, hip::FatBinaryInfo*& programs) { + return statCO_.digestFatBinary(data, programs); +} + +hip::FatBinaryInfo** PlatformState::addFatBinary(const void* data) { + return statCO_.addFatBinary(data, initialized_); +} + +hipError_t PlatformState::removeFatBinary(hip::FatBinaryInfo** module) { + return statCO_.removeFatBinary(module); +} + +hipError_t PlatformState::registerStatFunction(const void* hostFunction, hip::Function* func) { + return statCO_.registerStatFunction(hostFunction, func); +} + +hipError_t PlatformState::registerStatGlobalVar(const void* hostVar, hip::Var* var) { + return statCO_.registerStatGlobalVar(hostVar, var); +} + +hipError_t PlatformState::registerStatManagedVar(hip::Var* var) { + return statCO_.registerStatManagedVar(var); +} + +const char* PlatformState::getStatFuncName(const void* hostFunction) { + return statCO_.getStatFuncName(hostFunction); +} + +hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, + int deviceId) { + return statCO_.getStatFunc(hfunc, hostFunction, deviceId); +} + +hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const 
void* hostFunction, + int deviceId) { + if (func_attr == nullptr) { + return hipErrorInvalidValue; + } + if (hostFunction == nullptr) { + return hipErrorInvalidDeviceFunction; + } + return statCO_.getStatFuncAttr(func_attr, hostFunction, deviceId); +} + +hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr); +} + +hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) { + return statCO_.initStatManagedVarDevicePtr(deviceId); +} + +void PlatformState::setupArgument(const void* arg, size_t size, size_t offset) { + auto& arguments = hip::tls.exec_stack_.top().arguments_; + + if (arguments.size() < offset + size) { + arguments.resize(offset + size); + } + + ::memcpy(&arguments[offset], arg, size); +} + +void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + hip::tls.exec_stack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); +} + +void PlatformState::popExec(ihipExec_t& exec) { + exec = std::move(hip::tls.exec_stack_.top()); + hip::tls.exec_stack_.pop(); +} diff --git a/projects/clr/hipamd/src/hip_platform.hpp b/projects/clr/hipamd/src/hip_platform.hpp new file mode 100644 index 0000000000..109a921547 --- /dev/null +++ b/projects/clr/hipamd/src/hip_platform.hpp @@ -0,0 +1,96 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ +#pragma once + +#include "hip_internal.hpp" +#include "hip_fatbin.hpp" +#include "device/device.hpp" +#include "hip_code_object.hpp" + +namespace hip_impl { + +hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device, + hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz); +} /* namespace hip_impl*/ + +class PlatformState { + amd::Monitor lock_{"Guards PlatformState globals", true}; + + /* Singleton object */ + static PlatformState* platform_; + PlatformState() {} + ~PlatformState() {} + + public: + void init(); + + // Dynamic Code Objects functions + hipError_t loadModule(hipModule_t* module, const char* fname, const void* image = nullptr); + hipError_t unloadModule(hipModule_t hmod); + + hipError_t getDynFunc(hipFunction_t* hfunc, hipModule_t hmod, const char* func_name); + hipError_t getDynGlobalVar(const char* hostVar, hipModule_t hmod, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + hipError_t getDynTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); + + hipError_t registerTexRef(textureReference* texRef, hipModule_t hmod, std::string name); + hipError_t getDynTexGlobalVar(textureReference* texRef, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + + /* Singleton instance */ + static PlatformState& instance() { + if (platform_ == nullptr) { + // __hipRegisterFatBinary() will call this when app starts, thus + // there is no multiple entry issue here. 
+ platform_ = new PlatformState(); + } + return *platform_; + } + + // Static Code Objects functions + hip::FatBinaryInfo** addFatBinary(const void* data); + hipError_t removeFatBinary(hip::FatBinaryInfo** module); + hipError_t digestFatBinary(const void* data, hip::FatBinaryInfo*& programs); + + hipError_t registerStatFunction(const void* hostFunction, hip::Function* func); + hipError_t registerStatGlobalVar(const void* hostVar, hip::Var* var); + hipError_t registerStatManagedVar(hip::Var* var); + + const char* getStatFuncName(const void* hostFunction); + hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); + hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId); + hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + + hipError_t initStatManagedVarDevicePtr(int deviceId); + + // Exec Functions + void setupArgument(const void* arg, size_t size, size_t offset); + void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); + void popExec(ihipExec_t& exec); + + private: + // Dynamic Code Object map, keyin module to get the corresponding object + std::unordered_map dynCO_map_; + hip::StatCO statCO_; // Static Code object var + bool initialized_{false}; + std::unordered_map> texRef_map_; +}; diff --git a/projects/clr/hipamd/src/hip_prof_api.h b/projects/clr/hipamd/src/hip_prof_api.h new file mode 100644 index 0000000000..c552ee5ebc --- /dev/null +++ b/projects/clr/hipamd/src/hip_prof_api.h @@ -0,0 +1,98 @@ +/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#ifndef HIP_SRC_HIP_PROF_API_H +#define HIP_SRC_HIP_PROF_API_H + +#include +#include +#include +#include +#include + +#if USE_PROF_API +#include "hip/amd_detail/hip_prof_str.h" +#include "platform/prof_protocol.h" + +struct hip_api_trace_data_t { + hip_api_data_t api_data; + uint64_t phase_enter_timestamp; + uint64_t phase_data; + + void (*phase_enter)(hip_api_id_t operation_id, hip_api_trace_data_t* data); + void (*phase_exit)(hip_api_id_t operation_id, hip_api_trace_data_t* data); +}; + +// HIP API callbacks spawner object macro +#define HIP_CB_SPAWNER_OBJECT(operation_id) \ + api_callbacks_spawner_t __api_tracer( \ + [=](auto& api_data) { INIT_CB_ARGS_DATA(operation_id, api_data); }); + +template class api_callbacks_spawner_t { + public: + template api_callbacks_spawner_t(Functor init_cb_args_data) { + static_assert(operation_id >= HIP_API_ID_FIRST && operation_id <= HIP_API_ID_LAST, + "invalid HIP_API operation id"); + + if (auto function = activity_prof::report_activity.load(std::memory_order_relaxed); function && + (enabled_ = function(ACTIVITY_DOMAIN_HIP_API, operation_id, &trace_data_) == 0)) { + activity_prof::correlation_id = trace_data_.api_data.correlation_id; + + if (trace_data_.phase_enter != nullptr) { + init_cb_args_data(trace_data_.api_data); + trace_data_.phase_enter(operation_id, &trace_data_); + } + } + } + + ~api_callbacks_spawner_t() { + if (enabled_) { + if (trace_data_.phase_exit != nullptr) trace_data_.phase_exit(operation_id, &trace_data_); + activity_prof::correlation_id = 0; + } + } + + private: + bool enabled_{false}; + union { + hip_api_trace_data_t trace_data_; + }; +}; + +template <> class api_callbacks_spawner_t { + public: + template api_callbacks_spawner_t(Functor) {} +}; + +#else + +#define HIP_CB_SPAWNER_OBJECT(x) \ + do { \ + } while (false) + +class api_callbacks_table_t { + public: + bool set_activity(hip_api_id_t, activity_sync_callback_t, void*) { return false; } + bool set_callback(hip_api_id_t, 
activity_rtapi_callback_t, void*) { return false; } +}; + +#endif + +#endif // HIP_SRC_HIP_PROF_API_H diff --git a/projects/clr/hipamd/src/hip_prof_gen.py b/projects/clr/hipamd/src/hip_prof_gen.py new file mode 100755 index 0000000000..6dc5247609 --- /dev/null +++ b/projects/clr/hipamd/src/hip_prof_gen.py @@ -0,0 +1,730 @@ +#!/usr/bin/python + +# Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
import os, sys, re
import CppHeaderParser
import filecmp

PROF_HEADER = "hip_prof_str.h"
OUTPUT = PROF_HEADER
# Upper bound on the length of an accumulated multi-line record
REC_MAX_LEN = 1024

# Recursive sources processing
recursive_mode = 0
# HIP_INIT_API macro patching
hip_patch_mode = 0
# API matching types check
types_check_mode = 0
# Private API check
private_check_mode = 0

# Messages and errors control
verbose = 0
errexit = 0
# File and line currently being parsed; appended to diagnostics by error()
# whenever line_num is not -1
inp_file = 'none'
line_num = -1

# Verbose message: written to stdout only when 'verbose' is set
def message(msg):
  if verbose: sys.stdout.write(msg + '\n')

# Diagnostic report. Does not terminate by itself (see fatal()).
# The message is tagged " Error: " when 'errexit' is set and " Warning: "
# otherwise, and is written to both stdout and stderr; the stderr copy is
# prefixed with the script name.
def error(msg):
  if line_num != -1:
    msg += ", file '" + inp_file + "', line (" + str(line_num) + ")"
  if errexit:
    msg = " Error: " + msg
  else:
    msg = " Warning: " + msg

  sys.stdout.write(msg + '\n')
  sys.stderr.write(sys.argv[0] + msg + '\n')

# Fatal error termination: reports through error() and exits with status 1
def fatal(msg):
  error(msg)
  sys.exit(1)

#############################################################
# Normalizing API name: strips trailing whitespace
def filtr_api_name(name):
  return re.sub(r'\s*$', '', name)

# Normalizing API declaration record: removes ' __dparm(...)' annotations
# and '(void*)' casts.
# Raw string literals keep the regex escapes (\s, \(, \*) out of Python's own
# string-escape processing, which warns about them on recent interpreters.
def filtr_api_decl(record):
  record = re.sub(r'\s__dparm\([^\)]*\)', '', record)
  record = re.sub(r'\(void\*\)', '', record)
  return record

# Normalizing API arguments
# Returns the argument string with surrounding whitespace removed, no spaces
# around commas, runs of whitespace collapsed, every run of '*' attached to
# the preceding token and followed by exactly one space, and the 'enum' /
# 'struct' keywords dropped.
def filtr_api_args(args_str):
  args_str = re.sub(r'^\s*', '', args_str)
  args_str = re.sub(r'\s*$', '', args_str)
  args_str = re.sub(r'\s*,\s*', ',', args_str)
  args_str = re.sub(r'\s+', ' ', args_str)
  args_str = re.sub(r'\s*(\*+)\s*', r'\1 ', args_str)
  # The word boundary has to cover both alternatives; with it inside the
  # group only 'enum' was anchored and any identifier ending in "struct"
  # lost its tail.
  args_str = re.sub(r'\b(enum|struct) ', '', args_str)
  return args_str

# Normalizing types: 'uint32_t' and a bare 'unsigned' both become
# 'unsigned int'
def norm_api_types(type_str):
  type_str = re.sub(r'uint32_t', 'unsigned int', type_str)
  type_str = re.sub(r'^unsigned$', 'unsigned int', type_str)
  return type_str

# Creating a list of arguments [(type, name), ...]
def list_api_args(args_str):
  # Input is normalized first, so pairs are separated by a bare ','.
  args_str = filtr_api_args(args_str)
  args_list = []
  if args_str != '':
    for arg_pair in args_str.split(','):
      # A lone 'void' means "no parameters"
      if arg_pair == 'void': continue
      # Drop a trailing default value ("= <token>")
      arg_pair = re.sub(r'\s*=\s*\S+$', '', arg_pair)
      # Everything up to the last whitespace is the type, the rest is the name
      m = re.match(r'^(.*)\s(\S+)$', arg_pair)
      if m:
        arg_type = norm_api_types(m.group(1))
        arg_name = m.group(2)
        args_list.append((arg_type, arg_name))
      else:
        fatal("bad args: args_str: '" + args_str + "' arg_pair: '" + arg_pair + "'")
  return args_list

# Creating arguments string "type0, type1, ..."
# Every type, including the last one, is followed by ', '.
def filtr_api_types(args_str):
  return ''.join(arg_type + ', ' for arg_type, _ in list_api_args(args_str))

# Creating options list [opt0, opt1, ...]
# The options are the argument names, in declaration order.
def filtr_api_opts(args_str):
  return [arg_name for _, arg_name in list_api_args(args_str)]

# Checking for pointer non-void arg type
# Returns the pointed-to type when arg_type ends with '*', otherwise ''.
# For a single-level pointer 'const ' is removed from the result; a pointer
# to void yields ''.
def pointer_ck(arg_type):
  ptr_type = ''
  m = re.match(r'(.*)\*$', arg_type)
  if m:
    ptr_type = m.group(1)
    # 'const ' is stripped only when the argument is not a pointer to pointer
    if not re.match(r'(.*)\*\*$', arg_type):
      ptr_type = re.sub(r'const ', '', ptr_type)
    if ptr_type == 'void': ptr_type = ''
  return ptr_type
#############################################################
# Parsing API header
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset);
# inp_file_p - input API header file
# out - output map: API name => arguments string; only names that are not
#       already present are added
def parse_api(inp_file_p, out):
  global inp_file
  global line_num
  inp_file = inp_file_p

  # Start of a prototype: return type, name, opening parenthesis
  beg_pattern = re.compile(r'^(hipError_t|const char\s*\*)\s+([^\(]+)\(')
  # Complete prototype up to the closing parenthesis
  api_pattern = re.compile(r'^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)')
  # Prototypes mentioning "Texture" are not registered
  end_pattern = re.compile('Texture')
  hidden_pattern = re.compile(r'__attribute__\(\(visibility\("hidden"\)\)\)')
  nms_open_pattern = re.compile(r'namespace hip_impl {')
  nms_close_pattern = re.compile(r'}')

  found = 0
  hidden = 0
  nms_level = 0
  record = ""
  line_num = -1

  # 'with' closes the header even when fatal() exits from inside the loop
  with open(inp_file, 'r') as inp:
    for line in inp:
      # Strip only the line terminator: the last line of a file may not have
      # one, and cutting its final character would lose the ')' of a
      # prototype placed there.
      record += re.sub(r'^\s+', r' ', line.rstrip('\n'))
      line_num += 1

      if len(record) > REC_MAX_LEN:
        fatal("bad record \"" + record + "\"")

      m = beg_pattern.match(line)
      if m:
        name = m.group(2)
        if hidden != 0:
          message("api: " + name + " - hidden")
        elif nms_level != 0:
          message("api: " + name + " - hip_impl")
        else:
          message("api: " + name)
          found = 1

      if found != 0:
        record = re.sub(r'\s__dparm\([^\)]*\)', '', record)
        m = api_pattern.match(record)
        if m:
          found = 0
          if end_pattern.search(record): continue
          api_name = filtr_api_name(m.group(2))
          api_args = m.group(3)
          if api_name not in out:
            out[api_name] = api_args
        # Prototype is not complete yet: keep accumulating the record
        else: continue

      # The hidden-visibility attribute applies to the line that follows it
      hidden = 0
      if hidden_pattern.match(line): hidden = 1

      if nms_open_pattern.match(line): nms_level += 1
      if (nms_level > 0) and nms_close_pattern.match(line): nms_level -= 1
      if nms_level < 0:
        fatal("nms level < 0")

      record = ""

  line_num = -1
#############################################################
# Parsing API implementation
# hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) {
#   HIP_INIT_API(hipSetupArgument, arg, size, offset);
# inp_file - input implementation source file
# api_map - input public API map: API name => API arguments string
# out - output map: API name => [opt0, opt1, ...]
+def parse_content(inp_file_p, api_map, out): + global hip_patch_mode + global types_check_mode + global private_check_mode + global inp_file + global line_num + inp_file = inp_file_p + + # API method begin pattern + beg_pattern = re.compile("^(hipError_t|const char\s*\*)\s+[^\(]+\("); + # API declaration pattern + decl_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*;"); + # API definition pattern + api_pattern = re.compile("^(hipError_t|const char\s*\*)\s+([^\(]+)\(([^\)]*)\)\s*{"); + # API init macro pattern + init_pattern = re.compile("(^\s*HIP_INIT_API[^\s]*\s*)\((([^,]+)(,.*|)|)(\);|,)\s*$"); + + # Open input file + inp = open(inp_file, 'r') + + # API name + api_name = "" + # Valid public API found flag + api_valid = 0 + # API overload (parameters mismatch) + api_overload = 0 + + # Input file patched content + content = '' + # Sub content for found API defiition + sub_content = '' + # Current record, accumulating several API definition related lines + record = '' + # Current input file line number + line_num = -1 + # API beginning found flag + found = 0 + + # Reading input file + for line in inp.readlines(): + # Accumulating record + record += re.sub(r'^\s+', r' ', line[:-1]) + line_num += 1 + + if len(record) > REC_MAX_LEN: + fatal("bad record \"" + record + "\"") + break; + + # Looking for API begin + if found == 0: + record = re.sub(r'\s*extern\s+"C"\s+', r'', record); + if beg_pattern.match(record): + found = 1 + record = filtr_api_decl(record) + + # Matching API declaration + if found == 1: + if decl_pattern.match(record): + found = 0 + + # Matching API definition + if found == 1: + m = api_pattern.match(record) + # Checking if complete API matched + if m: + found = 2 + api_valid = 0 + api_overload = 0 + api_name = filtr_api_name(m.group(2)) + # Checking if API name is in the API map + if (private_check_mode == 0) or (api_name in api_map): + if not api_name in api_map: api_map[api_name] = '' + # Getting API arguments + 
api_args = m.group(3) + # Getting etalon arguments from the API map + eta_args = api_map[api_name] + if eta_args == '': + eta_args = api_args + api_map[api_name] = eta_args + # Normalizing API arguments + api_types = filtr_api_types(api_args) + # Normalizing etalon arguments + eta_types = filtr_api_types(eta_args) + if (api_types == eta_types) or ((types_check_mode == 0) and (not api_name in out)): + # API is already found and not is mismatched + if (api_name in out): + fatal("API redefined \"" + api_name + "\", record \"" + record + "\"") + # Set valid public API found flag + api_valid = 1 + # Set output API map with API arguments list + out[api_name] = filtr_api_opts(api_args) + # Register missmatched API methods + else: + api_overload = 1 + # Warning about mismatched API, possible non public overloaded version + api_diff = '\t\t' + inp_file + " line(" + str(line_num) + ")\n\t\tapi: " + api_types + "\n\t\teta: " + eta_types + message("\t" + api_name + ' args mismatch:\n' + api_diff + '\n') + + # API found action + if found == 2: + if hip_patch_mode != 0: + # Looking for INIT macro + m = init_pattern.match(line) + if m: + init_name = api_name + if api_overload == 1: init_name = 'NONE' + init_args = m.group(4) + line = m.group(1) + '(' + init_name + init_args + m.group(5) + '\n' + + m = init_pattern.match(line) + if m: + found = 0 + if api_valid == 1: message("\t" + api_name) + # Ignore if it is initialized as NONE + init_name = m.group(3) + if init_name != 'NONE': + # Check if init name matching API name + # if init_name != api_name: + # fatal("init name mismatch: '" + init_name + "' <> '" + api_name + "'") + # Registering dummy API for non public API if the name in INIT is not NONE + if api_valid == 0: + # If init name is not in public API map then it is private API + # else it was not identified and will be checked on finish + if not init_name in api_map: + if init_name in out: + fatal("API reinit \"" + api_name + "\", record \"" + record + "\"") + 
out[init_name] = [] + elif re.search('}', line): + found = 0 + # Expect INIT macro for valid public API + # Removing and registering non-conformant APIs with missing HIP_INIT macro + if api_valid == 1: + if api_name in out: + del out[api_name] + del api_map[api_name] + # Registering non-conformant APIs + out['.' + api_name] = 1 + else: + fatal("API is not in out \"" + api_name + "\", record \"" + record + "\"") + + if found != 1: record = "" + content += line + + inp.close() + line_num = -1 + + if len(out) != 0: + return content + else: + return '' + +# src path walk +def parse_src(api_map, src_path, src_patt, out): + global recursive_mode + + pattern = re.compile(src_patt) + src_path = re.sub(r'\s', '', src_path) + for src_dir in src_path.split(':'): + message("Parsing " + src_dir + " for '" + src_patt + "'") + for root, dirs, files in os.walk(src_dir): + for fnm in files: + if pattern.search(fnm): + file = root + '/' + fnm + message(file) + content = parse_content(file, api_map, out); + if (hip_patch_mode != 0) and (content != ''): + f = open(file, 'w') + f.write(content) + f.close() + if recursive_mode == 0: break +############################################################# +# Generating profiling primitives header +# api_map - public API map [] => [(type, name), ...] +# callback_ids - public API callback IDs list (name, callback_id) +# opts_map - opts map [] => [opt0, opt1, ...] +def generate_prof_header(f, api_map, callback_ids, opts_map): + # Private API list + priv_lst = [] + + f.write('// Generated file. 
DO NOT EDIT.\n') + f.write('//\n') + f.write('// This file is automatically generated by the ' + os.path.basename(__file__) + ' script.\n') + f.write('// If changes are required, run the script and commit the updated file.\n\n') + f.write('#ifndef _HIP_PROF_STR_H\n'); + f.write('#define _HIP_PROF_STR_H\n'); + f.write('#define HIP_PROF_VER 1\n') + + # Check for non-public API + for name in sorted(opts_map.keys()): + if not name in api_map: + opts_lst = opts_map[name] + if len(opts_lst) != 0: + fatal("bad dummy API \"" + name + "\", args: " + str(opts_lst)) + priv_lst.append(name) + message("Private: " + name) + + # Generating the callbacks ID enumaration + f.write('\n// HIP API callbacks ID enumeration\n') + f.write('enum hip_api_id_t {\n') + f.write(' HIP_API_ID_NONE = 0,\n') + f.write(' HIP_API_ID_FIRST = 1,\n') + + cb_id_map = {} + last_cb_id = 0 + for name, cb_id in callback_ids: + if not name in api_map: + f.write(' HIP_API_ID_RESERVED_' + str(cb_id) + ' = ' + str(cb_id) + ',\n') + else: + f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n') + cb_id_map[name] = cb_id + if cb_id > last_cb_id: last_cb_id = cb_id + + for name in sorted(api_map.keys()): + if not name in cb_id_map: + last_cb_id += 1 + f.write(' HIP_API_ID_' + name + ' = ' + str(last_cb_id) + ',\n') + + f.write(' HIP_API_ID_LAST = ' + str(last_cb_id) + ',\n') + f.write('\n') + for name in sorted(priv_lst): + f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NONE,\n') + f.write('};\n') + + # Generating the method to return API name by ID + f.write('\n// Return the HIP API string for a given callback ID\n') + f.write('static inline const char* hip_api_name(const uint32_t id) {\n') + f.write(' switch(id) {\n') + for name in sorted(api_map.keys()): + f.write(' case HIP_API_ID_' + name + ': return "' + name + '";\n') + f.write(' };\n') + f.write(' return "unknown";\n') + f.write('};\n') + + # Generating the method for querying API ID by name + f.write('\n') + f.write('#include \n'); + f.write('// 
Return the HIP API callback ID for a given name\n') + f.write('static inline uint32_t hipApiIdByName(const char* name) {\n') + for name in sorted(api_map.keys()): + f.write(' if (strcmp("' + name + '", name) == 0) return HIP_API_ID_' + name + ';\n') + f.write(' return HIP_API_ID_NONE;\n') + f.write('}\n') + + # Generating the callbacks data structure + f.write('\n// HIP API callbacks data structures\n') + f.write( + 'typedef struct hip_api_data_s {\n' + + ' uint64_t correlation_id;\n' + + ' uint32_t phase;\n' + + ' union {\n' + ) + for name in sorted(api_map.keys()): + args = api_map[name] + if len(args) != 0: + f.write(' struct {\n') + for arg_tuple in args: + arg_type = arg_tuple[0] + ptr_type = pointer_ck(arg_type) + arg_name = arg_tuple[1] + # Checking for enum type + if arg_type == "hipLimit_t": arg_type = 'enum ' + arg_type + # Structuer field code + f.write(' ' + arg_type + ' ' + arg_name + ';\n') + if ptr_type != '': + f.write(' ' + ptr_type + ' ' + arg_name + '__val;\n') + f.write(' } ' + name + ';\n') + f.write( + ' } args;\n' + + ' uint64_t *phase_data;\n' + + '} hip_api_data_t;\n' + ) + + # Generating the callbacks args data filling macros + f.write('\n// HIP API callbacks args data filling macros\n') + for name in sorted(api_map.keys()): + args = api_map[name] + f.write('// ' + name + str(args) + '\n') + f.write('#define INIT_' + name + '_CB_ARGS_DATA(cb_data) { \\\n') + if name in opts_map: + opts_list = opts_map[name] + if len(args) != len(opts_list): + fatal("\"" + name + "\" API args and opts mismatch, args: " + str(args) + ", opts: " + str(opts_list)) + # API args iterating: + # type is args[][0] + # name is args[][1] + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_type = arg_tuple[0] + ptr_type = pointer_ck(arg_type) + fld_name = arg_tuple[1] + opt_name = opts_list[ind] + if arg_type == "const char*": + f.write(' cb_data.args.' + name + '.' + fld_name + ' = (' + opt_name + ') ? 
strdup(' + opt_name + ') : NULL; \\\n') + else: + f.write(' cb_data.args.' + name + '.' + fld_name + ' = (' + arg_type + ')' + opt_name + '; \\\n') + f.write('};\n') + f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') + + # Generating macro for non-public API + f.write('\n// Macros for non-public API primitives\n') + for name in sorted(priv_lst): + f.write('// ' + name + '()\n') + f.write('#define INIT_'+ name + '_CB_ARGS_DATA(cb_data) {};\n') + f.write('\n#define INIT_NONE_CB_ARGS_DATA(cb_data) {};\n') + + f.write('\n#if HIP_PROF_HIP_API_STRING\n') + # Generating the method for the API args filling + f.write('// HIP API args filling helper\n') + f.write('static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {\n') + f.write(' switch (id) {\n') + for name in sorted(api_map.keys()): + args = api_map[name] + f.write('// ' + name + str(args) + '\n') + f.write(' case HIP_API_ID_' + name + ':\n') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_type = arg_tuple[0] + ptr_type = pointer_ck(arg_type) + fld_name = arg_tuple[1] + var_name = 'data->args.' + name + '.' + fld_name + if arg_type == "char*": + f.write(' ' + var_name + ' = (' + var_name + ') ? 
strdup(' + var_name + ') : NULL;\n') + else: + if ptr_type != '': + f.write(' if (' + var_name + ') ' + var_name + '__val = *(' + var_name + ');\n') + f.write(' break;\n') + f.write(' default: break;\n') + f.write(' };\n') + f.write('}\n') + + # Generating the method for the API string, name and parameters + f.write('\n') + f.write('#include \n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name in sorted(api_map.keys()): + args = api_map[name] + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '(";\n') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_type = arg_tuple[0] + ptr_type = pointer_ck(arg_type) + arg_name = arg_tuple[1] + var_name = 'data->args.' + name + '.' + arg_name + delim = '' if ind == 0 else ', '; + oss_stream = 'oss << "' + delim + arg_name + '=' + line_shift = ' ' + f.write(line_shift) + if ptr_type != '': + f.write('if (' + var_name + ' == NULL) ' + oss_stream + 'NULL";\n' + line_shift + 'else { ') + if pointer_ck(ptr_type) != '': + f.write(oss_stream + '"; roctracer::hip_support::detail::operator<<(oss, (void*)' + var_name + '__val' + '); }\n') + else: + f.write(oss_stream + '"; roctracer::hip_support::detail::operator<<(oss, ' + var_name + '__val' + '); }\n') + else: + f.write(oss_stream + '"; roctracer::hip_support::detail::operator<<(oss, ' + var_name + ');\n') + f.write(' oss << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('}\n') + f.write('#endif // HIP_PROF_HIP_API_STRING\n') + + f.write('#endif // _HIP_PROF_STR_H\n'); + +############################################################# +# main +while len(sys.argv) > 1: + if not re.match(r'-', sys.argv[1]): break + + if 
(sys.argv[1] == '-v'): + verbose = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-r'): + recursive_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-t'): + types_check_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '--priv'): + private_check_mode = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-e'): + errexit = 1 + sys.argv.pop(1) + + if (sys.argv[1] == '-p'): + hip_patch_mode = 1 + sys.argv.pop(1) + +# Usage +if (len(sys.argv) < 4): + fatal ("Usage: " + sys.argv[0] + " [-v] []\n" + + " -v - verbose messages\n" + + " -r - process source directory recursively\n" + + " -t - API types matching check\n" + + " --priv - private API check\n" + + " -e - on error exit mode\n" + + " -p - HIP_INIT_API macro patching mode\n" + + "\n" + + " Example:\n" + + " $ " + sys.argv[0] + " -v -p -t --priv ../hip/include/hip/hip_runtime_api.h" + + " ./src ./include/hip/amd_detail/hip_prof_str.h ./include/hip/amd_detail/hip_prof_str.h.new"); + +# API header file given as an argument +src_pat = "\.cpp$" +api_hfile = sys.argv[1] +if not os.path.isfile(api_hfile): + fatal("input file '" + api_hfile + "' not found") + +# Srcs directory given as an argument +src_dir = sys.argv[2] +if not os.path.isdir(src_dir): + fatal("src directory " + src_dir + "' not found") + +# Current hip_prof_str include +INPUT = sys.argv[3] +if not os.path.isfile(INPUT): + fatal("input file '" + INPUT + "' not found") + +if len(sys.argv) > 4: OUTPUT = sys.argv[4] + +# API declaration map +api_map = { + 'hipSetupArgument': '', + 'hipMalloc3DArray': '', + 'hipFuncGetAttribute': '', + 'hipMemset3DAsync': '', + 'hipKernelNameRef': '', + 'hipStreamGetPriority': '', + 'hipLaunchByPtr': '', + 'hipFreeHost': '', + 'hipGetErrorName': '', + 'hipMemcpy3DAsync': '', + 'hipMemcpyParam2DAsync': '', + 'hipArray3DCreate': '', + 'hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags': '', + 'hipOccupancyMaxPotentialBlockSize': '', + 'hipMallocManaged': '', + 'hipOccupancyMaxActiveBlocksPerMultiprocessor': '', + 'hipGetErrorString': 
'', + 'hipMallocHost': '', + 'hipModuleLoadDataEx': '', + 'hipGetDeviceProperties': '', + 'hipConfigureCall': '', + 'hipHccModuleLaunchKernel': '', + 'hipExtModuleLaunchKernel': '', +} +# API options map +opts_map = {} + +# Parsing API header +parse_api(api_hfile, api_map) + +# Parsing sources +parse_src(api_map, src_dir, src_pat, opts_map) + +try: + cppHeader = CppHeaderParser.CppHeader(INPUT) +except CppHeaderParser.CppParseError as e: + print(e) + sys.exit(1) + +# Callback IDs +api_callback_ids = [] + +for enum in cppHeader.enums: + if enum['name'] == 'hip_api_id_t': + for value in enum['values']: + if value['name'] == 'HIP_API_ID_NONE' or value['name'] == 'HIP_API_ID_FIRST': + continue + if value['name'] == 'HIP_API_ID_LAST': + break + m = re.match(r'HIP_API_ID_(\S*)', value['name']) + if m: + api_callback_ids.append((m.group(1), value['value'])) + break + +# Checking for non-conformant APIs with missing HIP_INIT macro +for name in list(opts_map.keys()): + m = re.match(r'\.(\S*)', name) + if m: + message("Init missing: " + m.group(1)) + del opts_map[name] + +# Converting api map to map of lists +# Checking for not found APIs +not_found = 0 +if len(opts_map) != 0: + for name in api_map.keys(): + args_str = api_map[name]; + api_map[name] = list_api_args(args_str) + if not name in opts_map: + error("implementation not found: " + name) + not_found += 1 +if not_found != 0: + error(str(not_found) + " API calls missing in interception layer") + +# The output subdirectory seems to exist or not depending on the +# version of cmake. 
+output_dir = os.path.dirname(OUTPUT) +if not os.path.exists(output_dir): + os.makedirs(output_dir) + +# Generating output header file +with open(OUTPUT, 'w') as f: + generate_prof_header(f, api_map, api_callback_ids, opts_map) + +if not filecmp.cmp(INPUT, OUTPUT): + message("Warning: \"" + INPUT + "\" needs to be re-generated and checked-in with the current changes") + +# Successfull exit +sys.exit(0) diff --git a/projects/clr/hipamd/src/hip_profile.cpp b/projects/clr/hipamd/src/hip_profile.cpp new file mode 100644 index 0000000000..d557c5c698 --- /dev/null +++ b/projects/clr/hipamd/src/hip_profile.cpp @@ -0,0 +1,40 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" + +hipError_t hipProfilerStart() { + HIP_INIT_API(hipProfilerStart); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} + + +hipError_t hipProfilerStop() { + HIP_INIT_API(hipProfilerStop); + + assert(0 && "Unimplemented"); + + HIP_RETURN(hipErrorNotSupported); +} diff --git a/projects/clr/hipamd/src/hip_runtime.cpp b/projects/clr/hipamd/src/hip_runtime.cpp new file mode 100644 index 0000000000..78eb4acace --- /dev/null +++ b/projects/clr/hipamd/src/hip_runtime.cpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2008 - 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include "thread/thread.hpp" + +#include +#include + +void ihipDestroyDevice(); + +#ifdef DEBUG +static int reportHook(int reportType, char* message, int* returnValue) { + if (returnValue) { + *returnValue = 1; + } + std::cerr << message; + ::exit(3); + return TRUE; +} +#endif // DEBUG + +extern "C" BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID reserved) { + switch (reason) { + case DLL_PROCESS_ATTACH: +#ifdef DEBUG + if (!::getenv("AMD_OCL_ENABLE_MESSAGE_BOX")) { + _CrtSetReportHook(reportHook); + _set_error_mode(_OUT_TO_STDERR); + } +#endif // DEBUG + break; + case DLL_PROCESS_DETACH: { + amd::Thread* thread = amd::Thread::current(); + if (!(thread != nullptr || + ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) { + return true; + } + ihipDestroyDevice(); + } break; + case DLL_THREAD_DETACH: { + amd::Thread* thread = amd::Thread::current(); + delete thread; + } break; + default: + break; + } + return true; +} diff --git a/projects/clr/hipamd/src/hip_stream.cpp b/projects/clr/hipamd/src/hip_stream.cpp new file mode 100644 index 0000000000..45ddd1665b --- /dev/null +++ b/projects/clr/hipamd/src/hip_stream.cpp @@ -0,0 +1,821 @@ +/* Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include "hip_internal.hpp" +#include "hip_event.hpp" +#include "thread/monitor.hpp" +#include "hip_prof_api.h" + +static amd::Monitor streamSetLock{"Guards global stream set"}; +static std::unordered_set streamSet; +namespace hip { + +// ================================================================================================ +Stream::Stream(hip::Device* dev, Priority p, unsigned int f, bool null_stream, + const std::vector& cuMask, hipStreamCaptureStatus captureStatus) + : amd::HostQueue(*dev->asContext(), *dev->devices()[0], 0, amd::CommandQueue::RealTimeDisabled, + convertToQueuePriority(p), cuMask), + lock_("Stream Callback lock"), + device_(dev), + priority_(p), + flags_(f), + null_(null_stream), + cuMask_(cuMask), + captureStatus_(captureStatus), + originStream_(false), + captureID_(0) + { + amd::ScopedLock lock(streamSetLock); + streamSet.insert(this); + } + +// ================================================================================================ +hipError_t Stream::EndCapture() { + for (auto event : captureEvents_) { + hip::Event* e = reinterpret_cast(event); + e->EndCapture(); + } + for (auto stream : parallelCaptureStreams_) { + hip::Stream* s = reinterpret_cast(stream); + hipError_t err = s->EndCapture(); + assert(err == hipSuccess); + } + captureStatus_ = hipStreamCaptureStatusNone; + pCaptureGraph_ = nullptr; + originStream_ = false; + parentStream_ = nullptr; + lastCapturedNodes_.clear(); + parallelCaptureStreams_.clear(); + captureEvents_.clear(); 
+ + return hipSuccess; +} + +// ================================================================================================ +bool Stream::Create() { + return create(); +} + +// ================================================================================================ +bool Stream::terminate() { + { + amd::ScopedLock lock(streamSetLock); + streamSet.erase(this); + } + return HostQueue::terminate(); +} + +// ================================================================================================ +bool isValid(hipStream_t& stream) { + // NULL stream is always valid + if (stream == nullptr) { + return true; + } + + if (hipStreamPerThread == stream) { + getStreamPerThread(stream); + } + + hip::Stream* s = reinterpret_cast(stream); + amd::ScopedLock lock(streamSetLock); + if (streamSet.find(s) == streamSet.end()) { + return false; + } + return true; +} + +// ================================================================================================ +int Stream::DeviceId() const { + return device_->deviceId(); +} + +int Stream::DeviceId(const hipStream_t hStream) { + // Copying locally into non-const variable just to get const away + hipStream_t inputStream = hStream; + if (!hip::isValid(inputStream)) { + //return invalid device id + return -1; + } + hip::Stream* s = reinterpret_cast(inputStream); + int deviceId = (s != nullptr)? 
s->DeviceId() : ihipGetDevice(); + assert(deviceId >= 0 && deviceId < static_cast(g_devices.size())); + return deviceId; +} + +void Stream::syncNonBlockingStreams(int deviceId) { + amd::ScopedLock lock(streamSetLock); + for (auto& it : streamSet) { + if (it->Flags() & hipStreamNonBlocking) { + if (it->DeviceId() == deviceId) { + it->finish(); + } + } + } +} + +bool Stream::StreamCaptureBlocking() { + amd::ScopedLock lock(streamSetLock); + for (auto& it : streamSet) { + if (it->GetCaptureStatus() == hipStreamCaptureStatusActive && it->Flags() != hipStreamNonBlocking) { + return true; + } + } + return false; +} + +void Stream::destroyAllStreams(int deviceId) { + std::vector toBeDeleted; + { + amd::ScopedLock lock(streamSetLock); + for (auto& it : streamSet) { + if (it->Null() == false && it->DeviceId() == deviceId) { + toBeDeleted.push_back(it); + } + } + } + for (auto& it : toBeDeleted) { + it->release(); + } +} + +bool Stream::StreamCaptureOngoing(void) { + return (g_allCapturingStreams.empty() == true) ? 
false : true; +} + +bool Stream::existsActiveStreamForDevice(hip::Device* device) { + + amd::ScopedLock lock(streamSetLock); + + for (const auto& active_stream : streamSet) { + if ((active_stream->GetDevice() == device) && + active_stream->GetQueueStatus()) { + return true; + } + } + return false; +} + +};// hip namespace + +// ================================================================================================ +void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream) { + amd::Command::EventWaitList eventWaitList(0); + bool submitMarker = 0; + { + amd::ScopedLock lock(streamSetLock); + + for (const auto& active_stream : streamSet) { + // If it's the current device + if ((&active_stream->device() == &blocking_stream->device()) && + // Make sure it's a default stream + ((active_stream->Flags() & hipStreamNonBlocking) == 0) && + // and it's not the current stream + (active_stream != blocking_stream) && + // check for a wait on the null stream + (active_stream->Null() == wait_null_stream)) { + // Get the last valid command + amd::Command* command = active_stream->getLastQueuedCommand(true); + if (command != nullptr) { + amd::Event& event = command->event(); + // Check HW status of the ROCcrl event. 
+ // Note: not all ROCclr modes support HW status + bool ready = active_stream->device().IsHwEventReady(event); + if (!ready) { + ready = (command->status() == CL_COMPLETE); + } + submitMarker |= active_stream->vdev()->isFenceDirty(); + // Check the current active status + if (!ready) { + command->notifyCmdQueue(); + eventWaitList.push_back(command); + } else { + command->release(); + } + } + // Nullstream, hence there is nothing else to wait + if (wait_null_stream) { + break; + } + } + } + } + + // Check if we have to wait anything + if (eventWaitList.size() > 0 || submitMarker) { + amd::Command* command = new amd::Marker(*blocking_stream, kMarkerDisableFlush, eventWaitList); + if (command != nullptr) { + command->enqueue(); + command->release(); + } + + //Reset the dirty flag for all streams now that the marker is submitted + for (const auto& stream : streamSet) { + amd::HostQueue* active_queue = stream->asHostQueue(); + if (active_queue->vdev()->isFenceDirty()) { + active_queue->vdev()->resetFenceDirty(); + } + } + } + + // Release all active commands. 
It's safe after the marker was enqueued + for (const auto& it : eventWaitList) { + it->release(); + } +} + +// ================================================================================================ +void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { + StreamCallback* cbo = reinterpret_cast(user_data); + cbo->callback(); + delete cbo; +} + +// ================================================================================================ +static hipError_t ihipStreamCreate(hipStream_t* stream, + unsigned int flags, hip::Stream::Priority priority, + const std::vector& cuMask = {}) { + if (flags != hipStreamDefault && flags != hipStreamNonBlocking) { + return hipErrorInvalidValue; + } + hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags, false, cuMask); + + if (hStream == nullptr) { + return hipErrorOutOfMemory; + } + else if (!hStream->Create()) { + hStream->release(); + return hipErrorOutOfMemory; + } + + *stream = reinterpret_cast(hStream); + + return hipSuccess; +} + +// ================================================================================================ + +stream_per_thread::stream_per_thread() { + m_streams.resize(g_devices.size()); + for (auto &stream : m_streams) { + stream = nullptr; + } +} + +stream_per_thread::~stream_per_thread() { + for (auto &stream:m_streams) { + if (stream != nullptr && hip::isValid(stream)) { + reinterpret_cast(stream)->release(); + stream = nullptr; + } + } +} + +hipStream_t stream_per_thread::get() { + hip::Device* device = hip::getCurrentDevice(); + int currDev = device->deviceId(); + // This is to make sure m_streams is not empty + if (m_streams.empty()) { + m_streams.resize(g_devices.size()); + for (auto &stream : m_streams) { + stream = nullptr; + } + } + // There is a scenario where hipResetDevice destroys stream per thread + // hence isValid check is required to make sure only valid stream is used + if (m_streams[currDev] == 
nullptr || !hip::isValid(m_streams[currDev])) { + hipError_t status = ihipStreamCreate(&m_streams[currDev], hipStreamDefault, + hip::Stream::Priority::Normal); + if (status != hipSuccess) { + DevLogError("Stream creation failed\n"); + } + } + return m_streams[currDev]; +} + + +// ================================================================================================ +void getStreamPerThread(hipStream_t& stream) { + if (stream == hipStreamPerThread) { + stream = hip::tls.stream_per_thread_obj_.get(); + } +} + +// ================================================================================================ +hipStream_t getPerThreadDefaultStream() { + // Function to get per thread default stream + // More about the usecases yet to come + hipStream_t stream = hipStreamPerThread; + getStreamPerThread(stream); + return stream; +} + +// ================================================================================================ +hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { + HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipStreamCreate(stream, flags, hip::Stream::Priority::Normal), *stream); +} + +// ================================================================================================ +hipError_t hipStreamCreate(hipStream_t *stream) { + HIP_INIT_API(hipStreamCreate, stream); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, hip::Stream::Priority::Normal), *stream); +} + +// ================================================================================================ +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { + HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::Stream::Priority streamPriority; + 
if (priority <= hip::Stream::Priority::High) { + streamPriority = hip::Stream::Priority::High; + } else if (priority >= hip::Stream::Priority::Low) { + streamPriority = hip::Stream::Priority::Low; + } else { + streamPriority = hip::Stream::Priority::Normal; + } + + HIP_RETURN(ihipStreamCreate(stream, flags, streamPriority), *stream); +} + +// ================================================================================================ +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { + HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); + + if (leastPriority != nullptr) { + *leastPriority = hip::Stream::Priority::Low; + } + if (greatestPriority != nullptr) { + *greatestPriority = hip::Stream::Priority::High; + } + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipStreamGetFlags_common(hipStream_t stream, unsigned int* flags) { + if ((flags != nullptr) && (stream != nullptr)) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + *flags = reinterpret_cast(stream)->Flags(); + } else { + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +// ================================================================================================ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) { + HIP_INIT_API(hipStreamGetFlags, stream, flags); + HIP_RETURN(hipStreamGetFlags_common(stream, flags)); +} + +// ================================================================================================ +hipError_t hipStreamGetFlags_spt(hipStream_t stream, unsigned int* flags) { + HIP_INIT_API(hipStreamGetFlags, stream, flags); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamGetFlags_common(stream, flags)); +} + +// ================================================================================================ +hipError_t 
hipStreamSynchronize_common(hipStream_t stream) { + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorContextIsDestroyed); + } + if (stream != nullptr) { + // If still capturing return error + if (hip::Stream::StreamCaptureOngoing() == true) { + HIP_RETURN(hipErrorStreamCaptureUnsupported); + } + } + // Wait for the current host queue + hip::getStream(stream)->finish(); + return hipSuccess; +} + +// ================================================================================================ +hipError_t hipStreamSynchronize(hipStream_t stream) { + HIP_INIT_API(hipStreamSynchronize, stream); + HIP_RETURN(hipStreamSynchronize_common(stream)); +} + +// ================================================================================================ +hipError_t hipStreamSynchronize_spt(hipStream_t stream) { + HIP_INIT_API(hipStreamSynchronize, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamSynchronize_common(stream)); +} + +// ================================================================================================ +hipError_t hipStreamDestroy(hipStream_t stream) { + HIP_INIT_API(hipStreamDestroy, stream); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + if (stream == hipStreamPerThread) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorContextIsDestroyed); + } + hip::Stream* s = reinterpret_cast(stream); + if (s->GetCaptureStatus() != hipStreamCaptureStatusNone) { + if (s->GetParentStream() != nullptr) { + reinterpret_cast(s->GetParentStream())->EraseParallelCaptureStream(stream); + } + auto error = s->EndCapture(); + } + s->GetDevice()->RemoveStreamFromPools(s); + + amd::ScopedLock lock(g_captureStreamsLock); + const auto& g_it = std::find(g_captureStreams.begin(), g_captureStreams.end(), s); + if (g_it != g_captureStreams.end()) { + g_captureStreams.erase(g_it); + } + const auto& l_it = std::find(hip::tls.capture_streams_.begin(), + 
hip::tls.capture_streams_.end(), s); + if (l_it != hip::tls.capture_streams_.end()) { + hip::tls.capture_streams_.erase(l_it); + } + s->release(); + + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +void WaitThenDecrementSignal(hipStream_t stream, hipError_t status, void* user_data) { + CallbackData* data = reinterpret_cast(user_data); + int offset = data->previous_read_index % IPC_SIGNALS_PER_EVENT; + while (data->shmem->read_index < data->previous_read_index + IPC_SIGNALS_PER_EVENT && + data->shmem->signal[offset] != 0) { + amd::Os::sleep(1); + } + delete data; +} + +// ================================================================================================ +hipError_t hipStreamWaitEvent_common(hipStream_t stream, hipEvent_t event, unsigned int flags) { + EVENT_CAPTURE(hipStreamWaitEvent, event, stream, flags); + + if (event == nullptr) { + return hipErrorInvalidHandle; + } + + if (flags != 0) { + return hipErrorInvalidValue; + } + + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + + hip::Event* e = reinterpret_cast(event); + return e->streamWait(stream, flags); +} + +// ================================================================================================ +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { + HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); + HIP_RETURN(hipStreamWaitEvent_common(stream, event, flags)); +} + +// ================================================================================================ +hipError_t hipStreamWaitEvent_spt(hipStream_t stream, hipEvent_t event, unsigned int flags) { + HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamWaitEvent_common(stream, event, flags)); +} + +// ================================================================================================ +hipError_t 
hipStreamQuery_common(hipStream_t stream) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + if (stream != nullptr) { + // If still capturing return error + if (hip::Stream::StreamCaptureOngoing() == true) { + HIP_RETURN(hipErrorStreamCaptureUnsupported); + } + } + hip::Stream* hip_stream = hip::getStream(stream); + + amd::Command* command = hip_stream->getLastQueuedCommand(true); + if (command == nullptr) { + // Nothing was submitted to the queue + return hipSuccess; + } + + amd::Event& event = command->event(); + if (command->type() != 0) { + event.notifyCmdQueue(); + } + // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status + bool ready = command->queue()->device().IsHwEventReady(event); + if (!ready) { + ready = (command->status() == CL_COMPLETE); + } + hipError_t status = ready ? hipSuccess : hipErrorNotReady; + command->release(); + return status; +} + +// ================================================================================================ +hipError_t hipStreamQuery(hipStream_t stream) { + HIP_INIT_API(hipStreamQuery, stream); + HIP_RETURN(hipStreamQuery_common(stream)); +} + +// ================================================================================================ +hipError_t hipStreamQuery_spt(hipStream_t stream) { + HIP_INIT_API(hipStreamQuery, stream); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamQuery_common(stream)); +} + +hipError_t streamCallback_common(hipStream_t stream, StreamCallback* cbo, void* userData) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + + hip::Stream* hip_stream = hip::getStream(stream); + amd::Command* last_command = hip_stream->getLastQueuedCommand(true); + amd::Command::EventWaitList eventWaitList; + if (last_command != nullptr) { + eventWaitList.push_back(last_command); + } + amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList); + if (command == nullptr) { + return 
hipErrorInvalidValue; + } + if ((cbo == nullptr) || !command->setCallback(CL_COMPLETE, ihipStreamCallback, cbo)) { + command->release(); + if (last_command != nullptr) { + last_command->release(); + } + return hipErrorInvalidHandle; + } + command->enqueue(); + // @note: don't release the command here, because it will be released after HIP callback + if (last_command != nullptr) { + last_command->release(); + } + // Extra marker is required for HW event check, which is done before the callback is finished. + // Add the new barrier to stall the stream, until the callback is done + eventWaitList.clear(); + eventWaitList.push_back(command); + amd::Command* block_command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList); + if (block_command == nullptr) { + return hipErrorInvalidValue; + } + block_command->enqueue(); + block_command->release(); + + // Release the callback marker + command->release(); + // Notify the command queue about a possible waiter for the calback + block_command->notifyCmdQueue(); + + return hipSuccess; +} + +// ================================================================================================ +hipError_t hipStreamAddCallback_common(hipStream_t stream, hipStreamCallback_t callback, + void* userData, unsigned int flags) { + // flags - Reserved for future use, must be 0 + if (callback == nullptr || flags != 0) { + return hipErrorInvalidValue; + } + StreamCallback* cbo = new StreamAddCallback(stream, callback, userData); + return streamCallback_common(stream, cbo, userData); +} + +// ================================================================================================ +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags) { + HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); + HIP_RETURN(hipStreamAddCallback_common(stream, callback, userData, flags)); +} + +// 
================================================================================================ +hipError_t hipStreamAddCallback_spt(hipStream_t stream, hipStreamCallback_t callback, + void* userData, unsigned int flags) { + HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamAddCallback_common(stream, callback, userData, flags)); +} + +// ================================================================================================ +hipError_t hipLaunchHostFunc_common(hipStream_t stream, hipHostFn_t fn, void* userData) { + STREAM_CAPTURE(hipLaunchHostFunc, stream, fn, userData); + if (fn == nullptr) { + return hipErrorInvalidValue; + } + StreamCallback* cbo = new LaunchHostFuncCallback(fn, userData); + return streamCallback_common(stream, cbo, userData); +} + +// ================================================================================================ +hipError_t hipLaunchHostFunc_spt(hipStream_t stream, hipHostFn_t fn, void* userData) { + HIP_INIT_API(hipLaunchHostFunc, stream, fn, userData); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipLaunchHostFunc_common(stream, fn, userData)); +} + +// ================================================================================================ +hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData) { + HIP_INIT_API(hipLaunchHostFunc, stream, fn, userData); + if (stream == nullptr && (hip::Stream::StreamCaptureOngoing() == true)) { + HIP_RETURN(hipErrorStreamCaptureImplicit); + } + HIP_RETURN(hipLaunchHostFunc_common(stream, fn, userData)); +} + +// ================================================================================================ +hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, + const uint32_t* cuMask) { + HIP_INIT_API(hipExtStreamCreateWithCUMask, stream, cuMaskSize, cuMask); + + if (stream == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + if 
(cuMaskSize == 0 || cuMask == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + const std::vector cuMaskv(cuMask, cuMask + cuMaskSize); + + HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, hip::Stream::Priority::Normal, cuMaskv), *stream); +} + +// ================================================================================================ +hipError_t hipStreamGetPriority_common(hipStream_t stream, int* priority) { + if ((priority != nullptr) && (stream == nullptr)) { + *priority = 0; + return hipSuccess; + } + + if ((priority != nullptr) && (stream != nullptr)) { + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + *priority = static_cast(reinterpret_cast(stream)->GetPriority()); + } else { + return hipErrorInvalidValue; + } + + return hipSuccess; +} + +// ================================================================================================ +hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) { + HIP_INIT_API(hipStreamGetPriority, stream, priority); + HIP_RETURN(hipStreamGetPriority_common(stream, priority)); +} + +// ================================================================================================ +hipError_t hipStreamGetPriority_spt(hipStream_t stream, int* priority) { + HIP_INIT_API(hipStreamGetPriority, stream, priority); + PER_THREAD_DEFAULT_STREAM(stream); + HIP_RETURN(hipStreamGetPriority_common(stream, priority)); +} + +// ================================================================================================ +hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask) { + HIP_INIT_API(hipExtStreamGetCUMask, stream, cuMaskSize, cuMask); + + if (cuMask == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + int deviceId = hip::getCurrentDevice()->deviceId(); + auto* deviceHandle = g_devices[deviceId]->devices()[0]; + const auto& info = deviceHandle->info(); + + // find the minimum cuMaskSize required to present the CU mask bit-array in a 
patch of 32 bits + // and return error if the cuMaskSize argument is less than cuMaskSizeRequired + uint32_t cuMaskSizeRequired = info.maxComputeUnits_ / 32 + + ((info.maxComputeUnits_ % 32) ? 1 : 0); + + if (cuMaskSize < cuMaskSizeRequired) { + HIP_RETURN(hipErrorInvalidValue); + } + + // make a default CU mask bit-array where all CUs are active + // this default mask will be returned when there is no + // custom or global CU mask defined + std::vector defaultCUMask; + uint32_t temp = 0; + uint32_t bit_index = 0; + for (uint32_t i = 0; i < info.maxComputeUnits_; i++) { + temp |= 1UL << bit_index; + if (bit_index >= 32) { + defaultCUMask.push_back(temp); + temp = 0; + bit_index = 0; + temp |= 1UL << bit_index; + } + bit_index += 1; + } + if (bit_index != 0) { + defaultCUMask.push_back(temp); + } + + // if the stream is null then either return globalCUMask_ (if it is defined) + // or return defaultCUMask + if (stream == nullptr || stream == hipStreamPerThread) { + if (info.globalCUMask_.size() != 0) { + std::copy(info.globalCUMask_.begin(), info.globalCUMask_.end(), cuMask); + } else { + std::copy(defaultCUMask.begin(), defaultCUMask.end(), cuMask); + } + } else { + // if the stream is not null then get the stream's CU mask and return one of the below cases + // case1 if globalCUMask_ is defined then return the AND of globalCUMask_ and stream's CU mask + // case2 if globalCUMask_ is not defined then retuen AND of defaultCUMask and stream's CU mask + // in both cases above if stream's CU mask is empty then either globalCUMask_ (for case1) + // or defaultCUMask(for case2) will be returned + std::vector streamCUMask; + streamCUMask = reinterpret_cast(stream)->GetCUMask(); + std::vector mask = {}; + if (info.globalCUMask_.size() != 0) { + for (uint32_t i = 0; i < std::min(streamCUMask.size(), info.globalCUMask_.size()); i++) { + mask.push_back(streamCUMask[i] & info.globalCUMask_[i]); + } + } else { + for (uint32_t i = 0; i < std::min(streamCUMask.size(), 
defaultCUMask.size()); i++) { + mask.push_back(streamCUMask[i] & defaultCUMask[i]); + } + // check to make sure after ANDing streamCUMask (custom-defined) with global CU mask, + //we have non-zero mask, oterwise just return either globalCUMask_ or defaultCUMask + bool zeroCUMask = true; + for (auto m : mask) { + if (m != 0) { + zeroCUMask = false; + break; + } + } + if (zeroCUMask) { + mask = (info.globalCUMask_.size() != 0) ? info.globalCUMask_ : defaultCUMask; + } + std::copy(mask.begin(), mask.end(), cuMask); + } + } + HIP_RETURN(hipSuccess); +} + +// ================================================================================================ +hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device) { + HIP_INIT_API(hipStreamGetDevice, stream, device); + + if (device == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorContextIsDestroyed); + } + + if (stream == nullptr) { // handle null stream + // null stream is associated with current device, return the device id associated with the + // current device + *device = hip::getCurrentDevice()->deviceId(); + } else { + getStreamPerThread(stream); + *device = reinterpret_cast(stream)->DeviceId(); + } + + HIP_RETURN(hipSuccess); +} diff --git a/projects/clr/hipamd/src/hip_stream_ops.cpp b/projects/clr/hipamd/src/hip_stream_ops.cpp new file mode 100644 index 0000000000..7032c4c65d --- /dev/null +++ b/projects/clr/hipamd/src/hip_stream_ops.cpp @@ -0,0 +1,137 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include "hip_internal.hpp" +#include "platform/command_utils.hpp" + +hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void* ptr, + uint64_t value, uint64_t mask, unsigned int flags, size_t sizeBytes) { + size_t offset = 0; + unsigned int outFlags = 0; + + if (ptr == nullptr) { + return hipErrorInvalidValue; + } + + if (!hip::isValid(stream)) { + return hipErrorContextIsDestroyed; + } + + amd::Memory* memory = getMemoryObject(ptr, offset); + if (!memory) { + return hipErrorInvalidValue; + } + + // NOTE: 'mask' is only used in Wait operation, 'sizeBytes' is only used in Write operation + // 'flags' for now used only for Wait, but in future there will usecases for Write too. 
+ + if (cmdType == ROCCLR_COMMAND_STREAM_WAIT_VALUE) { + // Stream Wait on AQL barrier-value type packet is only supported on SignalMemory objects + if (GPU_STREAMOPS_CP_WAIT && (!(memory->getMemFlags() & ROCCLR_MEM_HSA_SIGNAL_MEMORY))) { + return hipErrorInvalidValue; + } + switch (flags) { + case hipStreamWaitValueGte: + outFlags = ROCCLR_STREAM_WAIT_VALUE_GTE; + break; + case hipStreamWaitValueEq: + outFlags = ROCCLR_STREAM_WAIT_VALUE_EQ; + break; + case hipStreamWaitValueAnd: + outFlags = ROCCLR_STREAM_WAIT_VALUE_AND; + break; + case hipStreamWaitValueNor: + outFlags = ROCCLR_STREAM_WAIT_VALUE_NOR; + break; + default: + return hipErrorInvalidValue; + break; + } + } else if (cmdType != ROCCLR_COMMAND_STREAM_WRITE_VALUE) { + return hipErrorInvalidValue; + } + + hip::Stream* hip_stream = hip::getStream(stream); + amd::Command::EventWaitList waitList; + + amd::StreamOperationCommand* command = + new amd::StreamOperationCommand(*hip_stream, cmdType, waitList, *memory->asBuffer(), + value, mask, outFlags, offset, sizeBytes); + + if (command == nullptr) { + return hipErrorOutOfMemory; + } + command->enqueue(); + command->release(); + return hipSuccess; +} + +hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags, + uint32_t mask) { + HIP_INIT_API(hipStreamWaitValue32, stream, ptr, value, mask, flags); + // NOTE: ptr corresponds to a HSA Signal memeory which is 64 bits. + // 32 bit value and mask are converted to 64-bit values. 
+ HIP_RETURN_DURATION(ihipStreamOperation( + stream, + ROCCLR_COMMAND_STREAM_WAIT_VALUE, + ptr, + value, + mask, + flags, + sizeof(uint32_t))); +} + +hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags, + uint64_t mask) { + HIP_INIT_API(hipStreamWaitValue64, stream, ptr, value, mask, flags); + HIP_RETURN_DURATION(ihipStreamOperation( + stream, + ROCCLR_COMMAND_STREAM_WAIT_VALUE, + ptr, + value, + mask, + flags, + sizeof(uint64_t))); +} + +hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags) { + HIP_INIT_API(hipStreamWriteValue32, stream, ptr, value, flags); + HIP_RETURN_DURATION(ihipStreamOperation( + stream, + ROCCLR_COMMAND_STREAM_WRITE_VALUE, + ptr, + value, + 0, // mask un-used set it to 0 + 0, // flags un-used for now set it to 0 + sizeof(uint32_t))); +} + +hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags) { + HIP_INIT_API(hipStreamWriteValue64, stream, ptr, value, flags); + HIP_RETURN_DURATION(ihipStreamOperation( + stream, + ROCCLR_COMMAND_STREAM_WRITE_VALUE, + ptr, + value, + 0, // mask un-used set it to 0 + 0, // flags un-used for now set it to 0 + sizeof(uint64_t))); +} diff --git a/projects/clr/hipamd/src/hip_surface.cpp b/projects/clr/hipamd/src/hip_surface.cpp new file mode 100644 index 0000000000..3ec495b668 --- /dev/null +++ b/projects/clr/hipamd/src/hip_surface.cpp @@ -0,0 +1,96 @@ +/* Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include + +#include "hip_internal.hpp" +#include + +hipError_t ihipFree(void* ptr); + +struct __hip_surface { + uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; + amd::Image* image; + hipResourceDesc resDesc; + + __hip_surface(amd::Image* image_, const hipResourceDesc& resDesc_) + : image(image_), resDesc(resDesc_) { + amd::Context& context = *hip::getCurrentDevice()->asContext(); + amd::Device& device = *context.devices()[0]; + + device::Memory* imageMem = image->getDeviceMemory(device); + std::memcpy(imageSRD, imageMem->cpuSrd(), sizeof(imageSRD)); + } +}; + +hipError_t ihipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + + // Validate input params + if (pSurfObject == nullptr || pResDesc == nullptr) { + return hipErrorInvalidValue; + } + + // the type of resource must be a HIP array + // hipResourceDesc::res::array::array must be set to a valid HIP array handle. 
+ if ((pResDesc->resType != hipResourceTypeArray) || (pResDesc->res.array.array == nullptr)) { + return hipErrorInvalidValue; + } + + amd::Image* image = nullptr; + cl_mem memObj = reinterpret_cast(pResDesc->res.array.array->data); + if (!is_valid(memObj)) { + return hipErrorInvalidValue; + } + image = as_amd(memObj)->asImage(); + + void* surfObjectBuffer = nullptr; + hipError_t err = ihipMalloc(&surfObjectBuffer, sizeof(__hip_surface), + CL_MEM_SVM_FINE_GRAIN_BUFFER); + if (surfObjectBuffer == nullptr || err != hipSuccess) { + return hipErrorOutOfMemory; + } + *pSurfObject = new (surfObjectBuffer) __hip_surface{image, *pResDesc}; + + return hipSuccess; +} + +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, + const hipResourceDesc* pResDesc) { + HIP_INIT_API(hipCreateSurfaceObject, pSurfObject, pResDesc); + + HIP_RETURN(ihipCreateSurfaceObject(pSurfObject, pResDesc)); +} + +hipError_t ihipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + if (surfaceObject == nullptr) { + return hipSuccess; + } + + return ihipFree(surfaceObject); +} + +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { + HIP_INIT_API(hipDestroySurfaceObject, surfaceObject); + + HIP_RETURN(ihipDestroySurfaceObject(surfaceObject)); +} diff --git a/projects/clr/hipamd/src/hip_texture.cpp b/projects/clr/hipamd/src/hip_texture.cpp new file mode 100644 index 0000000000..8c44373901 --- /dev/null +++ b/projects/clr/hipamd/src/hip_texture.cpp @@ -0,0 +1,1540 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#include +#include +#include "hip_internal.hpp" +#include "hip_platform.hpp" +#include "hip_conversions.hpp" +#include "platform/sampler.hpp" + +hipError_t ihipFree(void* ptr); + +struct __hip_texture { + uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; + uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; + amd::Image* image; + amd::Sampler* sampler; + hipResourceDesc resDesc; + hipTextureDesc texDesc; + hipResourceViewDesc resViewDesc; + + __hip_texture(amd::Image* image_, + amd::Sampler* sampler_, + const hipResourceDesc& resDesc_, + const hipTextureDesc& texDesc_, + const hipResourceViewDesc& resViewDesc_) : + image(image_), + sampler(sampler_), + resDesc(resDesc_), + texDesc(texDesc_), + resViewDesc(resViewDesc_) { + amd::Context& context = *hip::getCurrentDevice()->asContext(); + amd::Device& device = *context.devices()[0]; + + device::Memory* imageMem = image->getDeviceMemory(device); + std::memcpy(imageSRD, imageMem->cpuSrd(), sizeof(imageSRD)); + + device::Sampler* samplerMem = sampler->getDeviceSampler(device); + std::memcpy(samplerSRD, samplerMem->hwState(), sizeof(samplerSRD)); + } +}; + +amd::Image* ihipImageCreate(const cl_channel_order channelOrder, + const cl_channel_type channelType, + const cl_mem_object_type imageType, + const size_t imageWidth, + const size_t imageHeight, + const size_t imageDepth, + const size_t imageArraySize, + const size_t imageRowPitch, + const size_t imageSlicePitch, + const uint32_t numMipLevels, + amd::Memory* buffer, + hipError_t& status); + +hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + return hipErrorNotSupported; + } + + // Validate input params + if (pTexObject == nullptr 
|| pResDesc == nullptr || pTexDesc == nullptr) { + return hipErrorInvalidValue; + } + + // pResViewDesc can only be specified if the type of resource is a HIP array or a HIP mipmapped array. + if ((pResViewDesc != nullptr) && + ((pResDesc->resType != hipResourceTypeArray) && (pResDesc->resType != hipResourceTypeMipmappedArray))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeArray, + if (pResDesc->resType == hipResourceTypeArray) { + // hipResourceDesc::res::array::array must be set to a valid HIP array handle. + if (pResDesc->res.array.array == nullptr) { + return hipErrorInvalidValue; + } else if (pResDesc->res.array.array->depth > 0 && + pTexDesc->filterMode == hipFilterModeLinear && + !strncmp(info.name_, "gfx90a", strlen("gfx90a"))) { + LogPrintfInfo("%s doesn't support 3D linear filter!", info.name_); + return hipErrorNotSupported; + } + } + + // If hipResourceDesc::resType is set to hipResourceTypeMipmappedArray, + // hipResourceDesc::res::mipmap::mipmap must be set to a valid HIP mipmapped array handle + // and hipTextureDesc::normalizedCoords must be set to true. + if ((pResDesc->resType == hipResourceTypeMipmappedArray) && + ((pResDesc->res.mipmap.mipmap == nullptr) || (pTexDesc->normalizedCoords == 0))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypeLinear, + // hipResourceDesc::res::linear::devPtr must be set to a valid device pointer, that is aligned to hipDeviceProp::textureAlignment. + // The total number of elements in the linear address range cannot exceed hipDeviceProp::maxTexture1DLinear. 
+ if ((pResDesc->resType == hipResourceTypeLinear) && + ((pResDesc->res.linear.devPtr == nullptr) || + (!amd::isMultipleOf(pResDesc->res.linear.devPtr, info.imageBaseAddressAlignment_)) || + ((pResDesc->res.linear.sizeInBytes / hip::getElementSize(pResDesc->res.linear.desc)) >= info.imageMaxBufferSize_))) { + return hipErrorInvalidValue; + } + + // If hipResourceDesc::resType is set to hipResourceTypePitch2D, + // hipResourceDesc::res::pitch2D::devPtr must be set to a valid device pointer, that is aligned to hipDeviceProp::textureAlignment. + // hipResourceDesc::res::pitch2D::width and hipResourceDesc::res::pitch2D::height specify the width and height of the array in elements, + // and cannot exceed hipDeviceProp::maxTexture2DLinear[0] and hipDeviceProp::maxTexture2DLinear[1] respectively. + // hipResourceDesc::res::pitch2D::pitchInBytes specifies the pitch between two rows in bytes and has to be aligned to hipDeviceProp::texturePitchAlignment. + // Pitch cannot exceed hipDeviceProp::maxTexture2DLinear[2]. + if ((pResDesc->resType == hipResourceTypePitch2D) && + ((pResDesc->res.pitch2D.devPtr == nullptr) || + (!amd::isMultipleOf(pResDesc->res.pitch2D.devPtr, info.imageBaseAddressAlignment_)) || + (pResDesc->res.pitch2D.width >= info.image2DMaxWidth_) || + (pResDesc->res.pitch2D.height >= info.image2DMaxHeight_) || + (!amd::isMultipleOf(pResDesc->res.pitch2D.pitchInBytes, info.imagePitchAlignment_)))) { + // TODO check pitch limits. + return hipErrorInvalidValue; + } + + // Mipmaps are currently not supported. + if (pResDesc->resType == hipResourceTypeMipmappedArray) { + return hipErrorNotSupported; + } + // We don't program the max_ansio_ratio field in the the HW sampler SRD. + if (pTexDesc->maxAnisotropy != 0) { + return hipErrorNotSupported; + } + // We don't program the lod_bias field in the HW sampler SRD. + if (pTexDesc->mipmapLevelBias != 0) { + return hipErrorNotSupported; + } + // We don't program the min_lod field in the HW sampler SRD. 
+ if (pTexDesc->minMipmapLevelClamp != 0) { + return hipErrorNotSupported; + } + // We don't program the max_lod field in the HW sampler SRD. + if (pTexDesc->maxMipmapLevelClamp != 0) { + return hipErrorNotSupported; + } + + // TODO ROCclr assumes all dimensions have the same addressing mode. + cl_addressing_mode addressMode = CL_ADDRESS_NONE; + // If hipTextureDesc::normalizedCoords is set to zero, + // hipAddressModeWrap and hipAddressModeMirror won't be supported + // and will be switched to hipAddressModeClamp. + if ((pTexDesc->normalizedCoords == 0) && + ((pTexDesc->addressMode[0] == hipAddressModeWrap) || (pTexDesc->addressMode[0] == hipAddressModeMirror))) { + addressMode = hip::getCLAddressingMode(hipAddressModeClamp); + } + // hipTextureDesc::addressMode is ignored if hipResourceDesc::resType is hipResourceTypeLinear + else if (pResDesc->resType != hipResourceTypeLinear) { + addressMode = hip::getCLAddressingMode(pTexDesc->addressMode[0]); + } + +#ifndef CL_FILTER_NONE +#define CL_FILTER_NONE 0x1142 +#endif + cl_filter_mode filterMode = CL_FILTER_NONE; + cl_filter_mode mipFilterMode = CL_FILTER_NONE; +#undef CL_FILTER_NONE + // hipTextureDesc::filterMode is ignored if hipResourceDesc::resType is hipResourceTypeLinear. 
+ if (pResDesc->resType != hipResourceTypeLinear) { + filterMode = hip::getCLFilterMode(pTexDesc->filterMode); + } + + if (pResDesc->resType == hipResourceTypeMipmappedArray) { + mipFilterMode = hip::getCLFilterMode(pTexDesc->mipmapFilterMode); + } + + amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentDevice()->asContext(), + pTexDesc->normalizedCoords, + addressMode, + filterMode, + mipFilterMode, + pTexDesc->minMipmapLevelClamp, + pTexDesc->maxMipmapLevelClamp); + + if (sampler == nullptr) { + return hipErrorOutOfMemory; + } + + if (!sampler->create()) { + delete sampler; + return hipErrorOutOfMemory; + } + + amd::Image* image = nullptr; + switch (pResDesc->resType) { + case hipResourceTypeArray: { + cl_mem memObj = reinterpret_cast(pResDesc->res.array.array->data); + if (!is_valid(memObj)) { + return hipErrorInvalidValue; + } + image = as_amd(memObj)->asImage(); + + hipTextureReadMode readMode = pTexDesc->readMode; + // 32-bit integer format will not be promoted, regardless of whether or not + // this hipTextureDesc::readMode is set hipReadModeNormalizedFloat is specified. + if ((pResDesc->res.array.array->Format == HIP_AD_FORMAT_SIGNED_INT32) || + (pResDesc->res.array.array->Format == HIP_AD_FORMAT_UNSIGNED_INT32)) { + readMode = hipReadModeElementType; + } + + // We need to create an image view if the user requested to use normalized pixel values, + // due to already having the image created with a different format. + if ((pResViewDesc != nullptr) || + (readMode == hipReadModeNormalizedFloat) || + (pTexDesc->sRGB == 1)) { + // TODO ROCclr currently right now can only change the format of the image. + const cl_channel_order channelOrder = (pResViewDesc != nullptr) ? hip::getCLChannelOrder(hip::getNumChannels(pResViewDesc->format), pTexDesc->sRGB) : + hip::getCLChannelOrder(pResDesc->res.array.array->NumChannels, pTexDesc->sRGB); + const cl_channel_type channelType = (pResViewDesc != nullptr) ? 
hip::getCLChannelType(hip::getArrayFormat(pResViewDesc->format), readMode) : + hip::getCLChannelType(pResDesc->res.array.array->Format, readMode); + const amd::Image::Format imageFormat(cl_image_format{channelOrder, channelType}); + if (!imageFormat.isValid()) { + return hipErrorInvalidValue; + } + + image = image->createView(*hip::getCurrentDevice()->asContext(), imageFormat, nullptr); + if (image == nullptr) { + return hipErrorInvalidValue; + } + } + break; + } + case hipResourceTypeMipmappedArray: + return hipErrorInvalidValue; + + case hipResourceTypeLinear: { + const cl_channel_order channelOrder = hip::getCLChannelOrder(hip::getNumChannels(pResDesc->res.linear.desc), pTexDesc->sRGB); + const cl_channel_type channelType = hip::getCLChannelType(hip::getArrayFormat(pResDesc->res.linear.desc), pTexDesc->readMode); + const amd::Image::Format imageFormat({channelOrder, channelType}); + const cl_mem_object_type imageType = hip::getCLMemObjectType(pResDesc->resType); + const size_t imageSizeInBytes = pResDesc->res.linear.sizeInBytes; + amd::Memory* buffer = getMemoryObjectWithOffset(pResDesc->res.linear.devPtr, imageSizeInBytes); + hipError_t status = hipSuccess; + image = ihipImageCreate(channelOrder, + channelType, + imageType, + imageSizeInBytes / imageFormat.getElementSize(), /* imageWidth */ + 0, /* imageHeight */ + 0, /* imageDepth */ + 0, /* imageArraySize */ + 0, /* imageRowPitch */ + 0, /* imageSlicePitch */ + 0, /* numMipLevels */ + buffer, + status); + buffer->release(); + if (image == nullptr) { + return status; + } + break; + } + case hipResourceTypePitch2D: { + const cl_channel_order channelOrder = hip::getCLChannelOrder(hip::getNumChannels(pResDesc->res.pitch2D.desc), pTexDesc->sRGB); + const cl_channel_type channelType = hip::getCLChannelType(hip::getArrayFormat(pResDesc->res.pitch2D.desc), pTexDesc->readMode); + const amd::Image::Format imageFormat({channelOrder, channelType}); + const cl_mem_object_type imageType = 
hip::getCLMemObjectType(pResDesc->resType); + const size_t imageSizeInBytes = pResDesc->res.pitch2D.width * imageFormat.getElementSize() + + pResDesc->res.pitch2D.pitchInBytes * (pResDesc->res.pitch2D.height - 1); + amd::Memory* buffer = getMemoryObjectWithOffset(pResDesc->res.pitch2D.devPtr, imageSizeInBytes); + hipError_t status = hipSuccess; + image = ihipImageCreate(channelOrder, + channelType, + imageType, + pResDesc->res.pitch2D.width, /* imageWidth */ + pResDesc->res.pitch2D.height, /* imageHeight */ + 0, /* imageDepth */ + 0, /* imageArraySize */ + pResDesc->res.pitch2D.pitchInBytes, /* imageRowPitch */ + 0, /* imageSlicePitch */ + 0, /* numMipLevels */ + buffer, + status); + if (buffer != nullptr) { + buffer->release(); + } + if (image == nullptr) { + return status; + } + break; + } + } + + void *texObjectBuffer = nullptr; + hipError_t err = ihipMalloc(&texObjectBuffer, sizeof(__hip_texture), CL_MEM_SVM_FINE_GRAIN_BUFFER); + if (texObjectBuffer == nullptr || err != hipSuccess) { + return hipErrorOutOfMemory; + } + *pTexObject = new (texObjectBuffer) __hip_texture{image, sampler, *pResDesc, *pTexDesc, (pResViewDesc != nullptr) ? 
*pResViewDesc : hipResourceViewDesc{}}; + + return hipSuccess; +} + +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) { + HIP_INIT_API(hipCreateTextureObject, pTexObject, pResDesc, pTexDesc, pResViewDesc); + + HIP_RETURN(ihipCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)); +} + +hipError_t ihipDestroyTextureObject(hipTextureObject_t texObject) { + if (texObject == nullptr) { + return hipSuccess; + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + return hipErrorNotSupported; + } + + const hipResourceType type = texObject->resDesc.resType; + const bool isImageFromBuffer = (type == hipResourceTypeLinear) || (type == hipResourceTypePitch2D); + const bool isImageView = ((type == hipResourceTypeArray) || (type == hipResourceTypeMipmappedArray)) && + texObject->image->parent() != nullptr; + // If the texture object was created from an array, then the array owns the image SRD. + // Otherwise, if the texture object is a view, or was created from a buffer, then it owns the image SRD. + if (isImageFromBuffer || isImageView) { + texObject->image->release(); + } + + // The texture object always owns the sampler SRD. + texObject->sampler->release(); + + // TODO Should call ihipFree() to not polute the api trace. 
+ return ihipFree(texObject); +} + +hipError_t ihipUnbindTexture(textureReference* texRef) { + + hipError_t hip_error = hipSuccess; + + do { + if (texRef == nullptr) { + hip_error = hipErrorInvalidValue; + break; + } + + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + hip_error = ihipDestroyTextureObject(texRef->textureObject); + if (hip_error != hipSuccess) { + break; + } + + const_cast(texRef)->textureObject = nullptr; + + } while (0); + + return hip_error; +} + +hipError_t hipDestroyTextureObject(hipTextureObject_t texObject) { + HIP_INIT_API(hipDestroyTextureObject, texObject); + + HIP_RETURN(ihipDestroyTextureObject(texObject)); +} + +hipError_t ihipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t texObject) { + if ((pResDesc == nullptr) || (texObject == nullptr)) { + return hipErrorInvalidValue; + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + return hipErrorNotSupported; + } + + *pResDesc = texObject->resDesc; + + return hipSuccess; +} + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectResourceDesc, pResDesc, texObject); + + HIP_RETURN(ihipGetTextureObjectResourceDesc(pResDesc, texObject)); +} + +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectResourceViewDesc, pResViewDesc, texObject); + + if ((pResViewDesc == nullptr) || (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = 
device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + *pResViewDesc = texObject->resViewDesc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, + hipTextureObject_t texObject) { + HIP_INIT_API(hipGetTextureObjectTextureDesc, pTexDesc, texObject); + + if ((pTexDesc == nullptr) || (texObject == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + *pTexDesc = texObject->texDesc; + + HIP_RETURN(hipSuccess); +} + +inline hipError_t ihipGetTextureAlignmentOffset(size_t* offset, + const void* devPtr) { + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + return hipErrorNotSupported; + } + + const char* alignedDevPtr = amd::alignUp(static_cast(devPtr), info.imageBaseAddressAlignment_); + const size_t alignedOffset = alignedDevPtr - static_cast(devPtr); + + // If the device memory pointer was returned from hipMalloc(), + // the offset is guaranteed to be 0 and NULL may be passed as the offset parameter. 
+ if ((alignedOffset != 0) && (offset == nullptr)) { + LogPrintfError("Texture object not aligned with offset %u \n", alignedOffset); + return hipErrorInvalidValue; + } + + if (offset != nullptr) { + *offset = alignedOffset; + } + + return hipSuccess; +} + +hipError_t ihipBindTexture(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size) { + if ((texref == nullptr) || + (devPtr == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + hipError_t err = ihipDestroyTextureObject(texref->textureObject); + if (err != hipSuccess) { + return err; + } + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeLinear; + resDesc.res.linear.devPtr = const_cast(devPtr); + resDesc.res.linear.desc = *desc; + resDesc.res.linear.sizeInBytes = size; + err = ihipGetTextureAlignmentOffset(offset, devPtr); + if (err != hipSuccess) { + return err; + } + + // Align the user ptr to HW requirments. + resDesc.res.linear.devPtr = static_cast(const_cast(devPtr)) - *offset; + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, nullptr); +} + +hipError_t ihipBindTexture2D(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch) { + if ((texref == nullptr) || + (devPtr == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ hipError_t err = ihipDestroyTextureObject(texref->textureObject); + if (err != hipSuccess) { + return err; + } + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.pitch2D.devPtr = const_cast(devPtr); + resDesc.res.pitch2D.desc = *desc; + resDesc.res.pitch2D.width = width; + resDesc.res.pitch2D.height = height; + resDesc.res.pitch2D.pitchInBytes = pitch; + err = ihipGetTextureAlignmentOffset(offset, devPtr); + if (err != hipSuccess) { + return err; + } + + // Align the user ptr to HW requirments. + resDesc.res.pitch2D.devPtr = static_cast(const_cast(devPtr)) - *offset; + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, nullptr); +} + +hipError_t hipBindTexture2D(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch) { + HIP_INIT_API(hipBindTexture2D, offset, texref, devPtr, desc, width, height, pitch); + + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); + + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); +} + +hipError_t ihipBindTextureToArray(const textureReference* texref, + hipArray_const_t array, + const hipChannelFormatDesc* desc) { + if ((texref == nullptr) || + (array == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. 
+ // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + hipError_t err = ihipDestroyTextureObject(texref->textureObject); + if (err != hipSuccess) { + return err; + } + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = const_cast(array); + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + hipResourceViewFormat format = hip::getResourceViewFormat(*desc); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, &resViewDesc); +} + +hipError_t hipBindTextureToArray(const textureReference* texref, + hipArray_const_t array, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(hipBindTextureToArray, texref, array, desc); + + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); + + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTextureToArray(texref, array, desc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); +} + +hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + if ((texref == nullptr) || + (mipmappedArray == nullptr) || + (desc == nullptr)) { + return hipErrorInvalidValue; + } + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. 
+ hipError_t err = ihipDestroyTextureObject(texref->textureObject); + if (err != hipSuccess) { + return err; + } + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeMipmappedArray; + resDesc.res.mipmap.mipmap = const_cast(mipmappedArray); + + hipTextureDesc texDesc = hip::getTextureDesc(texref); + + hipResourceViewFormat format = hip::getResourceViewFormat(*desc); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format); + + return ihipCreateTextureObject(const_cast(&texref->textureObject), &resDesc, &texDesc, &resViewDesc); +} + +hipError_t hipBindTextureToMipmappedArray(const textureReference* texref, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) { + HIP_INIT_API(hipBindTextureToMipmappedArray, texref, mipmappedArray, desc); + + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); + + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. 
+ hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); +} + +hipError_t hipUnbindTexture(const textureReference* texref) { + HIP_INIT_API(hipUnbindTexture, texref); + + HIP_RETURN(ihipUnbindTexture(const_cast(texref))); +} + +hipError_t hipBindTexture(size_t* offset, + const textureReference* texref, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size) { + HIP_INIT_API(hipBindTexture, offset, texref, devPtr, desc, size); + + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + HIP_RETURN_ONFAIL(PlatformState::instance().getStatGlobalVar(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)); + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTexture(offset, texref, devPtr, desc, size); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); +} + +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, + hipArray_const_t array) { + HIP_INIT_API(hipGetChannelDesc, desc, array); + + if ((desc == nullptr) || (array == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + // It is UB to call hipGetChannelDesc() on an array created via hipArrayCreate()/hipArray3DCreate(). + // This is due to hip not differentiating between runtime and driver types. 
+ *desc = array->desc; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureAlignmentOffset(size_t* offset, + const textureReference* texref) { + HIP_INIT_API(hipGetTextureAlignmentOffset, offset, texref); + + if ((offset == nullptr) || (texref == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + // TODO enforce alignment on devPtr. + *offset = 0; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol) { + HIP_INIT_API(hipGetTextureReference, texref, symbol); + + if (texref == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + *texref = reinterpret_cast(symbol); + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFormat(textureReference* texRef, + hipArray_Format fmt, + int NumPackedComponents) { + HIP_INIT_API(hipTexRefSetFormat, texRef, fmt, NumPackedComponents); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + const device::Info& info = device->info(); + if (!info.imageSupport_) { + LogPrintfError("Texture not supported on the device %s", info.name_); + HIP_RETURN(hipErrorNotSupported); + } + + texRef->format = fmt; + texRef->numChannels = NumPackedComponents; + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetFlags(textureReference* texRef, + unsigned int Flags) { + HIP_INIT_API(hipTexRefSetFlags, texRef, Flags); + + if (texRef == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + amd::Device* 
device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  texRef->readMode = hipReadModeNormalizedFloat;
+  texRef->normalized = 0;
+  texRef->sRGB = 0;
+
+  if (Flags & HIP_TRSF_READ_AS_INTEGER) {
+    texRef->readMode = hipReadModeElementType;
+  }
+
+  if (Flags & HIP_TRSF_NORMALIZED_COORDINATES) {
+    texRef->normalized = 1;
+  }
+
+  if (Flags & HIP_TRSF_SRGB) {
+    texRef->sRGB = 1;
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Stores `fm` as the filter mode of `texRef`.
+// Returns hipErrorInvalidValue when `texRef` is null and hipErrorNotSupported when the
+// current device reports no image support; hipSuccess otherwise.
+hipError_t hipTexRefSetFilterMode(textureReference* texRef,
+                                  hipTextureFilterMode fm) {
+  HIP_INIT_API(hipTexRefSetFilterMode, texRef, fm);
+
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const device::Info& devInfo = hip::getCurrentDevice()->devices()[0]->info();
+  if (!devInfo.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", devInfo.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  texRef->filterMode = fm;
+  HIP_RETURN(hipSuccess);
+}
+
+// Copies texRef->addressMode[dim] into `*pam`.
+// Returns hipErrorInvalidValue for a null `pam`/`texRef` or a `dim` other than 0 or 1,
+// and hipErrorNotSupported when the current device reports no image support.
+hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* pam,
+                                   const textureReference* texRef,
+                                   int dim) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetAddressMode, pam, texRef, dim);
+
+  if (pam == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const device::Info& devInfo = hip::getCurrentDevice()->devices()[0]->info();
+  if (!devInfo.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", devInfo.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  // The getter accepts only dimensions 0 and 1 (the setter below accepts 0..2).
+  const bool dimIsValid = (dim == 0) || (dim == 1);
+  if (!dimIsValid) {
+    LogPrintfError(
+        "Currently only 2 dimensions (0,1) are valid,"
+        "dim : %d \n",
+        dim);
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  *pam = texRef->addressMode[dim];
+  HIP_RETURN(hipSuccess);
+}
+
+// Stores `am` into texRef->addressMode[dim].
+// Returns hipErrorInvalidValue for a null `texRef` or a `dim` outside 0..2, and
+// hipErrorNotSupported when the current device reports no image support.
+// The dimension is validated before the device capability is queried.
+hipError_t hipTexRefSetAddressMode(textureReference* texRef,
+                                   int dim,
+                                   hipTextureAddressMode am) {
+  HIP_INIT_API(hipTexRefSetAddressMode, texRef, dim, am);
+
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const bool dimIsValid = (dim >= 0) && (dim <= 2);
+  if (!dimIsValid) {
+    LogPrintfError(
+        "Currently only 3 dimensions (0,1,2) are valid,"
+        "dim : %d \n",
+        dim);
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const device::Info& devInfo = hip::getCurrentDevice()->devices()[0]->info();
+  if (!devInfo.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", devInfo.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  texRef->addressMode[dim] = am;
+  HIP_RETURN(hipSuccess);
+}
+
+hipError_t hipTexRefGetArray(hipArray_t* pArray,
+                             const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetArray, pArray, texRef);
+
+  if ((pArray == nullptr) || (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hipResourceDesc resDesc = {};
+  // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace.
+ hipError_t error = ihipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); + if (error != hipSuccess) { + HIP_RETURN(error); + } + + switch (resDesc.resType) { + case hipResourceTypeLinear: + case hipResourceTypePitch2D: + case hipResourceTypeMipmappedArray: { + HIP_RETURN(hipErrorInvalidValue); + } + case hipResourceTypeArray: + *pArray = resDesc.res.array.array; + break; + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipTexRefSetArray(textureReference* texRef, + hipArray_const_t array, + unsigned int flags) { + HIP_INIT_API(hipTexRefSetArray, texRef, array, flags); + + if ((texRef == nullptr) || (array == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (flags != HIP_TRSA_OVERRIDE_FORMAT) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + + HIP_RETURN_ONFAIL(PlatformState::instance().getDynTexGlobalVar(texRef, &refDevPtr, &refDevSize)); + assert(refDevSize == sizeof(textureReference)); + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + hipError_t err = ihipDestroyTextureObject(texRef->textureObject); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = const_cast(array); + + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); + hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); + + err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. 
+  hip::Stream* stream = hip::getNullStream();
+  HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
+}
+
+// Reports the device pointer that `texRef` is bound to.
+// The resource description of texRef->textureObject is queried first; a failure of that
+// query is logged and returned unchanged. Linear and pitch2D resources yield their
+// devPtr; array and mipmapped-array resources yield hipErrorInvalidValue.
+// Returns hipErrorInvalidValue when `dptr` or `texRef` is null.
+hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr,
+                               const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetAddress, dptr, texRef);
+
+  if (dptr == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hipResourceDesc boundRes = {};
+  hipError_t status = ihipGetTextureObjectResourceDesc(&boundRes, texRef->textureObject);
+  if (status != hipSuccess) {
+    LogPrintfError("hipGetTextureObjectResourceDesc failed with error code: %s \n",
+                   ihipGetErrorName(status));
+    HIP_RETURN(status);
+  }
+
+  // Need to verify: a texture reference that is not bound to a device memory range
+  // (i.e. it is array-backed) is reported as hipErrorInvalidValue.
+  if ((boundRes.resType == hipResourceTypeArray) ||
+      (boundRes.resType == hipResourceTypeMipmappedArray)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  if (boundRes.resType == hipResourceTypeLinear) {
+    *dptr = boundRes.res.linear.devPtr;
+  } else if (boundRes.resType == hipResourceTypePitch2D) {
+    *dptr = boundRes.res.pitch2D.devPtr;
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+hipError_t hipTexRefSetAddress(size_t* ByteOffset,
+                               textureReference* texRef,
+                               hipDeviceptr_t dptr,
+                               size_t bytes) {
+  HIP_INIT_API(hipTexRefSetAddress, ByteOffset, texRef, dptr, bytes);
+
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hipDeviceptr_t refDevPtr = nullptr;
+  size_t refDevSize = 0;
+  HIP_RETURN_ONFAIL(PlatformState::instance().getDynTexGlobalVar(texRef, &refDevPtr, &refDevSize));
+  assert(refDevSize == sizeof(textureReference));
+
+  // Any previous address or HIP array state associated with the texture reference is superseded by this function.
+  // Any memory previously bound to hTexRef is unbound.
+  // No need to check for errors.
+ hipError_t err = ihipDestroyTextureObject(texRef->textureObject); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypeLinear; + resDesc.res.linear.devPtr = dptr; + resDesc.res.linear.desc = hip::getChannelFormatDesc(texRef->numChannels, texRef->format); + resDesc.res.linear.sizeInBytes = bytes; + err = ihipGetTextureAlignmentOffset(ByteOffset, dptr); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + // Align the user ptr to HW requirments. + resDesc.res.linear.devPtr = static_cast(dptr) - *ByteOffset; + hipTextureDesc texDesc = hip::getTextureDesc(texRef); + + err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream)); +} + +hipError_t hipTexRefSetAddress2D(textureReference* texRef, + const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, + size_t Pitch) { + HIP_INIT_API(hipTexRefSetAddress2D, texRef, desc, dptr, Pitch); + + if ((texRef == nullptr) || (desc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + HIP_RETURN_ONFAIL(PlatformState::instance().getDynTexGlobalVar(texRef, &refDevPtr, &refDevSize)); + assert(refDevSize == sizeof(textureReference)); + + // Any previous address or HIP array state associated with the texture reference is superseded by this function. + // Any memory previously bound to hTexRef is unbound. + // No need to check for errors. + hipError_t err = ihipDestroyTextureObject(texRef->textureObject); + if (err != hipSuccess) { + HIP_RETURN(err); + } + + hipResourceDesc resDesc = {}; + resDesc.resType = hipResourceTypePitch2D; + resDesc.res.linear.devPtr = dptr; + resDesc.res.linear.desc = hip::getChannelFormatDesc(desc->NumChannels, desc->Format); // Need to verify. 
+  resDesc.res.pitch2D.width = desc->Width;
+  resDesc.res.pitch2D.height = desc->Height;
+  resDesc.res.pitch2D.pitchInBytes = Pitch;
+
+  hipTextureDesc texDesc = hip::getTextureDesc(texRef);
+
+  err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr);
+  if (err != hipSuccess) {
+    HIP_RETURN(err);
+  }
+  // Copy to device.
+  hip::Stream* stream = hip::getNullStream();
+  HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
+}
+
+// Builds a channel format descriptor from the four per-channel values and the format
+// kind, in the order x, y, z, w, f.
+hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) {
+  return {x, y, z, w, f};
+}
+
+// Border-color query for a texture reference. Not implemented: textureReference has no
+// borderColor member yet, so nothing can be copied into `pBorderColor`.
+// Returns hipErrorInvalidValue when either pointer is null and hipErrorNotSupported
+// otherwise (either because the device lacks image support or because the query itself
+// is unimplemented). `pBorderColor` is never written.
+hipError_t hipTexRefGetBorderColor(float* pBorderColor,
+                                   const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetBorderColor, pBorderColor, texRef);
+
+  if ((pBorderColor == nullptr) || (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  // TODO add textureReference::borderColor, then:
+  // std::memcpy(pBorderColor, texRef.borderColor, sizeof(texRef.borderColor));
+  // Until then report the gap instead of asserting (debug) or returning hipSuccess with
+  // an unwritten output (release).
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+hipError_t hipTexRefGetFilterMode(hipTextureFilterMode* pfm,
+                                  const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetFilterMode, pfm, texRef);
+
+  if ((pfm == nullptr) || (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pfm = texRef->filterMode;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Rebuilds the HIP_TRSF_* flag word from the fields of `texRef` and stores it in
+// `*pFlags`: READ_AS_INTEGER when readMode is hipReadModeElementType,
+// NORMALIZED_COORDINATES when normalized == 1, SRGB when sRGB == 1.
+// Returns hipErrorInvalidValue when either pointer is null and hipErrorNotSupported when
+// the current device reports no image support.
+hipError_t hipTexRefGetFlags(unsigned int* pFlags,
+                             const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetFlags, pFlags, texRef);
+
+  if (pFlags == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const device::Info& devInfo = hip::getCurrentDevice()->devices()[0]->info();
+  if (!devInfo.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", devInfo.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  // Start from an empty flag word and add one bit per matching field.
+  *pFlags = 0;
+  if (texRef->readMode == hipReadModeElementType) {
+    *pFlags = *pFlags | HIP_TRSF_READ_AS_INTEGER;
+  }
+  if (texRef->normalized == 1) {
+    *pFlags = *pFlags | HIP_TRSF_NORMALIZED_COORDINATES;
+  }
+  if (texRef->sRGB == 1) {
+    *pFlags = *pFlags | HIP_TRSF_SRGB;
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+hipError_t hipTexRefGetFormat(hipArray_Format* pFormat,
+                              int* pNumChannels,
+                              const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetFormat, pFormat, pNumChannels, texRef);
+
+  if ((pFormat == nullptr) || (pNumChannels == nullptr) ||
+      (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pFormat = texRef->format;
+  *pNumChannels = texRef->numChannels;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Copies texRef->maxAnisotropy into `*pmaxAnsio`.
+// Returns hipErrorInvalidValue when either pointer is null, hipErrorNotSupported when the
+// current device reports no image support, and hipSuccess once the value is written.
+hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio,
+                                     const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetMaxAnisotropy, pmaxAnsio, texRef);
+
+  if ((pmaxAnsio == nullptr) || (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pmaxAnsio = texRef->maxAnisotropy;
+
+  // The output has been produced, so report success like the other getters do.
+  HIP_RETURN(hipSuccess);
+}
+
+// Copies texRef->mipmapFilterMode into `*pfm`.
+// Returns hipErrorInvalidValue when either pointer is null, hipErrorNotSupported when the
+// current device reports no image support, and hipSuccess once the value is written.
+hipError_t hipTexRefGetMipmapFilterMode(hipTextureFilterMode* pfm,
+                                        const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetMipmapFilterMode, pfm, texRef);
+
+  if ((pfm == nullptr) || (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pfm = texRef->mipmapFilterMode;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Copies texRef->mipmapLevelBias into `*pbias`.
+// Returns hipErrorInvalidValue when either pointer is null, hipErrorNotSupported when the
+// current device reports no image support, and hipSuccess once the value is written.
+hipError_t hipTexRefGetMipmapLevelBias(float* pbias,
+                                       const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetMipmapLevelBias, pbias, texRef);
+
+  if ((pbias == nullptr) || (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pbias = texRef->mipmapLevelBias;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Copies texRef->minMipmapLevelClamp / maxMipmapLevelClamp into the two outputs.
+// Returns hipErrorInvalidValue when any pointer is null, hipErrorNotSupported when the
+// current device reports no image support, and hipSuccess once both values are written.
+hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp,
+                                        float* pmaxMipmapLevelClamp,
+                                        const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  HIP_INIT_API(hipTexRefGetMipmapLevelClamp, pminMipmapLevelClamp, pmaxMipmapLevelClamp, texRef);
+
+  if ((pminMipmapLevelClamp == nullptr) || (pmaxMipmapLevelClamp == nullptr) ||
+      (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pminMipmapLevelClamp = texRef->minMipmapLevelClamp;
+  *pmaxMipmapLevelClamp = texRef->maxMipmapLevelClamp;
+
+  HIP_RETURN(hipSuccess);
+}
+
+hipError_t hipTexRefGetMipmappedArray(hipMipmappedArray_t* pArray,
+                                      const textureReference* texRef) {
+  // TODO overload operator<<(ostream&, textureReference&).
+  // Pass the texture reference pointer itself (as every sibling API does), not the
+  // address of the local parameter.
+  HIP_INIT_API(hipTexRefGetMipmappedArray, pArray, texRef);
+
+  if ((pArray == nullptr) || (texRef == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  // Look up what the texture object behind this reference is bound to.
+  hipResourceDesc resDesc = {};
+  hipError_t error = ihipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject);
+  if (error != hipSuccess) {
+    HIP_RETURN(error);
+  }
+
+  // Only a mipmapped-array binding has something to return; every other binding kind
+  // is reported as hipErrorInvalidValue.
+  switch (resDesc.resType) {
+    case hipResourceTypeLinear:
+    case hipResourceTypePitch2D:
+    case hipResourceTypeArray: {
+      HIP_RETURN(hipErrorInvalidValue);
+    }
+    case hipResourceTypeMipmappedArray:
+      *pArray = resDesc.res.mipmap.mipmap;
+      break;
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+hipError_t hipTexRefSetBorderColor(textureReference* texRef,
+                                   float* pBorderColor) {
+  HIP_INIT_API(hipTexRefSetBorderColor, texRef, pBorderColor);
+
+  if ((texRef == nullptr) || (pBorderColor == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  // TODO add textureReference::borderColor.
+  // Once the member exists:
+  // std::memcpy(texRef.borderColor, pBorderColor, sizeof(texRef.borderColor));
+  // Until then nothing can be stored, so report the gap instead of asserting (debug)
+  // or claiming success (release).
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+// Stores `maxAniso` in texRef->maxAnisotropy.
+// Returns hipErrorInvalidValue when `texRef` is null and hipErrorNotSupported when the
+// current device reports no image support; hipSuccess otherwise.
+hipError_t hipTexRefSetMaxAnisotropy(textureReference* texRef,
+                                     unsigned int maxAniso) {
+  HIP_INIT_API(hipTexRefSetMaxAnisotropy, texRef, maxAniso);
+
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  texRef->maxAnisotropy = maxAniso;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Stores `fm` in texRef->mipmapFilterMode.
+// Returns hipErrorInvalidValue when `texRef` is null and hipErrorNotSupported when the
+// current device reports no image support; hipSuccess otherwise.
+hipError_t hipTexRefSetMipmapFilterMode(textureReference* texRef,
+                                        hipTextureFilterMode fm) {
+  HIP_INIT_API(hipTexRefSetMipmapFilterMode, texRef, fm);
+
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  texRef->mipmapFilterMode = fm;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Stores `bias` in texRef->mipmapLevelBias.
+// Returns hipErrorInvalidValue when `texRef` is null and hipErrorNotSupported when the
+// current device reports no image support; hipSuccess otherwise.
+hipError_t hipTexRefSetMipmapLevelBias(textureReference* texRef,
+                                       float bias) {
+  HIP_INIT_API(hipTexRefSetMipmapLevelBias, texRef, bias);
+
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  texRef->mipmapLevelBias = bias;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Stores the two clamp values in texRef->minMipmapLevelClamp / maxMipmapLevelClamp.
+// Returns hipErrorInvalidValue when `texRef` is null and hipErrorNotSupported when the
+// current device reports no image support; hipSuccess otherwise.
+hipError_t hipTexRefSetMipmapLevelClamp(textureReference* texRef,
+                                        float minMipMapLevelClamp,
+                                        float maxMipMapLevelClamp) {
+  // All three parameters are handed to HIP_INIT_API, matching the other setters.
+  HIP_INIT_API(hipTexRefSetMipmapLevelClamp, texRef, minMipMapLevelClamp, maxMipMapLevelClamp);
+
+  if (texRef == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  amd::Device* device = hip::getCurrentDevice()->devices()[0];
+  const device::Info& info = device->info();
+  if (!info.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", info.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  texRef->minMipmapLevelClamp = minMipMapLevelClamp;
+  texRef->maxMipmapLevelClamp = maxMipMapLevelClamp;
+
+  HIP_RETURN(hipSuccess);
+}
+
+hipError_t hipTexRefSetMipmappedArray(textureReference* texRef,
+                                      hipMipmappedArray* mipmappedArray,
+                                      unsigned int Flags) {
+  HIP_INIT_API(hipTexRefSetMipmappedArray, texRef, mipmappedArray, Flags);
+
+  if ((texRef == nullptr) || (mipmappedArray == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  if (Flags != HIP_TRSA_OVERRIDE_FORMAT) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hipDeviceptr_t refDevPtr = nullptr;
+  size_t refDevSize = 0;
+  HIP_RETURN_ONFAIL(PlatformState::instance().getDynTexGlobalVar(texRef, &refDevPtr, &refDevSize));
+  assert(refDevSize == sizeof(textureReference));
+
+  // Any previous address or HIP array state associated with the texture reference is superseded by this function.
+  // Any memory previously bound to hTexRef is unbound.
+  // No need to check for errors.
+  hipError_t err = ihipDestroyTextureObject(texRef->textureObject);
+  if (err != hipSuccess) {
+    HIP_RETURN(err);
+  }
+
+  hipResourceDesc resDesc = {};
+  resDesc.resType = hipResourceTypeMipmappedArray;
+  resDesc.res.mipmap.mipmap = mipmappedArray;
+
+  hipTextureDesc texDesc = hip::getTextureDesc(texRef);
+
+  hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format));
+  hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format);
+
+  err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc);
+  if (err != hipSuccess) {
+    HIP_RETURN(err);
+  }
+  // Copy to device.
+  hip::Stream* stream = hip::getNullStream();
+  HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
+}
+
+// Creates a texture object from driver-style descriptors.
+// The HIP_* descriptors are converted with hip::getResourceDesc / hip::getTextureDesc /
+// hip::getResourceViewDesc and forwarded to ihipCreateTextureObject, whose result is
+// returned. `pResViewDesc` is optional; the other three pointers must be non-null or
+// hipErrorInvalidValue is returned.
+hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject,
+                              const HIP_RESOURCE_DESC* pResDesc,
+                              const HIP_TEXTURE_DESC* pTexDesc,
+                              const HIP_RESOURCE_VIEW_DESC* pResViewDesc) {
+  HIP_INIT_API(hipTexObjectCreate, pTexObject, pResDesc, pTexDesc, pResViewDesc);
+
+  if ((pTexObject == nullptr) || (pResDesc == nullptr) || (pTexDesc == nullptr)) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hipResourceDesc convertedRes = hip::getResourceDesc(*pResDesc);
+  hipTextureDesc convertedTex = hip::getTextureDesc(*pTexDesc);
+
+  // Without a resource view there is nothing more to convert.
+  if (pResViewDesc == nullptr) {
+    HIP_RETURN(ihipCreateTextureObject(pTexObject, &convertedRes, &convertedTex, nullptr));
+  }
+
+  hipResourceViewDesc convertedView = hip::getResourceViewDesc(*pResViewDesc);
+  HIP_RETURN(ihipCreateTextureObject(pTexObject, &convertedRes, &convertedTex, &convertedView));
+}
+
+// Destroys `texObject` by forwarding to ihipDestroyTextureObject and returning its result.
+hipError_t hipTexObjectDestroy(hipTextureObject_t texObject) {
+  HIP_INIT_API(hipTexObjectDestroy, texObject);
+
+  HIP_RETURN(ihipDestroyTextureObject(texObject));
+}
+
+// Writes the driver-style form of texObject->resDesc into `*pResDesc`.
+// Returns hipErrorInvalidValue when either argument is null and hipErrorNotSupported when
+// the current device reports no image support.
+hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc,
+                                       hipTextureObject_t texObject) {
+  HIP_INIT_API(hipTexObjectGetResourceDesc, pResDesc, texObject);
+
+  if (pResDesc == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  if (texObject == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const device::Info& devInfo = hip::getCurrentDevice()->devices()[0]->info();
+  if (!devInfo.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", devInfo.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pResDesc = hip::getResourceDesc(texObject->resDesc);
+  HIP_RETURN(hipSuccess);
+}
+
+// Writes the driver-style form of texObject->resViewDesc into `*pResViewDesc`.
+// Returns hipErrorInvalidValue when either argument is null and hipErrorNotSupported when
+// the current device reports no image support.
+hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc,
+                                           hipTextureObject_t texObject) {
+  HIP_INIT_API(hipTexObjectGetResourceViewDesc, pResViewDesc, texObject);
+
+  if (pResViewDesc == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  if (texObject == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const device::Info& devInfo = hip::getCurrentDevice()->devices()[0]->info();
+  if (!devInfo.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", devInfo.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pResViewDesc = hip::getResourceViewDesc(texObject->resViewDesc);
+  HIP_RETURN(hipSuccess);
+}
+
+// Writes the driver-style form of texObject->texDesc into `*pTexDesc`.
+// Returns hipErrorInvalidValue when either argument is null and hipErrorNotSupported when
+// the current device reports no image support.
+hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc,
+                                      hipTextureObject_t texObject) {
+  HIP_INIT_API(hipTexObjectGetTextureDesc, pTexDesc, texObject);
+
+  if (pTexDesc == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+  if (texObject == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const device::Info& devInfo = hip::getCurrentDevice()->devices()[0]->info();
+  if (!devInfo.imageSupport_) {
+    LogPrintfError("Texture not supported on the device %s", devInfo.name_);
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  *pTexDesc = hip::getTextureDesc(texObject->texDesc);
+  HIP_RETURN(hipSuccess);
+}
diff --git a/projects/clr/hipamd/src/hip_vm.cpp b/projects/clr/hipamd/src/hip_vm.cpp
new file mode 100644
index 0000000000..17287d8b41
--- /dev/null
+++ b/projects/clr/hipamd/src/hip_vm.cpp
@@ -0,0 +1,285 @@
+/* Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE. */
+
+#include <hip/hip_runtime.h>
+#include "hip_internal.hpp"
+#include "hip_vm.hpp"
+
+// Releases a virtual address range on every device in g_devices.
+// Returns hipErrorInvalidValue when devPtr is null or size is 0; size is not used otherwise.
+hipError_t hipMemAddressFree(void* devPtr, size_t size) {
+  HIP_INIT_API(hipMemAddressFree, devPtr, size);
+
+  if (devPtr == nullptr || size == 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  for (auto& dev : g_devices) {
+    dev->devices()[0]->virtualFree(devPtr);
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Reserves one common virtual address range on every device in g_devices.
+//   ptr   - receives the reserved address; left as nullptr on any failure.
+//   addr  - requested start address, or nullptr to let the first device pick one.
+//   flags - must be 0.
+// Returns hipErrorInvalidValue for a null ptr or non-zero flags, and hipErrorOutOfMemory when a
+// device cannot reserve the common address (reservations made so far are rolled back).
+hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* addr,
+                                unsigned long long flags) {
+  HIP_INIT_API(hipMemAddressReserve, ptr, size, alignment, addr, flags);
+
+  if (ptr == nullptr || flags != 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  *ptr = nullptr;
+
+  void* startAddress = addr;
+  void* reserved = nullptr;
+
+  for (auto& dev : g_devices) {
+    reserved = dev->devices()[0]->virtualAlloc(startAddress, size, alignment);
+
+    // A null result, or an address other than the one all devices must share, is a failure.
+    const bool failed =
+        (reserved == nullptr) || (startAddress != nullptr && reserved != startAddress);
+    if (failed) {
+      // Drop the stray reservation this device handed out, if any.
+      if (reserved != nullptr) {
+        dev->devices()[0]->virtualFree(reserved);
+      }
+      // Undo the reservations already made on the devices that precede this one.
+      for (auto& d : g_devices) {
+        if (d == dev) {
+          break;
+        }
+        d->devices()[0]->virtualFree(startAddress);
+      }
+      HIP_RETURN(hipErrorOutOfMemory);
+    }
+
+    // if addr==0 the first device generates the va and it is used for the other devices
+    startAddress = reserved;
+  }
+
+  *ptr = reserved;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Creates a physical allocation of `size` bytes on the device named by prop->location.id and
+// returns a handle (a hip::GenericAllocation*) through `handle`.
+// Only pinned, device-located, non-IPC allocations with flags == 0 are accepted.
+hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, size_t size,
+                        const hipMemAllocationProp* prop, unsigned long long flags) {
+  HIP_INIT_API(hipMemCreate, handle, size, prop, flags);
+
+  if (handle == nullptr ||
+      size == 0 ||
+      flags != 0 ||
+      prop == nullptr ||
+      prop->type != hipMemAllocationTypePinned ||
+      prop->location.type != hipMemLocationTypeDevice ||
+      prop->location.id >= g_devices.size()) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  // Currently only support non-IPC allocations
+  if (prop->requestedHandleType != hipMemHandleTypeNone) {
+    HIP_RETURN(hipErrorNotSupported);
+  }
+
+  const auto& dev_info = g_devices[prop->location.id]->devices()[0]->info();
+
+  if (dev_info.maxPhysicalMemAllocSize_ < size) {
+    HIP_RETURN(hipErrorOutOfMemory);
+  }
+  // The size has to be a multiple of the device's base address alignment.
+  if (size % dev_info.memBaseAddrAlign_ != 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  amd::Context* amdContext = g_devices[prop->location.id]->asContext();
+
+  void* ptr = amd::SvmBuffer::malloc(*amdContext, 0, size, dev_info.memBaseAddrAlign_,
+                                     nullptr);
+
+  if (ptr == nullptr) {
+    size_t free = 0, total = 0;
+    hipError_t err = hipMemGetInfo(&free, &total);
+    if (err == hipSuccess) {
+      LogPrintfError("Allocation failed : Device memory : required :%zu | free :%zu | total :%zu \n", size, free, total);
+    }
+    HIP_RETURN(hipErrorOutOfMemory);
+  }
+  size_t offset = 0; //this is ignored
+  amd::Memory* memObj = getMemoryObject(ptr, offset);
+  //saves the current device id so that it can be accessed later
+  memObj->getUserData().deviceId = prop->location.id;
+  memObj->getUserData().data = new hip::GenericAllocation(ptr, size, *prop);
+
+  *handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(memObj->getUserData().data);
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Not implemented: validates the arguments and reports hipErrorNotSupported.
+hipError_t hipMemExportToShareableHandle(void* shareableHandle,
+                                         hipMemGenericAllocationHandle_t handle,
+                                         hipMemAllocationHandleType handleType,
+                                         unsigned long long flags) {
+  HIP_INIT_API(hipMemExportToShareableHandle, shareableHandle, handle, handleType, flags);
+
+  if (flags != 0 ||
+      handle == nullptr ||
+      shareableHandle == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+// Validates the arguments and reports hipSuccess.
+// NOTE(review): *flags is never written here, so callers read back whatever they passed in.
+hipError_t hipMemGetAccess(unsigned long long* flags, const hipMemLocation* location, void* ptr) {
+  HIP_INIT_API(hipMemGetAccess, flags, location, ptr);
+
+  if (flags == nullptr ||
+      location == nullptr ||
+      ptr == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Stores the virtual memory allocation granularity of the device named by prop->location.id in
+// *granularity. `option` is not consulted.
+hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAllocationProp* prop,
+                                          hipMemAllocationGranularity_flags option) {
+  HIP_INIT_API(hipMemGetAllocationGranularity, granularity, prop, option);
+
+  if (granularity == nullptr ||
+      prop == nullptr ||
+      prop->type != hipMemAllocationTypePinned ||
+      prop->location.type != hipMemLocationTypeDevice ||
+      prop->location.id >= g_devices.size()) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  const auto& dev_info = g_devices[prop->location.id]->devices()[0]->info();
+
+  *granularity = dev_info.virtualMemAllocGranularity_;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Copies the properties the allocation behind `handle` was created with into *prop.
+hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop,
+                                                   hipMemGenericAllocationHandle_t handle) {
+  HIP_INIT_API(hipMemGetAllocationPropertiesFromHandle, prop, handle);
+
+  if (handle == nullptr || prop == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  *prop = reinterpret_cast<hip::GenericAllocation*>(handle)->GetProperties();
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Not implemented: validates the arguments and reports hipErrorNotSupported.
+hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, void* osHandle,
+                                           hipMemAllocationHandleType shHandleType) {
+  HIP_INIT_API(hipMemImportFromShareableHandle, handle, osHandle, shHandleType);
+
+  if (handle == nullptr || osHandle == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+// Maps the allocation behind `handle` at the virtual address `ptr` and waits for completion.
+// Only offset == 0 and flags == 0 are accepted.
+hipError_t hipMemMap(void* ptr, size_t size, size_t offset, hipMemGenericAllocationHandle_t handle,
+                     unsigned long long flags) {
+  HIP_INIT_API(hipMemMap, ptr, size, offset, handle, flags);
+
+  if (ptr == nullptr ||
+      handle == nullptr ||
+      size == 0 ||
+      offset != 0 ||
+      flags != 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(handle);
+
+  // The map command runs on the null stream of the device that owns the allocation.
+  auto& queue = *g_devices[ga->GetProperties().location.id]->NullStream();
+
+  amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
+                                                 &ga->asAmdMemory());
+  cmd->enqueue();
+  cmd->awaitCompletion();
+  cmd->release();
+
+  // update the internal svm address to ptr
+  ga->asAmdMemory().setSvmPtr(ptr);
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Not implemented: validates the arguments and reports hipErrorNotSupported.
+hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, unsigned int count,
+                               hipStream_t stream) {
+  HIP_INIT_API(hipMemMapArrayAsync, mapInfoList, count, stream);
+
+  if (mapInfoList == nullptr || count == 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  HIP_RETURN(hipErrorNotSupported);
+}
+
+// Destroys the hip::GenericAllocation behind `handle`; its destructor frees the memory.
+hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle) {
+  HIP_INIT_API(hipMemRelease, handle);
+
+  // Braced so that a multi-statement HIP_RETURN expansion cannot escape the condition.
+  if (handle == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(handle);
+
+  delete ga;
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Looks up the memory object that covers `addr` and returns the allocation handle stored in its
+// user data. Fails with hipErrorInvalidValue when no object or no handle is found.
+hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, void* addr) {
+  HIP_INIT_API(hipMemRetainAllocationHandle, handle, addr);
+
+  if (handle == nullptr || addr == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  amd::Memory* mem = amd::MemObjMap::FindMemObj(addr);
+
+  if (mem == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  *handle = reinterpret_cast<hipMemGenericAllocationHandle_t>(mem->getUserData().data);
+
+  if (*handle == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Validates the arguments and reports hipSuccess; no access change is applied here.
+hipError_t hipMemSetAccess(void* ptr, size_t size, const hipMemAccessDesc* desc, size_t count) {
+  HIP_INIT_API(hipMemSetAccess, ptr, size, desc, count);
+
+  if (ptr == nullptr ||
+      size == 0 ||
+      desc == nullptr ||
+      count == 0) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  HIP_RETURN(hipSuccess);
+}
+
+// Unmaps the range at `ptr` and restores the original address of the generic allocation that
+// was mapped there. Fails with hipErrorInvalidValue when `ptr` is null, is not covered by a known
+// memory object, or that object carries no generic allocation.
+hipError_t hipMemUnmap(void* ptr, size_t size) {
+  HIP_INIT_API(hipMemUnmap, ptr, size);
+
+  if (ptr == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  amd::Memory* va = amd::MemObjMap::FindMemObj(ptr);
+
+  // An address that was never mapped has no memory object; do not dereference it.
+  if (va == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  hip::GenericAllocation* ga = reinterpret_cast<hip::GenericAllocation*>(va->getUserData().data);
+
+  if (ga == nullptr) {
+    HIP_RETURN(hipErrorInvalidValue);
+  }
+
+  auto& queue = *g_devices[va->getUserData().deviceId]->NullStream();
+
+  // A null memory argument turns the map command into an unmap of [ptr, ptr + size).
+  amd::Command* cmd = new amd::VirtualMapCommand(queue, amd::Command::EventWaitList{}, ptr, size,
+                                                 nullptr);
+  cmd->enqueue();
+  cmd->awaitCompletion();
+  cmd->release();
+
+  // restore the original va of the generic allocation
+  va->setSvmPtr(ga->genericAddress());
+
+  HIP_RETURN(hipSuccess);
+}
+
diff --git a/projects/clr/hipamd/src/hip_vm.hpp b/projects/clr/hipamd/src/hip_vm.hpp
new file mode 100644
index 0000000000..a38acf63e8
--- /dev/null
+++ b/projects/clr/hipamd/src/hip_vm.hpp
@@ -0,0 +1,49 @@
+/* Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
*/
+
+#ifndef HIP_SRC_HIP_VM_H
+#define HIP_SRC_HIP_VM_H
+
+#include <hip/hip_runtime.h>
+#include "hip_internal.hpp"
+
+hipError_t ihipFree(void* ptr);
+
+namespace hip {
+// Owns one physical allocation created by hipMemCreate(): the address it was allocated at, its
+// size and the properties it was created with. The memory is freed when the object is destroyed.
+class GenericAllocation {
+  void* ptr_;
+  size_t size_;
+  hipMemAllocationProp properties_;
+
+public:
+  GenericAllocation(void* ptr, size_t size, const hipMemAllocationProp& prop)
+      : ptr_(ptr), size_(size), properties_(prop) {}
+  // Freeing is best effort: a destructor has no way to report the error code.
+  ~GenericAllocation() { static_cast<void>(ihipFree(ptr_)); }
+
+  // The object is the sole owner of ptr_; a copy would free the same memory twice.
+  GenericAllocation(const GenericAllocation&) = delete;
+  GenericAllocation& operator=(const GenericAllocation&) = delete;
+
+  // Properties passed to the constructor.
+  const hipMemAllocationProp& GetProperties() const { return properties_; }
+  // The opaque handle is simply this object's address.
+  hipMemGenericAllocationHandle_t asMemGenericAllocationHandle() {
+    return reinterpret_cast<hipMemGenericAllocationHandle_t>(this);
+  }
+  // Memory object registered for genericAddress().
+  // NOTE(review): the lookup result is dereferenced unchecked - confirm it cannot be null here.
+  amd::Memory& asAmdMemory() {
+    size_t discardOffset = 0;
+    return *getMemoryObject(genericAddress(), discardOffset);
+  }
+  // Address the allocation was created at.
+  void* genericAddress() const { return ptr_; }
+};
+}  // namespace hip
+
+#endif //HIP_SRC_HIP_VM_H
diff --git a/projects/clr/hipamd/src/hiprtc/CMakeLists.txt b/projects/clr/hipamd/src/hiprtc/CMakeLists.txt
new file mode 100644
index 0000000000..90b7330e97
--- /dev/null
+++ b/projects/clr/hipamd/src/hiprtc/CMakeLists.txt
@@ -0,0 +1,239 @@
+# Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# This project builds hiprtc +# If ever this is to be a different lib living in different folder +# Please read this part +# Depends on: rocclr, so find_package(rocclr) will be required +# Building hip header requires hip include folders with hip_version.h + +cmake_minimum_required(VERSION 3.16.1) +option(BUILD_SHARED_LIBS "Build the shared library" ON) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") + +if(BUILD_SHARED_LIBS) + add_library(hiprtc SHARED) + # Windows doesn't have a strip utility, so CMAKE_STRIP won't be set. + if((CMAKE_BUILD_TYPE STREQUAL "Release") AND NOT ("${CMAKE_STRIP}" STREQUAL "")) + add_custom_command(TARGET hiprtc POST_BUILD COMMAND ${CMAKE_STRIP} $) + endif() +else() + add_library(hiprtc STATIC $) +endif() + +set_target_properties(hiprtc PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF + POSITION_INDEPENDENT_CODE ON + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib + ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +if(WIN32) + if(${HIP_LIB_VERSION_MAJOR} VERSION_GREATER 9) + set(HIP_MAJOR_VERSION "${HIP_LIB_VERSION_MAJOR}") + else() + set(HIP_MAJOR_VERSION "0${HIP_LIB_VERSION_MAJOR}") + endif() + set(HIP_MINOR_VERSION "0${HIP_LIB_VERSION_MINOR}") +endif() + +if(BUILD_SHARED_LIBS) + if(WIN32) + set_target_properties(hiprtc PROPERTIES + OUTPUT_NAME "hiprtc${HIP_MAJOR_VERSION}${HIP_MINOR_VERSION}" + ARCHIVE_OUTPUT_NAME "hiprtc") + else() + set_target_properties(hiprtc PROPERTIES + VERSION ${HIP_LIB_VERSION_STRING} + SOVERSION ${HIP_LIB_VERSION_MAJOR}) + endif() +endif() + +if(BUILD_SHARED_LIBS) + target_sources(hiprtc PRIVATE hiprtc.cpp hiprtcComgrHelper.cpp hiprtcInternal.cpp) +endif() + +set_target_properties(hiprtc 
PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF + POSITION_INDEPENDENT_CODE ON) + +target_include_directories(hiprtc + PRIVATE + $ + $ + $ + PUBLIC + $) + +if(BUILD_SHARED_LIBS) + if(WIN32) + target_sources(hiprtc PRIVATE hiprtc.def) + else() + target_link_libraries(hiprtc PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hiprtc.map.in") + set_target_properties(hiprtc PROPERTIES LINK_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/hiprtc.map.in") + endif() +endif() + +if(WIN32) + target_link_libraries(hiprtc PRIVATE Dbghelp.lib) +endif() + +target_link_libraries(hiprtc PUBLIC ${CMAKE_DL_LIBS}) + +if(BUILD_SHARED_LIBS) + target_link_libraries(hiprtc PRIVATE rocclr) +else() + target_compile_definitions(hiprtc PRIVATE $) + target_include_directories(hiprtc PRIVATE $) +endif() + +target_compile_definitions(hiprtc PUBLIC __HIP_PLATFORM_AMD__) + +add_to_config(_versionInfo HIP_PACKAGING_VERSION_PATCH) +add_to_config(_versionInfo CPACK_DEBIAN_PACKAGE_RELEASE) +add_to_config(_versionInfo CPACK_RPM_PACKAGE_RELEASE) + +add_to_config(_versionInfo HIP_VERSION_MAJOR) +add_to_config(_versionInfo HIP_VERSION_MINOR) +add_to_config(_versionInfo HIP_VERSION_PATCH) +add_to_config(_versionInfo HIP_VERSION_GITHASH) + +# Enable preprocessed hiprtc-builtins library +include(HIPRTC RESULT_VARIABLE HIPRTC_CMAKE) +# Requires clang and llvm-mc to create this library. 
+find_package(LLVM REQUIRED CONFIG PATHS ${ROCM_PATH}/llvm) +find_package(Clang REQUIRED CONFIG PATHS ${ROCM_PATH}/llvm) +set(HIPRTC_GEN_DIR "${CMAKE_CURRENT_BINARY_DIR}/hip_rtc_gen") +set(HIPRTC_GEN_HEADER "${HIPRTC_GEN_DIR}/hipRTC_header.h") +set(HIPRTC_GEN_MCIN "${HIPRTC_GEN_DIR}/hipRTC_header.mcin") +set(HIPRTC_GEN_PREPROCESSED "${HIPRTC_GEN_DIR}/hipRTC") +set(HIPRTC_GEN_OBJ "${HIPRTC_GEN_DIR}/hipRTC_header${CMAKE_CXX_OUTPUT_EXTENSION}") +set(HIPRTC_WARP_FUNCS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_warp_functions.h") +set(HIPRTC_FP16_MATH_FWD "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_fp16_math_fwd.h") +set(HIPRTC_FP16_FUNCS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_fp16.h") +set(HIPRTC_COOP_GROUPS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_cooperative_groups.h") +set(HIPRTC_COOP_GRPS_HELPER "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_cooperative_groups_helper.h") +set(HIPRTC_UNSAFE_ATOMICS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_unsafe_atomics.h") + +# Generate required HIPRTC files. +FILE(MAKE_DIRECTORY ${HIPRTC_GEN_DIR}) +generate_hiprtc_header("${HIPRTC_GEN_HEADER}") +generate_hiprtc_mcin("${HIPRTC_GEN_MCIN}" "${HIPRTC_GEN_PREPROCESSED}") + +# Generate HIPRTC Builtins Preprocessed Object. +# Note: second command appends define macros at build time. +# FIXME: --hip-version forced to 3.6 to use clang headers, until Windows versioning is fixed. +add_custom_command( + OUTPUT ${HIPRTC_GEN_PREPROCESSED} + COMMAND $ -O3 --rocm-path=${PROJECT_SOURCE_DIR}/include/.. 
-std=c++17 -nogpulib --hip-version=3.6 -isystem ${HIP_COMMON_INCLUDE_DIR} -isystem ${PROJECT_SOURCE_DIR}/include -isystem ${PROJECT_BINARY_DIR}/include -isystem ${CMAKE_CURRENT_SOURCE_DIR}/include --cuda-device-only -D__HIPCC_RTC__ -x hip ${HIPRTC_GEN_HEADER} -E -o ${HIPRTC_GEN_PREPROCESSED} + COMMAND ${CMAKE_COMMAND} -DHIPRTC_ADD_MACROS=1 -DHIPRTC_WARP_HEADER_FILE=${HIPRTC_WARP_FUNCS} -DHIPRTC_COOP_HEADER_FILE=${HIPRTC_COOP_GROUPS} -DHIPRTC_COOP_HELPER_FILE=${HIPRTC_COOP_GRPS_HELPER} -DHIPRTC_UNSAFE_ATOMICS_FILE=${HIPRTC_UNSAFE_ATOMICS} -DHIPRTC_FP16_MATH_FWD_FILE=${HIPRTC_FP16_MATH_FWD} -DHIPRTC_FP16_HEADER_FILE=${HIPRTC_FP16_FUNCS} -DHIPRTC_PREPROCESSED_FILE=${HIPRTC_GEN_PREPROCESSED} -P ${HIPRTC_CMAKE} + DEPENDS clang ${HIPRTC_GEN_HEADER}) +add_custom_command( + OUTPUT ${HIPRTC_GEN_OBJ} + COMMAND $ -o ${HIPRTC_GEN_OBJ} ${HIPRTC_GEN_MCIN} --filetype=obj + DEPENDS llvm-mc ${HIPRTC_GEN_PREPROCESSED} ${HIPRTC_GEN_MCIN}) + +# Create hiprtc-builtins library. +add_library(hiprtc-builtins ${HIPRTC_GEN_OBJ}) +set_target_properties(hiprtc-builtins PROPERTIES + CXX_STANDARD 14 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF + POSITION_INDEPENDENT_CODE ON + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib + LINKER_LANGUAGE CXX + VERSION ${HIP_LIB_VERSION_STRING}) + +# Windows and Linux have different naming conventions. +if(WIN32) + # Windows uses DEF file to determine which symbols to expose. + target_sources(hiprtc-builtins PRIVATE hiprtc-builtins.def) + set_target_properties(hiprtc-builtins PROPERTIES + OUTPUT_NAME "hiprtc-builtins${HIP_MAJOR_VERSION}${HIP_MINOR_VERSION}" + ARCHIVE_OUTPUT_NAME "hiprtc-builtins") + # Since ${HIPRTC_GEN_OBJ} was manually generated with llvm-mc, /MT did not embed + # libcmt.lib inside of the obj. So we need to manually set it as defaultlib. + target_link_options(hiprtc-builtins PRIVATE "LINKER:/DEFAULTLIB:libcmt") +else() + # SOVERSION is only supported on Linux. 
+ set_target_properties(hiprtc-builtins PROPERTIES + OUTPUT_NAME "hiprtc-builtins" + SOVERSION ${HIP_LIB_VERSION_MAJOR}) +endif() + +# Test the header file works with simple compilation. +add_custom_command( + OUTPUT ${HIPRTC_GEN_DIR}/tmp.bc + COMMAND $ -O3 --rocm-path=${PROJECT_SOURCE_DIR}/include/.. -std=c++14 -nogpulib -nogpuinc -emit-llvm -c -isystem ${HIP_COMMON_INCLUDE_DIR} -isystem ${PROJECT_BINARY_DIR}/include -isystem ${CMAKE_CURRENT_SOURCE_DIR}/include --cuda-device-only -D__HIPCC_RTC__ --offload-arch=gfx906 -x hip-cpp-output ${HIPRTC_GEN_PREPROCESSED} -o ${HIPRTC_GEN_DIR}/tmp.bc + DEPENDS clang ${HIPRTC_GEN_PREPROCESSED}) + +target_link_libraries(hiprtc PRIVATE ${HIPRTC_GEN_OBJ}) +target_compile_definitions(hiprtc PRIVATE __HIP_ENABLE_RTC) + +if(NOT WIN32) + target_sources(amdhip64 PRIVATE hiprtc.cpp hiprtcComgrHelper.cpp hiprtcInternal.cpp) +endif() + +list(APPEND HIPRTC_OBJECTS ${HIPRTC_GEN_OBJ}) +set(HIPRTC_OBJECTS ${HIPRTC_OBJECTS} PARENT_SCOPE) + +add_dependencies(hiprtc hiprtc-builtins) +install(TARGETS hiprtc-builtins + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +INSTALL(TARGETS hiprtc + EXPORT hiprtc-targets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +INSTALL(EXPORT hiprtc-targets NAMESPACE hiprtc:: DESTINATION ${CONFIG_RTC_PACKAGE_INSTALL_DIR}) + +############################# +# hiprtc-config +############################# +include(CMakePackageConfigHelpers) + +configure_package_config_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/hiprtc-config.cmake.in + ${PROJECT_BINARY_DIR}/hiprtc-config.cmake + INSTALL_DESTINATION ${CONFIG_RTC_PACKAGE_INSTALL_DIR} + PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR) + +write_basic_package_version_file( + ${PROJECT_BINARY_DIR}/hiprtc-config-version.cmake 
+ VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_GITDATE}" + COMPATIBILITY SameMajorVersion) + +INSTALL(FILES ${HIP_COMMON_INCLUDE_DIR}/hip/hiprtc.h DESTINATION "include/hip") + +INSTALL(FILES ${HIP_COMMON_INCLUDE_DIR}/hip/hip_common.h DESTINATION "include/hip") + +INSTALL( + FILES ${PROJECT_BINARY_DIR}/hiprtc-config.cmake + ${PROJECT_BINARY_DIR}/hiprtc-config-version.cmake + DESTINATION ${CONFIG_RTC_PACKAGE_INSTALL_DIR}) diff --git a/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake b/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake new file mode 100644 index 0000000000..730d013e28 --- /dev/null +++ b/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake @@ -0,0 +1,116 @@ +# Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+
+###############################################################################
+# HIPRTC.cmake
+###############################################################################
+
+# This file includes macros required to generate the hiprtc builtins library.
+
+# Stores in the caller's variable named by HIPRTC_DEFINES the block of preprocessor definitions
+# that the script-mode section below appends to the preprocessed hiprtc header.
+function(get_hiprtc_macros HIPRTC_DEFINES)
+  set(${HIPRTC_DEFINES}
+"#pragma clang diagnostic push\n\
+#pragma clang diagnostic ignored \"-Wreserved-id-macro\"\n\
+#pragma clang diagnostic ignored \"-Wc++98-compat-pedantic\"\n\
+#define __device__ __attribute__((device))\n\
+#define __host__ __attribute__((host))\n\
+#define __global__ __attribute__((global))\n\
+#define __constant__ __attribute__((constant))\n\
+#define __shared__ __attribute__((shared))\n\
+#define __align__(x) __attribute__((aligned(x)))\n\
+#if !defined(__has_feature) || !__has_feature(cuda_noinline_keyword)\n\
+#define __noinline__ __attribute__((noinline))\n\
+#endif\n\
+#define __forceinline__ inline __attribute__((always_inline))\n\
+
+#define launch_bounds_impl0(requiredMaxThreadsPerBlock) \\\n\
+ __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))\n\
+#define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \\\n\
+ __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \\\n\
+ amdgpu_waves_per_eu(minBlocksPerMultiprocessor)))\n\
+#define select_impl_(_1, _2, impl_, ...) impl_\n\
+#define __launch_bounds__(...) \\\n\
+ select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) \n\
+#pragma clang diagnostic pop\n\
+#define HIP_INCLUDE_HIP_HIP_RUNTIME_H\n\
+#pragma clang diagnostic push\n\
+#pragma clang diagnostic ignored \"-Wreserved-macro-identifier\"\n\
+#define _HIP_BFLOAT16_H_\n\
+#pragma clang diagnostic pop\n\
+#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H"
+  PARENT_SCOPE)
+endfunction(get_hiprtc_macros)
+
+# To allow concatenating above macros during build time, call this file in script mode.
+# With -DHIPRTC_ADD_MACROS=1 the macro block and then each listed header file are appended, in
+# this order, to HIPRTC_PREPROCESSED_FILE. The "#pragma" lines below are CMake comments and have
+# no effect on the generated file.
+if(HIPRTC_ADD_MACROS)
+  message(STATUS "Appending hiprtc macros to ${HIPRTC_PREPROCESSED_FILE}.")
+  get_hiprtc_macros(HIPRTC_DEFINES)
+  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_DEFINES}")
+  FILE(READ "${HIPRTC_WARP_HEADER_FILE}" HIPRTC_WARP_HEADER)
+  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_WARP_HEADER}")
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
+  FILE(READ "${HIPRTC_COOP_HELPER_FILE}" HIPRTC_COOP_HELPER)
+  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_COOP_HELPER}")
+  FILE(READ "${HIPRTC_COOP_HEADER_FILE}" HIPRTC_COOP_HEADER)
+  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_COOP_HEADER}")
+  FILE(READ "${HIPRTC_UNSAFE_ATOMICS_FILE}" HIPRTC_UNSAFE_ATOMICS)
+  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_UNSAFE_ATOMICS}")
+  FILE(READ "${HIPRTC_FP16_MATH_FWD_FILE}" HIPRTC_FP16_MATH_FWD)
+  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_FP16_MATH_FWD}")
+  FILE(READ "${HIPRTC_FP16_HEADER_FILE}" HIPRTC_FP16_HEADER)
+  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_FP16_HEADER}")
+#pragma clang diagnostic pop
+endif()
+
+# Writes the small translation unit that is preprocessed into the hiprtc builtins header.
+# CHAR_BIT and INT_MAX are defined from compiler built-ins only while the HIP headers are
+# included and are restored afterwards. INT_MAX maps to __INT_MAX__ (the maximum of int);
+# __INTMAX_MAX__ is the maximum of intmax_t and would bake a 64-bit value into the header.
+macro(generate_hiprtc_header HiprtcHeader)
+  FILE(WRITE ${HiprtcHeader}
+"#pragma push_macro(\"CHAR_BIT\")\n\
+#pragma push_macro(\"INT_MAX\")\n\
+#define CHAR_BIT __CHAR_BIT__\n\
+#define INT_MAX __INT_MAX__\n\
+#include \"hip/hip_runtime.h\"\n\
+#include \"hip/hip_bfloat16.h\"\n\
+#pragma pop_macro(\"CHAR_BIT\")\n\
+#pragma pop_macro(\"INT_MAX\")")
+endmacro(generate_hiprtc_header)
+
+# Writes the assembler input that embeds HiprtcPreprocessedInput via .incbin between the
+# __hipRTC_header and __hipRTC_header_size symbols. The .type directives are emitted on
+# non-Windows hosts only.
+# NOTE(review): the two .type strings form a CMake list, so they are joined with ';' when
+# expanded inside the quoted text below - confirm the assembler accepts that separator.
+macro(generate_hiprtc_mcin HiprtcMcin HiprtcPreprocessedInput)
+  if(WIN32)
+    set(HIPRTC_TYPE_LINUX_ONLY "")
+  else()
+    set(HIPRTC_TYPE_LINUX_ONLY
+      " .type __hipRTC_header,@object\n"
+      " .type __hipRTC_header_size,@object")
+  endif()
+  FILE(WRITE ${HiprtcMcin}
+"// Automatically generated script for HIPRTC.\n\
+${HIPRTC_TYPE_LINUX_ONLY}\n\
+ .section .hipRTC_header,\"a\"\n\
+ .globl __hipRTC_header\n\
+ .globl __hipRTC_header_size\n\
+ .p2align 3\n\
+__hipRTC_header:\n\
+ .incbin \"${HiprtcPreprocessedInput}\"\n\
+__hipRTC_header_size:\n\
+ .long __hipRTC_header_size - __hipRTC_header\n")
+endmacro(generate_hiprtc_mcin)
+
diff --git a/projects/clr/hipamd/src/hiprtc/cmake/hiprtc-config.cmake.in b/projects/clr/hipamd/src/hiprtc/cmake/hiprtc-config.cmake.in
new file mode 100644
index 0000000000..5aa911925e
--- /dev/null
+++ b/projects/clr/hipamd/src/hiprtc/cmake/hiprtc-config.cmake.in
@@ -0,0 +1,39 @@
+# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+cmake_minimum_required(VERSION 3.3)
+@PACKAGE_INIT@
+include(CMakeFindDependencyMacro)
+set_and_check(hiprtc_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
+set_and_check(hiprtc_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@")
+set_and_check(hiprtc_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@")
+
+# Windows Specific Definition here:
+# HIP_PATH is taken from the HIP_PATH environment variable, then from HIP_DIR, and finally from
+# the prefix this package was found in, so that it is defined on every branch.
+if(WIN32)
+  if(DEFINED ENV{HIP_PATH})
+    file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_PATH)
+  elseif(DEFINED ENV{HIP_DIR})
+    file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR)
+    # HIP_DIR is still set as before; HIP_PATH mirrors it so consumers can rely on one name.
+    set(HIP_PATH "${HIP_DIR}")
+  else()
+    # using the HIP found
+    set(HIP_PATH ${PACKAGE_PREFIX_DIR})
+  endif()
+endif()
+include("${CMAKE_CURRENT_LIST_DIR}/hiprtc-targets.cmake")
diff --git a/projects/clr/hipamd/src/hiprtc/hiprtc-builtins.def b/projects/clr/hipamd/src/hiprtc/hiprtc-builtins.def
new file mode 100644
index 0000000000..b878164125
--- /dev/null
+++ b/projects/clr/hipamd/src/hiprtc/hiprtc-builtins.def
@@ -0,0 +1,4 @@
+EXPORTS
+__hipRTC_header
+__hipRTC_header_size
+
diff --git a/projects/clr/hipamd/src/hiprtc/hiprtc.cpp b/projects/clr/hipamd/src/hiprtc/hiprtc.cpp
new file mode 100644
index 0000000000..a06e999e1b
--- /dev/null
+++ b/projects/clr/hipamd/src/hiprtc/hiprtc.cpp
@@ -0,0 +1,368 @@
+/*
+Copyright (c) 2022 - Present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "hiprtcInternal.hpp" + +namespace hiprtc { +thread_local TlsAggregator tls; +} + +const char* hiprtcGetErrorString(hiprtcResult x) { + switch (x) { + case HIPRTC_SUCCESS: + return "HIPRTC_SUCCESS"; + case HIPRTC_ERROR_OUT_OF_MEMORY: + return "HIPRTC_ERROR_OUT_OF_MEMORY"; + case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: + return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE"; + case HIPRTC_ERROR_INVALID_INPUT: + return "HIPRTC_ERROR_INVALID_INPUT"; + case HIPRTC_ERROR_INVALID_PROGRAM: + return "HIPRTC_ERROR_INVALID_PROGRAM"; + case HIPRTC_ERROR_INVALID_OPTION: + return "HIPRTC_ERROR_INVALID_OPTION"; + case HIPRTC_ERROR_COMPILATION: + return "HIPRTC_ERROR_COMPILATION"; + case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: + return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE"; + case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: + return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION"; + case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: + return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION"; + case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: + return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID"; + case HIPRTC_ERROR_INTERNAL_ERROR: + return "HIPRTC_ERROR_INTERNAL_ERROR"; + case HIPRTC_ERROR_LINKING: + return "HIPRTC_ERROR_LINKING"; + default: + LogPrintfError("Invalid HIPRTC error code: %d \n", x); + return nullptr; + }; + + return nullptr; +} + + +hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, + int numHeaders, const char** headers, const char** 
headerNames) { + HIPRTC_INIT_API(prog, src, name, numHeaders, headers, headerNames); + + if (prog == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM); + } + if (numHeaders < 0) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + if (numHeaders && (headers == nullptr || headerNames == nullptr)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + std::string progName; + + if (name) { + progName = name; + } + + auto* rtcProgram = new hiprtc::RTCCompileProgram(progName); + if (rtcProgram == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE); + } + + if (name == nullptr || strlen(name) == 0) { + progName = "CompileSourceXXXXXX"; + hiprtc::helpers::GenerateUniqueFileName(progName); + } + + if (!rtcProgram->addSource(std::string(src), progName)) { + delete rtcProgram; + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + for (int i = 0; i < numHeaders; i++) { + if (!rtcProgram->addHeader(std::string(headers[i]), std::string(headerNames[i]))) { + delete rtcProgram; + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + } + + *prog = hiprtc::RTCCompileProgram::as_hiprtcProgram(rtcProgram); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { + HIPRTC_INIT_API(prog, numOptions, options); + + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + + bool fgpu_rdc = false; + std::vector opt; + opt.reserve(numOptions); + for (int i = 0; i < numOptions; i++) { + if (std::string(options[i]) == std::string("-fgpu-rdc")) { + fgpu_rdc = true; + } + opt.push_back(std::string(options[i])); + } + + if (!rtcProgram->compile(opt, fgpu_rdc)) { + HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) { + HIPRTC_INIT_API(prog, name_expression); + + if (name_expression == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + auto* rtcProgram = 
hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + std::string name = name_expression; + if (!rtcProgram->trackMangledName(name)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, + const char** loweredName) { + HIPRTC_INIT_API(prog, name_expression, loweredName); + + if (name_expression == nullptr || loweredName == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + + if (!rtcProgram->getMangledName(name_expression, loweredName)) { + return HIPRTC_RETURN(HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { + HIPRTC_INIT_API(prog); + + if (prog == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(*prog); + delete rtcProgram; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) { + HIPRTC_INIT_API(prog, binarySizeRet); + + if (binarySizeRet == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + + *binarySizeRet = rtcProgram->getExecSize(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) { + HIPRTC_INIT_API(prog, binaryMem); + + if (binaryMem == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + auto binary = rtcProgram->getExec(); + ::memcpy(binaryMem, binary.data(), binary.size()); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) { + HIPRTC_INIT_API(prog, dst); + if (dst == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + auto* 
rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + auto log = rtcProgram->getLog(); + ::memcpy(dst, log.data(), log.size()); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { + HIPRTC_INIT_API(prog, logSizeRet); + if (logSizeRet == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + + *logSizeRet = rtcProgram->getLogSize(); + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcVersion(int* major, int* minor) { + HIPRTC_INIT_API(major, minor); + + if (major == nullptr || minor == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + // TODO add actual version, what do these numbers mean + *major = 9; + *minor = 0; + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetBitcode (hiprtcProgram prog, char* bitcode) { + HIPRTC_INIT_API(prog, bitcode); + + if (bitcode == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + if (!rtcProgram->GetBitcode(bitcode)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcGetBitcodeSize(hiprtcProgram prog, size_t* bitcode_size) { + HIPRTC_INIT_API(prog, bitcode_size); + + if (bitcode_size == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + auto* rtcProgram = hiprtc::RTCCompileProgram::as_RTCCompileProgram(prog); + if (!rtcProgram->GetBitcodeSize(bitcode_size)) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_PROGRAM); + } + + HIPRTC_RETURN(HIPRTC_SUCCESS); +} + +hiprtcResult hiprtcLinkCreate(unsigned int num_options, hiprtcJIT_option* options_ptr, + void** options_vals_pptr, hiprtcLinkState* hip_link_state_ptr) { + HIPRTC_INIT_API(num_options, options_ptr, options_vals_pptr, hip_link_state_ptr); + + if (hip_link_state_ptr == nullptr) { + HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT); + } + + 
std::string name("Linker Program");
+  hiprtc::RTCLinkProgram* rtc_link_prog_ptr = new hiprtc::RTCLinkProgram(name);
+  if (!rtc_link_prog_ptr->AddLinkerOptions(num_options, options_ptr, options_vals_pptr)) {
+    // The handle is never published to the caller on this path, so it must be
+    // released here or it is leaked.
+    delete rtc_link_prog_ptr;
+    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_OPTION);
+  }
+
+  // The opaque link state handed back to the caller is the RTCLinkProgram itself;
+  // the other hiprtcLink* entry points cast it back.
+  *hip_link_state_ptr = reinterpret_cast<hiprtcLinkState>(rtc_link_prog_ptr);
+
+  HIPRTC_RETURN(HIPRTC_SUCCESS);
+}
+
+// Adds the file at file_path to the link job identified by hip_link_state.
+// Reports HIPRTC_ERROR_INVALID_INPUT for a null link state, a null path, or one of
+// the input types rejected below, and HIPRTC_ERROR_PROGRAM_CREATION_FAILURE when
+// the linker program refuses the file. num_options, options_ptr and option_values
+// are only traced by HIPRTC_INIT_API; this function does not read them.
+hiprtcResult hiprtcLinkAddFile(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type,
+                               const char* file_path, unsigned int num_options,
+                               hiprtcJIT_option* options_ptr, void** option_values) {
+  HIPRTC_INIT_API(hip_link_state, input_type, file_path, num_options, options_ptr, option_values);
+
+  // file_path is turned into a std::string below, which is undefined for nullptr.
+  if (hip_link_state == nullptr || file_path == nullptr) {
+    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
+  }
+
+  if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX
+      || input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT
+      || input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == HIPRTC_JIT_INPUT_NVVM) {
+    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
+  }
+
+  hiprtc::RTCLinkProgram* rtc_link_prog_ptr
+    = reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
+  if (!rtc_link_prog_ptr->AddLinkerFile(std::string(file_path), input_type)) {
+    HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
+  }
+
+  HIPRTC_RETURN(HIPRTC_SUCCESS);
+}
+
+hiprtcResult hiprtcLinkAddData(hiprtcLinkState hip_link_state, hiprtcJITInputType input_type,
+                               void* image, size_t image_size, const char* name,
+                               unsigned int num_options, hiprtcJIT_option* options_ptr,
+                               void** option_values) {
+  HIPRTC_INIT_API(hip_link_state, image, image_size, name, num_options, options_ptr,
+                  option_values);
+
+  // The link state is dereferenced further down, so reject a null handle here
+  // just as hiprtcLinkAddFile does.
+  if (hip_link_state == nullptr || image == nullptr || image_size <= 0) {
+    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
+  }
+
+  if (input_type == HIPRTC_JIT_INPUT_CUBIN || input_type == HIPRTC_JIT_INPUT_PTX
+      || input_type == HIPRTC_JIT_INPUT_FATBINARY || input_type == HIPRTC_JIT_INPUT_OBJECT
+      || input_type == HIPRTC_JIT_INPUT_LIBRARY || input_type == 
HIPRTC_JIT_INPUT_NVVM) {
+    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
+  }
+
+  // name is optional; a null name is passed on as an empty string.
+  std::string input_name;
+  if (name) {
+    input_name = name;
+  }
+
+  hiprtc::RTCLinkProgram* rtc_link_prog_ptr
+    = reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
+  if (!rtc_link_prog_ptr->AddLinkerData(image, image_size, input_name, input_type)) {
+    HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
+  }
+
+  HIPRTC_RETURN(HIPRTC_SUCCESS);
+}
+
+// Completes the link job identified by hip_link_state and hands the result back
+// through bin_out / size_out. Reports HIPRTC_ERROR_INVALID_INPUT when the link
+// state or either output pointer is null, and HIPRTC_ERROR_LINKING when the
+// linker program reports failure.
+hiprtcResult hiprtcLinkComplete(hiprtcLinkState hip_link_state, void** bin_out, size_t* size_out) {
+  HIPRTC_INIT_API(hip_link_state, bin_out, size_out);
+
+  // The state is dereferenced and the outputs are forwarded unchecked below, so
+  // validate all three up front.
+  if (hip_link_state == nullptr || bin_out == nullptr || size_out == nullptr) {
+    HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
+  }
+
+  hiprtc::RTCLinkProgram* rtc_link_prog_ptr
+    = reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
+  if (!rtc_link_prog_ptr->LinkComplete(bin_out, size_out)) {
+    HIPRTC_RETURN(HIPRTC_ERROR_LINKING);
+  }
+
+  HIPRTC_RETURN(HIPRTC_SUCCESS);
+}
+
+// Destroys the linker program behind hip_link_state. A null handle is accepted:
+// deleting a null pointer does nothing and success is still reported.
+hiprtcResult hiprtcLinkDestroy(hiprtcLinkState hip_link_state) {
+  HIPRTC_INIT_API(hip_link_state);
+
+  hiprtc::RTCLinkProgram* rtc_link_prog_ptr
+    = reinterpret_cast<hiprtc::RTCLinkProgram*>(hip_link_state);
+  delete rtc_link_prog_ptr;
+
+  HIPRTC_RETURN(HIPRTC_SUCCESS);
+}
+
diff --git a/projects/clr/hipamd/src/hiprtc/hiprtc.def b/projects/clr/hipamd/src/hiprtc/hiprtc.def
new file mode 100644
index 0000000000..a25ec71336
--- /dev/null
+++ b/projects/clr/hipamd/src/hiprtc/hiprtc.def
@@ -0,0 +1,19 @@
+EXPORTS
+hiprtcAddNameExpression
+hiprtcCompileProgram
+hiprtcCreateProgram
+hiprtcDestroyProgram
+hiprtcGetLoweredName
+hiprtcGetProgramLog
+hiprtcGetProgramLogSize
+hiprtcGetCode
+hiprtcGetCodeSize
+hiprtcVersion
+hiprtcGetErrorString
+hiprtcLinkCreate
+hiprtcLinkAddFile
+hiprtcLinkAddData
+hiprtcLinkComplete
+hiprtcLinkDestroy
+hiprtcGetBitcode
+hiprtcGetBitcodeSize
diff --git a/projects/clr/hipamd/src/hiprtc/hiprtc.map.in b/projects/clr/hipamd/src/hiprtc/hiprtc.map.in
new file mode 100644
index 0000000000..427c483340
--- /dev/null
+++ b/projects/clr/hipamd/src/hiprtc/hiprtc.map.in
@@ -0,0 +1,23 @@
+{
+global:
+    hiprtcCompileProgram;
+    hiprtcCreateProgram;
+    hiprtcDestroyProgram;
+    hiprtcGetLoweredName;
+    
hiprtcGetProgramLog; + hiprtcGetProgramLogSize; + hiprtcGetCode; + hiprtcGetCodeSize; + hiprtcGetErrorString; + hiprtcAddNameExpression; + hiprtcVersion; + hiprtcLinkCreate; + hiprtcLinkAddFile; + hiprtcLinkAddData; + hiprtcLinkComplete; + hiprtcLinkDestroy; + hiprtcGetBitcode; + hiprtcGetBitcodeSize; +local: + *; +}; diff --git a/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.cpp b/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.cpp new file mode 100644 index 0000000000..7c5ab5b772 --- /dev/null +++ b/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.cpp @@ -0,0 +1,979 @@ +/* +Copyright (c) 2022 - Present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hiprtcComgrHelper.hpp" +#if defined(_WIN32) + #include +#endif + +#include "../amd_hsa_elf.hpp" + +namespace hiprtc { + +namespace helpers { + +size_t constexpr strLiteralLength(char const* str) { + return *str ? 
1 + strLiteralLength(str + 1) : 0; +} + +constexpr char const* CLANG_OFFLOAD_BUNDLER_MAGIC_STR = "__CLANG_OFFLOAD_BUNDLE__"; +constexpr char const* OFFLOAD_KIND_HIP = "hip"; +constexpr char const* OFFLOAD_KIND_HIPV4 = "hipv4"; +constexpr char const* OFFLOAD_KIND_HCC = "hcc"; +constexpr char const* AMDGCN_TARGET_TRIPLE = "amdgcn-amd-amdhsa-"; + +static constexpr size_t bundle_magic_string_size + = strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC_STR); + +struct __ClangOffloadBundleInfo { + uint64_t offset; + uint64_t size; + uint64_t bundleEntryIdSize; + const char bundleEntryId[1]; +}; + +struct __ClangOffloadBundleHeader { + const char magic[bundle_magic_string_size - 1]; + uint64_t numOfCodeObjects; + __ClangOffloadBundleInfo desc[1]; +}; + +uint64_t ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } + +static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported, + bool& sramEccSupported) { + switch (EFlags & EF_AMDGPU_MACH) { + case EF_AMDGPU_MACH_AMDGCN_GFX700: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx700"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX701: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx701"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX702: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx702"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX703: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx703"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX704: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx704"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX705: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx705"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX801: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx801"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX802: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx802"; + break; + case 
EF_AMDGPU_MACH_AMDGCN_GFX803: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx803"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX805: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx805"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX810: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx810"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX900: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx900"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX902: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx902"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX904: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx904"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX906: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx906"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX908: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx908"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX909: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx909"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX90A: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx90a"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX90C: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx90c"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX940: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx940"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1010: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1010"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1011: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1011"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1012: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1012"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1013: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx1013"; + break; + case 
EF_AMDGPU_MACH_AMDGCN_GFX1030: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1030"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1031: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1031"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1032: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1032"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1033: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1033"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1034: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1034"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1035: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1035"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1036: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1036"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1100: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1100"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1101: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1101"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1102: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1102"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX1103: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx1103"; + break; + default: + return false; + } + return true; +} + +static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string& target_id) { + if (!code_object) return false; + const Elf64_Ehdr* ehdr = reinterpret_cast(code_object); + if (ehdr->e_machine != EM_AMDGPU) return false; + if (ehdr->e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return false; + + bool isXnackSupported{false}, isSramEccSupported{false}; + + std::string proc_name; + if (!getProcName(ehdr->e_flags, proc_name, isXnackSupported, isSramEccSupported)) return false; + target_id = std::string(AMDGCN_TARGET_TRIPLE) + '-' + proc_name; + + switch 
(ehdr->e_ident[EI_ABIVERSION]) {
+    case ELFABIVERSION_AMDGPU_HSA_V2: {
+      // V2 code objects are only logged; they are reported as unsupported.
+      LogPrintfInfo("[Code Object V2, target id:%s]", target_id.c_str());
+      return false;
+    }
+
+    case ELFABIVERSION_AMDGPU_HSA_V3: {
+      LogPrintfInfo("[Code Object V3, target id:%s]", target_id.c_str());
+      // V3 always spells the feature out (+ or -) when the processor supports it.
+      if (isSramEccSupported) {
+        if (ehdr->e_flags & EF_AMDGPU_FEATURE_SRAMECC_V3)
+          target_id += ":sramecc+";
+        else
+          target_id += ":sramecc-";
+      }
+      if (isXnackSupported) {
+        if (ehdr->e_flags & EF_AMDGPU_FEATURE_XNACK_V3)
+          target_id += ":xnack+";
+        else
+          target_id += ":xnack-";
+      }
+      break;
+    }
+
+    case ELFABIVERSION_AMDGPU_HSA_V4:
+    case ELFABIVERSION_AMDGPU_HSA_V5: {
+      // Only V4 and V5 reach this point. Compare for equality: a bitwise AND
+      // against the V4 constant cannot reliably tell the two versions apart.
+      if (ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V4) {
+        LogPrintfInfo("[Code Object V4, target id:%s]", target_id.c_str());
+      } else {
+        LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str());
+      }
+      // From V4 on each feature is a multi-valued field; a suffix is appended
+      // only for the explicit OFF / ON values, anything else adds nothing.
+      unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4;
+      if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4)
+        target_id += ":sramecc-";
+      else if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
+        target_id += ":sramecc+";
+
+      unsigned co_xnack_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_XNACK_V4;
+      if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_OFF_V4)
+        target_id += ":xnack-";
+      else if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_ON_V4)
+        target_id += ":xnack+";
+      break;
+    }
+
+    default: {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Removes the prefix 'consume_' from the front of 'input'.
+// Returns false and leaves 'input' untouched when 'input' does not start with it.
+// eg: input = amdgcn-amd-amdhsa--gfx908 and consume_ is amdgcn-amd-amdhsa--
+// input will become gfx908.
+static bool consume(std::string& input, std::string consume_) {
+  if (input.substr(0, consume_.size()) != consume_) {
+    return false;
+  }
+  input = input.substr(consume_.size());
+  return true;
+}
+
+// Splits 'input' at the first occurrence of the trim character: the text before
+// it is returned and 'input' keeps the remainder, starting at the trim character.
+// example: input is gfx908:sram-ecc+ and trim char is ':' -> returns gfx908 and
+// input becomes :sram-ecc+. Without a trim character in 'input' the whole string
+// is returned and 'input' becomes empty.
+static std::string trimName(std::string& input, char trim) {
+  auto pos_ = input.find(trim);
+  auto res = input;
+  if (pos_ == std::string::npos) {
+    // No separator: everything is the result, nothing is left over.
+    input = "";
+  } else {
+    res = input.substr(0, pos_);
+    // The remainder keeps the separator as its first character.
+    input = input.substr(pos_);
+  }
+  return res;
+}
+
+// Reads the one-character setting that follows 'feature' at the front of 'input'
+// (e.g. the '+' in ":xnack+") and removes both from 'input'.
+// Returns ' ' when 'input' does not start with 'feature' (input is untouched),
+// and '\0' when the feature name is present but nothing follows it.
+static char getFeatureValue(std::string& input, std::string feature) {
+  char res = ' ';
+  if (consume(input, std::move(feature))) {
+    if (input.empty()) {
+      // Feature name without a sign: substr(1) on the empty remainder would
+      // throw std::out_of_range. Hand back a value no caller accepts instead.
+      return '\0';
+    }
+    res = input[0];
+    input = input.substr(1);
+  }
+  return res;
+}
+
+// Breaks a "<processor>[:sramecc(+|-)][:xnack(+|-)]" string into its parts,
+// consuming them from 'input'. A feature that is absent is reported as ' '.
+// Returns false when a feature is present with anything other than '+' or '-'.
+static bool getTargetIDValue(std::string& input, std::string& processor, char& sramecc_value,
+                             char& xnack_value) {
+  processor = trimName(input, ':');
+  sramecc_value = getFeatureValue(input, std::string(":sramecc"));
+  if (sramecc_value != ' ' && sramecc_value != '+' && sramecc_value != '-') return false;
+  xnack_value = getFeatureValue(input, std::string(":xnack"));
+  if (xnack_value != ' ' && xnack_value != '+' && xnack_value != '-') return false;
+  return true;
+}
+
+static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object,
+                              std::string& co_triple_target_id) {
+  // The entry id starts with the offload kind, terminated by the first '-'.
+  std::string offload_kind = trimName(bundled_co_entry_id, '-');
+  if (offload_kind != OFFLOAD_KIND_HIPV4 && offload_kind != OFFLOAD_KIND_HIP &&
+      offload_kind != OFFLOAD_KIND_HCC)
+    return false;
+
+  // For the other accepted kinds the target id is derived from the code object itself.
+  if (offload_kind != OFFLOAD_KIND_HIPV4)
+    return getTripleTargetIDFromCodeObject(code_object, co_triple_target_id);
+
+  // For code object V4 onwards the bundled code object entry ID correctly
+  // specifies the target triple.
+ co_triple_target_id = bundled_co_entry_id.substr(1); + return true; +} + +bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, + std::string agent_triple_target_id) { + // Primitive Check + if (co_triple_target_id == agent_triple_target_id) return true; + + // Parse code object triple target id + if (!consume(co_triple_target_id, std::string(OFFLOAD_KIND_HIP) + "-" + + std::string(AMDGCN_TARGET_TRIPLE))) { + return false; + } + + std::string co_processor; + char co_sram_ecc, co_xnack; + if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) { + return false; + } + + if (!co_triple_target_id.empty()) return false; + + // Parse agent isa triple target id + if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { + return false; + } + + std::string agent_isa_processor; + char isa_sram_ecc, isa_xnack; + if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) { + return false; + } + + if (!agent_triple_target_id.empty()) return false; + + // Check for compatibility + if (agent_isa_processor != co_processor) return false; + if (co_sram_ecc != ' ') { + if (co_sram_ecc != isa_sram_ecc) return false; + } + if (co_xnack != ' ') { + if (co_xnack != isa_xnack) return false; + } + + return true; +} + +bool UnbundleBitCode(const std::vector& bundled_llvm_bitcode, const std::string& isa, + size_t& co_offset, size_t& co_size) { + std::string magic(bundled_llvm_bitcode.begin(), + bundled_llvm_bitcode.begin() + bundle_magic_string_size); + if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC_STR)) { + // Handle case where the whole file is unbundled + return true; + } + + std::string bundled_llvm_bitcode_s(bundled_llvm_bitcode.begin(), bundled_llvm_bitcode.begin() + + bundled_llvm_bitcode.size()); + const void* data = reinterpret_cast(bundled_llvm_bitcode_s.c_str()); + const auto obheader + = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + for (uint64_t idx=0; idx < 
obheader->numOfCodeObjects; ++idx, + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + + desc->bundleEntryIdSize)) { + const void* image = reinterpret_cast(reinterpret_cast(obheader) + + desc->offset); + const size_t image_size = desc->size; + std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + + // Check if the device id and code object id are compatible + if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa)) { + co_offset = (reinterpret_cast(image) - reinterpret_cast(data)); + co_size = image_size; + std::cout<<"bundleEntryId: "<& source, + const std::string& name, const amd_comgr_data_kind_t type) { + amd_comgr_data_t data; + + if (auto res = amd::Comgr::create_data(type, &data); res != AMD_COMGR_STATUS_SUCCESS) { + return false; + } + + if (auto res = amd::Comgr::set_data(data, source.size(), source.data()); + res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::release_data(data); + return false; + } + + if (auto res = amd::Comgr::set_data_name(data, name.c_str()); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::release_data(data); + return false; + } + + if (auto res = amd::Comgr::data_set_add(input, data); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::release_data(data); + return false; + } + amd::Comgr::release_data(data); // Release from our end after setting the input + + return true; +} + +bool extractBuildLog(amd_comgr_data_set_t dataSet, std::string& buildLog) { + size_t count; + if (auto res = amd::Comgr::action_data_count(dataSet, AMD_COMGR_DATA_KIND_LOG, &count); + res != AMD_COMGR_STATUS_SUCCESS) { + return false; + } + + std::vector log; + if (count > 0) { + if (!extractByteCodeBinary(dataSet, AMD_COMGR_DATA_KIND_LOG, log)) return false; + buildLog.insert(buildLog.end(), log.data(), log.data() + log.size()); + } + return true; +} + +bool extractByteCodeBinary(const amd_comgr_data_set_t inDataSet, + const amd_comgr_data_kind_t dataKind, std::vector& bin) { + amd_comgr_data_t binaryData; + + if (auto 
res = amd::Comgr::action_data_get_data(inDataSet, dataKind, 0, &binaryData); + res != AMD_COMGR_STATUS_SUCCESS) { + return false; + } + + size_t binarySize = 0; + if (auto res = amd::Comgr::get_data(binaryData, &binarySize, NULL); + res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::release_data(binaryData); + return false; + } + + size_t bufSize = (dataKind == AMD_COMGR_DATA_KIND_LOG) ? binarySize + 1 : binarySize; + + char* binary = new char[bufSize]; + if (binary == nullptr) { + amd::Comgr::release_data(binaryData); + return false; + } + + + if (auto res = amd::Comgr::get_data(binaryData, &binarySize, binary); + res != AMD_COMGR_STATUS_SUCCESS) { + delete[] binary; + amd::Comgr::release_data(binaryData); + return false; + } + + if (dataKind == AMD_COMGR_DATA_KIND_LOG) { + binary[binarySize] = '\0'; + } + + amd::Comgr::release_data(binaryData); + + bin.reserve(binarySize); + bin.assign(binary, binary + binarySize); + delete[] binary; + + return true; +} + +bool createAction(amd_comgr_action_info_t& action, std::vector& options, + const std::string& isa, const amd_comgr_language_t lang) { + if (auto res = amd::Comgr::create_action_info(&action); res != AMD_COMGR_STATUS_SUCCESS) { + return false; + } + + if (lang != AMD_COMGR_LANGUAGE_NONE) { + if (auto res = amd::Comgr::action_info_set_language(action, lang); + res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + return false; + } + } + + if (auto res = amd::Comgr::action_info_set_isa_name(action, isa.c_str()); + res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + return false; + } + + std::vector optionsArgv; + optionsArgv.reserve(options.size()); + for (auto& option : options) { + optionsArgv.push_back(option.c_str()); + } + + if (auto res = + amd::Comgr::action_info_set_option_list(action, optionsArgv.data(), optionsArgv.size()); + res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + return res; + } + + if (auto res = 
amd::Comgr::action_info_set_logging(action, true); + res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + return res; + } + + return AMD_COMGR_STATUS_SUCCESS; +} + +bool compileToBitCode(const amd_comgr_data_set_t compileInputs, const std::string& isa, + std::vector& compileOptions, std::string& buildLog, + std::vector& LLVMBitcode) { + amd_comgr_language_t lang = AMD_COMGR_LANGUAGE_HIP; + amd_comgr_action_info_t action; + amd_comgr_data_set_t output; + amd_comgr_data_set_t input = compileInputs; + + if (auto res = createAction(action, compileOptions, isa, lang); res != AMD_COMGR_STATUS_SUCCESS) { + return false; + } + + if (auto res = amd::Comgr::create_data_set(&output); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + return false; + } + + if (auto res = + amd::Comgr::do_action(AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC, + action, input, output); + res != AMD_COMGR_STATUS_SUCCESS) { + extractBuildLog(output, buildLog); + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(output); + return false; + } + + if (!extractBuildLog(output, buildLog)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(output); + return false; + } + + if (!extractByteCodeBinary(output, AMD_COMGR_DATA_KIND_BC, LLVMBitcode)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(output); + return false; + } + + // Clean up + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(output); + return true; +} + +bool linkLLVMBitcode(const amd_comgr_data_set_t linkInputs, const std::string& isa, + std::vector& linkOptions, std::string& buildLog, + std::vector& LinkedLLVMBitcode) { + amd_comgr_language_t lang = AMD_COMGR_LANGUAGE_HIP; + amd_comgr_action_info_t action; + amd_comgr_data_set_t dataSetDevLibs; + + if (auto res = createAction(action, linkOptions, isa, AMD_COMGR_LANGUAGE_HIP); + res != AMD_COMGR_STATUS_SUCCESS) { + return false; + } + 
+ if (auto res = amd::Comgr::create_data_set(&dataSetDevLibs); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + return false; + } + + + if (auto res = amd::Comgr::do_action(AMD_COMGR_ACTION_ADD_DEVICE_LIBRARIES, action, linkInputs, + dataSetDevLibs); + res != AMD_COMGR_STATUS_SUCCESS) { + extractBuildLog(dataSetDevLibs, buildLog); + LogPrintfInfo("%s", buildLog.c_str()); + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(dataSetDevLibs); + return false; + } + + if (!extractBuildLog(dataSetDevLibs, buildLog)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(dataSetDevLibs); + return false; + } + + amd_comgr_data_set_t output; + if (auto res = amd::Comgr::create_data_set(&output); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(dataSetDevLibs); + return false; + } + + if (auto res = + amd::Comgr::do_action(AMD_COMGR_ACTION_LINK_BC_TO_BC, action, dataSetDevLibs, output); + res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(dataSetDevLibs); + amd::Comgr::destroy_data_set(output); + return false; + } + + if (!extractBuildLog(output, buildLog)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(dataSetDevLibs); + amd::Comgr::destroy_data_set(output); + return false; + } + + if (!extractByteCodeBinary(output, AMD_COMGR_DATA_KIND_BC, LinkedLLVMBitcode)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(dataSetDevLibs); + amd::Comgr::destroy_data_set(output); + return false; + } + + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(dataSetDevLibs); + amd::Comgr::destroy_data_set(output); + return true; +} + +bool createExecutable(const amd_comgr_data_set_t linkInputs, const std::string& isa, + std::vector& exeOptions, std::string& buildLog, + std::vector& executable) { + amd_comgr_action_info_t action; + + 
if (auto res = createAction(action, exeOptions, isa); res != AMD_COMGR_STATUS_SUCCESS) { + return false; + } + + amd_comgr_data_set_t relocatableData; + if (auto res = amd::Comgr::create_data_set(&relocatableData); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + return false; + } + + if (auto res = amd::Comgr::do_action(AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE, action, + linkInputs, relocatableData); + res != AMD_COMGR_STATUS_SUCCESS) { + extractBuildLog(relocatableData, buildLog); + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(relocatableData); + return false; + } + + if (!extractBuildLog(relocatableData, buildLog)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(relocatableData); + return false; + } + + + amd::Comgr::destroy_action_info(action); + std::vector emptyOpt; + if (auto res = createAction(action, emptyOpt, isa); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_data_set(relocatableData); + return false; + } + + amd_comgr_data_set_t output; + if (auto res = amd::Comgr::create_data_set(&output); res != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(relocatableData); + return false; + } + + if (auto res = amd::Comgr::do_action(AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE, action, + relocatableData, output); + res != AMD_COMGR_STATUS_SUCCESS) { + extractBuildLog(output, buildLog); + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(output); + amd::Comgr::destroy_data_set(relocatableData); + return false; + } + + if (!extractBuildLog(output, buildLog)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(output); + amd::Comgr::destroy_data_set(relocatableData); + return false; + } + + if (!extractByteCodeBinary(output, AMD_COMGR_DATA_KIND_EXECUTABLE, executable)) { + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(output); + 
amd::Comgr::destroy_data_set(relocatableData);
+    return false;
+  }
+
+  amd::Comgr::destroy_action_info(action);
+  amd::Comgr::destroy_data_set(output);
+  amd::Comgr::destroy_data_set(relocatableData);
+
+  return true;
+}
+
+// Rewrites the file-name template held in 'name' into a unique file name.
+// POSIX: mkstemp() fills in the template and creates the file; the file is
+// removed and closed again right away, so only the generated name is kept.
+// Windows: _mktemp_s() fills in the template without creating a file.
+void GenerateUniqueFileName(std::string &name) {
+#if !defined(_WIN32)
+  // mkstemp() edits the template in place, so hand it the string's own writable
+  // storage rather than casting away the const of c_str().
+  const int temp_fd = mkstemp(name.data());
+  if (temp_fd != -1) {
+    // Clean up only what mkstemp() really created. After a failure there is no
+    // descriptor to close, and unlinking would hit whatever path 'name' holds.
+    unlink(name.c_str());
+    close(temp_fd);
+  }
+#else
+  // Work on a scratch copy and pass the full buffer size, terminator included.
+  // A std::string owns the scratch buffer, so nothing is left to free by hand.
+  std::string scratch(name);
+  _mktemp_s(&scratch[0], scratch.size() + 1);
+  name = scratch.c_str();
+#endif
+}
+
+// Generates ISA assembly from the bitcode in isaInputs for the given isa and
+// writes it to a file derived from 'name' and 'isa'. An empty 'name' is replaced
+// by a generated unique one. Returns false on any failure.
+bool dumpIsaFromBC(const amd_comgr_data_set_t isaInputs, const std::string& isa,
+                   std::vector<std::string>& exeOptions, std::string name, std::string& buildLog) {
+
+  amd_comgr_action_info_t action;
+
+  // NOTE(review): createAction is declared bool but is compared against a comgr
+  // status here - confirm which of its return values really mean failure.
+  if (auto res = createAction(action, exeOptions, isa); res != AMD_COMGR_STATUS_SUCCESS) {
+    return false;
+  }
+
+  amd_comgr_data_set_t isaData;
+  if (auto res = amd::Comgr::create_data_set(&isaData); res != AMD_COMGR_STATUS_SUCCESS) {
+    amd::Comgr::destroy_action_info(action);
+    return false;
+  }
+
+  if (auto res = amd::Comgr::do_action(AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY, action, isaInputs,
+                                       isaData);
+      res != AMD_COMGR_STATUS_SUCCESS) {
+    // Keep whatever the failed action logged before tearing everything down.
+    extractBuildLog(isaData, buildLog);
+    amd::Comgr::destroy_action_info(action);
+    amd::Comgr::destroy_data_set(isaData);
+    return false;
+  }
+
+  std::vector<char> isaOutput;
+  if (!extractByteCodeBinary(isaData, AMD_COMGR_DATA_KIND_SOURCE, isaOutput)) {
+    amd::Comgr::destroy_action_info(action);
+    amd::Comgr::destroy_data_set(isaData);
+    return false;
+  }
+
+  if (name.size() == 0) {
+    // Generate a unique name if the program name is not specified by the user
+    name = std::string("hiprtcXXXXXX");
+    GenerateUniqueFileName(name);
+  }
+  std::string isaName = isa;
+#if defined(_WIN32)
+  // Replace special characters that are not supported by Windows FS.
+ std::replace(isaName.begin(), isaName.end(), ':', '@'); +#endif + + auto isaFileName = name + std::string("-hip-") + isaName + ".s"; + std::ofstream f(isaFileName.c_str(), std::ios::trunc | std::ios::binary); + if (f.is_open()) { + f.write(isaOutput.data(), isaOutput.size()); + f.close(); + } else { + buildLog += "Warning: writing isa file failed.\n"; + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(isaData); + return false; + } + amd::Comgr::destroy_action_info(action); + amd::Comgr::destroy_data_set(isaData); + return true; +} + +bool demangleName(const std::string& mangledName, std::string& demangledName) { + amd_comgr_data_t mangled_data; + amd_comgr_data_t demangled_data; + + if (AMD_COMGR_STATUS_SUCCESS != amd::Comgr::create_data(AMD_COMGR_DATA_KIND_BYTES, &mangled_data)) + return false; + + if (AMD_COMGR_STATUS_SUCCESS != + amd::Comgr::set_data(mangled_data, mangledName.size(), mangledName.c_str())) { + amd::Comgr::release_data(mangled_data); + return false; + } + + if (AMD_COMGR_STATUS_SUCCESS != amd::Comgr::demangle_symbol_name(mangled_data, &demangled_data)) { + amd::Comgr::release_data(mangled_data); + return false; + } + + size_t demangled_size = 0; + if (AMD_COMGR_STATUS_SUCCESS != amd::Comgr::get_data(demangled_data, &demangled_size, NULL)) { + amd::Comgr::release_data(mangled_data); + amd::Comgr::release_data(demangled_data); + return false; + } + + demangledName.resize(demangled_size); + + if (AMD_COMGR_STATUS_SUCCESS != + amd::Comgr::get_data(demangled_data, &demangled_size, + const_cast(demangledName.data()))) { + amd::Comgr::release_data(mangled_data); + amd::Comgr::release_data(demangled_data); + return false; + } + + amd::Comgr::release_data(mangled_data); + amd::Comgr::release_data(demangled_data); + return true; +} + +std::string handleMangledName(std::string loweredName) { + if (loweredName.empty()) { + return loweredName; + } + + if (loweredName.find(".kd") != std::string::npos) { + return {}; + } + + if 
(loweredName.find("void ") == 0) {
+    loweredName.erase(0, strlen("void "));
+  }
+
+  auto dx{loweredName.find_first_of("(<")};
+
+  if (dx == std::string::npos) {
+    return loweredName;
+  }
+
+  if (loweredName[dx] == '<') {
+    // Walk to the '>' that closes the template argument list opened at dx,
+    // tracking the nesting depth, and cut everything after it.
+    uint32_t count = 1;
+    do {
+      ++dx;
+      if (dx >= loweredName.size()) {
+        // Unbalanced '<': there is no closing '>' to cut at, and reading on
+        // would run past the end of the string. Leave the name untrimmed.
+        return loweredName;
+      }
+      count += (loweredName[dx] == '<') ? 1 : ((loweredName[dx] == '>') ? -1 : 0);
+    } while (count);
+
+    loweredName.erase(++dx);
+  } else {
+    // Plain function: drop the parameter list.
+    loweredName.erase(dx);
+  }
+
+  return loweredName;
+}
+
+// Collects the mangled symbol names found in dataVec, which is treated as
+// bitcode when isBitcode is true and as an executable otherwise, and appends
+// them to mangledNames. Returns false as soon as a comgr call fails; names
+// appended before the failure stay in mangledNames.
+bool fillMangledNames(std::vector<char>& dataVec,
+                      std::vector<std::string>& mangledNames, bool isBitcode) {
+  amd_comgr_data_t dataObject;
+  if (auto res = amd::Comgr::create_data(isBitcode ? AMD_COMGR_DATA_KIND_BC :
+                                         AMD_COMGR_DATA_KIND_EXECUTABLE, &dataObject);
+      res != AMD_COMGR_STATUS_SUCCESS) {
+    return false;
+  }
+
+  if (auto res = amd::Comgr::set_data(dataObject, dataVec.size(), dataVec.data())) {
+    amd::Comgr::release_data(dataObject);
+    return false;
+  }
+
+  size_t Count;
+  if (auto res = amd::Comgr::populate_mangled_names(dataObject, &Count)) {
+    amd::Comgr::release_data(dataObject);
+    return false;
+  }
+
+  for (size_t i = 0; i < Count; i++) {
+    // First call asks for the size only, second call fetches the name.
+    size_t Size;
+    if (auto res = amd::Comgr::get_mangled_name(dataObject, i, &Size, NULL)) {
+      amd::Comgr::release_data(dataObject);
+      return false;
+    }
+
+    // A std::string owns the zero-filled buffer, so it is released on every
+    // path, including the early return below.
+    std::string mName(Size, '\0');
+    if (auto res = amd::Comgr::get_mangled_name(dataObject, i, &Size, &mName[0])) {
+      amd::Comgr::release_data(dataObject);
+      return false;
+    }
+
+    // Store the name up to its first terminator, as a C string.
+    mangledNames.push_back(std::string(mName.c_str()));
+  }
+
+  amd::Comgr::release_data(dataObject);
+  return true;
+}
+
+bool getDemangledNames(const std::vector<std::string>& mangledNames,
+                       std::map<std::string, std::string>& demangledNames) {
+  for (auto& i : mangledNames) {
+    std::string demangledName;
+    if (!demangleName(i, demangledName)) return false;
+    demangledName = handleMangledName(demangledName);
+
+    // Compare names with all whitespace removed.
+    demangledName.erase(std::remove_if(demangledName.begin(), demangledName.end(),
+                                       [](unsigned char c) { return std::isspace(c); }),
+                        demangledName.end());
+
+    if (auto dres = 
demangledNames.find(demangledName); dres != demangledNames.end()) { + dres->second = i; + } + } + return true; +} +} // namespace helpers +} // namespace hiprtc diff --git a/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.hpp b/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.hpp new file mode 100644 index 0000000000..290982cd90 --- /dev/null +++ b/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.hpp @@ -0,0 +1,63 @@ +/* +Copyright (c) 2022 - Present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include + +#include "vdi_common.hpp" +#include "utils/debug.hpp" +#include "device/comgrctx.hpp" + +namespace hiprtc { +namespace helpers { +bool UnbundleBitCode(const std::vector& bundled_bit_code, const std::string& isa, + size_t& co_offset, size_t& co_size); +bool addCodeObjData(amd_comgr_data_set_t& input, const std::vector& source, + const std::string& name, const amd_comgr_data_kind_t type); +bool extractBuildLog(amd_comgr_data_set_t dataSet, std::string& buildLog); +bool extractByteCodeBinary(const amd_comgr_data_set_t inDataSet, + const amd_comgr_data_kind_t dataKind, std::vector& bin); +bool createAction(amd_comgr_action_info_t& action, std::vector& options, + const std::string& isa, + const amd_comgr_language_t lang = AMD_COMGR_LANGUAGE_NONE); +bool compileToBitCode(const amd_comgr_data_set_t compileInputs, const std::string& isa, + std::vector& compileOptions, std::string& buildLog, + std::vector& LLVMBitcode); +bool linkLLVMBitcode(const amd_comgr_data_set_t linkInputs, const std::string& isa, + std::vector& linkOptions, std::string& buildLog, + std::vector& LinkedLLVMBitcode); +bool createExecutable(const amd_comgr_data_set_t linkInputs, const std::string& isa, + std::vector& exeOptions, std::string& buildLog, + std::vector& executable); +bool dumpIsaFromBC(const amd_comgr_data_set_t isaInputs, const std::string& isa, + std::vector& exeOptions, std::string name, std::string& buildLog); +bool demangleName(const std::string& mangledName, std::string& demangledName); +std::string handleMangledName(std::string loweredName); +bool fillMangledNames(std::vector& executable, + std::vector& mangledNames, bool isBitcode); +bool getDemangledNames(const std::vector& mangledNames, + std::map& demangledNames); +void GenerateUniqueFileName(std::string &name); +} // namespace helpers +} // namespace hiprtc diff --git a/projects/clr/hipamd/src/hiprtc/hiprtcInternal.cpp b/projects/clr/hipamd/src/hiprtc/hiprtcInternal.cpp new file mode 
100644 index 0000000000..188b42d8de --- /dev/null +++ b/projects/clr/hipamd/src/hiprtc/hiprtcInternal.cpp @@ -0,0 +1,671 @@ +/* +Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include "hiprtcInternal.hpp"
+
+#include 
+#include 
+
+#include "vdi_common.hpp"
+#include "utils/flags.hpp"
+
+namespace hiprtc {
+using namespace helpers;
+
+//RTC Program Member Functions
+
+// Loads the COMGR library once per process (std::call_once) and creates exec_input_, the COMGR
+// data set that is later handed to createExecutable. Aborts through crashWithMessage if the data
+// set cannot be created.
+RTCProgram::RTCProgram(std::string name) : name_(name) {
+  constexpr bool kComgrVersioned = true;
+  std::call_once(amd::Comgr::initialized, amd::Comgr::LoadLib, kComgrVersioned);
+  if (amd::Comgr::create_data_set(&exec_input_) != AMD_COMGR_STATUS_SUCCESS) {
+    crashWithMessage("Failed to allocate internal hiprtc structure");
+  }
+}
+
+// Determines the target ISA from the device the HIP runtime currently reports.
+// The HIP runtime library is loaded dynamically, hipGetDevice and hipGetDeviceProperties are
+// resolved from it, and on success isa_ is set to "amdgcn-amd-amdhsa--" followed by the device's
+// gcnArchName. Returns false if the library cannot be loaded, a symbol is missing (both cases
+// also append an explanation to build_log_) or either HIP call fails.
+// Once the library has been opened its handle is released on every path, not only on success.
+bool RTCProgram::findIsa() {
+#ifdef _WIN32
+  const char* libName = "amdhip64.dll";
+#else
+  const char* libName = "libamdhip64.so";
+#endif
+
+  void* handle = amd::Os::loadLibrary(libName);
+
+  if (!handle) {
+    LogInfo("hip runtime failed to load using dlopen");
+    build_log_ +=
+        "hip runtime failed to load.\n"
+        "Error: Please provide architecture for which code is to be "
+        "generated.\n";
+    return false;
+  }
+
+  bool found = false;
+
+  void* sym_hipGetDevice = amd::Os::getSymbol(handle, "hipGetDevice");
+  void* sym_hipGetDeviceProperties = amd::Os::getSymbol(handle, "hipGetDeviceProperties");
+
+  if (sym_hipGetDevice == nullptr || sym_hipGetDeviceProperties == nullptr) {
+    LogInfo("ISA cannot be found to dlsym failure");
+    build_log_ +=
+        "ISA cannot be found from hip runtime.\n"
+        "Error: Please provide architecture for which code is to be "
+        "generated.\n";
+  } else {
+    hipError_t (*dyn_hipGetDevice)(int*) =
+        reinterpret_cast<hipError_t (*)(int*)>(sym_hipGetDevice);
+
+    hipError_t (*dyn_hipGetDeviceProperties)(hipDeviceProp_t*, int) =
+        reinterpret_cast<hipError_t (*)(hipDeviceProp_t*, int)>(sym_hipGetDeviceProperties);
+
+    int device = 0;
+    hipDeviceProp_t props;
+    // Short-circuit keeps the original order: properties are only queried for a valid device.
+    if (dyn_hipGetDevice(&device) == hipSuccess &&
+        dyn_hipGetDeviceProperties(&props, device) == hipSuccess) {
+      isa_ = "amdgcn-amd-amdhsa--";
+      isa_.append(props.gcnArchName);
+      found = true;
+    }
+  }
+
+  // Single exit point so the handle obtained above is never leaked.
+  amd::Os::unloadLibrary(handle);
+  return found;
+}
+
+//RTC Compile Program 
Member Functions +RTCCompileProgram::RTCCompileProgram(std::string name_) : RTCProgram(name_), fgpu_rdc_(false) { + + if ((amd::Comgr::create_data_set(&compile_input_) != AMD_COMGR_STATUS_SUCCESS) || + (amd::Comgr::create_data_set(&link_input_) != AMD_COMGR_STATUS_SUCCESS)) { + crashWithMessage("Failed to allocate internal hiprtc structure"); + } + // Add internal header + if (!addBuiltinHeader()) { + crashWithMessage("Unable to add internal header"); + } + + // Add compile options + const std::string hipVerOpt{"--hip-version=" + std::to_string(HIP_VERSION_MAJOR) + '.' + + std::to_string(HIP_VERSION_MINOR) + '.' + + std::to_string(HIP_VERSION_PATCH)}; + const std::string hipVerMajor{"-DHIP_VERSION_MAJOR=" + std::to_string(HIP_VERSION_MAJOR)}; + const std::string hipVerMinor{"-DHIP_VERSION_MINOR=" + std::to_string(HIP_VERSION_MINOR)}; + const std::string hipVerPatch{"-DHIP_VERSION_PATCH=" + std::to_string(HIP_VERSION_PATCH)}; + + compile_options_.reserve(20); // count of options below + compile_options_.push_back("-O3"); + + if (GPU_ENABLE_WGP_MODE) compile_options_.push_back("-mcumode"); + + if (!GPU_ENABLE_WAVE32_MODE) compile_options_.push_back("-mwavefrontsize64"); + + compile_options_.push_back(hipVerOpt); + compile_options_.push_back(hipVerMajor); + compile_options_.push_back(hipVerMinor); + compile_options_.push_back(hipVerPatch); + compile_options_.push_back("-D__HIPCC_RTC__"); + compile_options_.push_back("-include"); + compile_options_.push_back("hiprtc_runtime.h"); + compile_options_.push_back("-std=c++14"); + compile_options_.push_back("-nogpuinc"); + compile_options_.push_back("-Wno-gnu-line-marker"); + compile_options_.push_back("-Wno-missing-prototypes"); +#ifdef _WIN32 + compile_options_.push_back("-target"); + compile_options_.push_back("x86_64-pc-windows-msvc"); + compile_options_.push_back("-fms-extensions"); + compile_options_.push_back("-fms-compatibility"); +#endif + + exe_options_.push_back("-O3"); +} + +bool RTCCompileProgram::addSource(const 
std::string& source, const std::string& name) { + if (source.size() == 0 || name.size() == 0) { + LogError("Error in hiprtc: source or name is of size 0 in addSource"); + return false; + } + source_code_ += source; + source_name_ = name; + return true; +} + +// addSource_impl is a different function because we need to add source when we track mangled +// objects +bool RTCCompileProgram::addSource_impl() { + std::vector vsource(source_code_.begin(), source_code_.end()); + if (!addCodeObjData(compile_input_, vsource, source_name_, AMD_COMGR_DATA_KIND_SOURCE)) { + return false; + } + return true; +} + +bool RTCCompileProgram::addHeader(const std::string& source, const std::string& name) { + if (source.size() == 0 || name.size() == 0) { + LogError("Error in hiprtc: source or name is of size 0 in addHeader"); + return false; + } + std::vector vsource(source.begin(), source.end()); + if (!addCodeObjData(compile_input_, vsource, name, AMD_COMGR_DATA_KIND_INCLUDE)) { + return false; + } + return true; +} + +bool RTCCompileProgram::addBuiltinHeader() { + std::vector source(__hipRTC_header, __hipRTC_header + __hipRTC_header_size); + std::string name{"hiprtc_runtime.h"}; + if (!addCodeObjData(compile_input_, source, name, AMD_COMGR_DATA_KIND_INCLUDE)) { + return false; + } + return true; +} + +bool RTCCompileProgram::findLLVMOptions(const std::vector& options, + std::vector& llvm_options) { + for (size_t i = 0; i < options.size(); ++i) { + if (options[i] == "-mllvm") { + if (options.size() == (i+1)) { + LogInfo( + "-mllvm option passed by the app, it comes as a pair but there is no option after this"); + return false; + } + llvm_options.push_back(options[i]); + llvm_options.push_back(options[i + 1]); + } + } + return true; +} + +bool RTCCompileProgram::transformOptions(std::vector& compile_options) { + auto getValueOf = [](const std::string& option) { + std::string res; + auto f = std::find(option.begin(), option.end(), '='); + if (f != option.end()) res = std::string(f + 1, 
option.end()); + return res; + }; + + for (auto& i : compile_options) { + if (i == "-hip-pch") { + LogInfo( + "-hip-pch is deprecated option, has no impact on execution of new hiprtc programs, it " + "can be removed"); + i.clear(); + continue; + } + // Some rtc samples use --gpu-architecture + if (i.rfind("--gpu-architecture=", 0) == 0) { + LogInfo("--gpu-architecture is nvcc option, transforming it to --offload-arch option"); + auto val = getValueOf(i); + i = "--offload-arch=" + val; + continue; + } + if (i == "--save-temps") { + settings_.dumpISA = true; + continue; + } + } + + if (auto res = std::find_if( + compile_options.begin(), compile_options.end(), + [](const std::string& str) { return str.find("--offload-arch=") != std::string::npos; }); + res != compile_options.end()) { + auto isaName = getValueOf(*res); + isa_ = "amdgcn-amd-amdhsa--" + isaName; + settings_.offloadArchProvided = true; + return true; + } + // App has not provided the gpu archiecture, need to find it + return findIsa(); +} + +amd::Monitor RTCProgram::lock_("HIPRTC Program", true); + +bool RTCCompileProgram::compile(const std::vector& options, bool fgpu_rdc) { + + if (!addSource_impl()) { + LogError("Error in hiprtc: unable to add source code"); + return false; + } + + fgpu_rdc_ = fgpu_rdc; + + // Append compile options + std::vector compileOpts(compile_options_); + compileOpts.reserve(compile_options_.size() + options.size() + 2); + compileOpts.insert(compileOpts.end(), options.begin(), options.end()); + + if (!fgpu_rdc_) { + compileOpts.push_back("-Xclang"); + compileOpts.push_back("-disable-llvm-passes"); + } + + if (!transformOptions(compileOpts)) { + LogError("Error in hiprtc: unable to transform options"); + return false; + } + + if (!compileToBitCode(compile_input_, isa_, compileOpts, build_log_, LLVMBitcode_)) { + LogError("Error in hiprtc: unable to compile source to bitcode"); + return false; + } + + if (fgpu_rdc_) { + std::vector mangledNames; + if 
(!fillMangledNames(LLVMBitcode_, mangledNames, true)) { + LogError("Error in hiprtc: unable to fill mangled names"); + return false; + } + + if (!getDemangledNames(mangledNames, demangled_names_)) { + LogError("Error in hiprtc: unable to get demangled names"); + return false; + } + return true; + } + + std::string linkFileName = "linked"; + if (!addCodeObjData(link_input_, LLVMBitcode_, linkFileName, AMD_COMGR_DATA_KIND_BC)) { + LogError("Error in hiprtc: unable to add linked code object"); + return false; + } + + std::vector LinkedLLVMBitcode; + if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, LinkedLLVMBitcode)) { + LogError("Error in hiprtc: unable to add device libs to linked bitcode"); + return false; + } + + std::string linkedFileName = "LLVMBitcode.bc"; + if (!addCodeObjData(exec_input_, LinkedLLVMBitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) { + LogError("Error in hiprtc: unable to add device libs linked code object"); + return false; + } + + std::vector llvmOptions; + // Find the -mllvm options passed by the app such as "-mllvm" "-amdgpu-early-inline-all=true" + if (!findLLVMOptions(options, llvmOptions)) { + LogError("Error in hiprtc: unable to match -mllvm options"); + return false; + } + + std::vector exeOpts(exe_options_); + exeOpts.reserve(exeOpts.size() + llvmOptions.size() + 2); + // Add these options by default for optimizations during BC to Relocatable phase. 
+  exeOpts.push_back("-mllvm");
+  exeOpts.push_back("-amdgpu-internalize-symbols");
+  // User provided -mllvm options are appended at the end since they can override the above
+  // default options if necessary
+  exeOpts.insert(exeOpts.end(), llvmOptions.begin(), llvmOptions.end());
+
+  if (settings_.dumpISA) {
+    if (!dumpIsaFromBC(exec_input_, isa_, exeOpts, name_, build_log_)) {
+      LogError("Error in hiprtc: unable to dump isa code");
+      return false;
+    }
+  }
+
+  if (!createExecutable(exec_input_, isa_, exeOpts, build_log_, executable_)) {
+    LogError("Error in hiprtc: unable to create executable");
+    return false;
+  }
+
+  std::vector mangledNames;
+  if (!fillMangledNames(executable_, mangledNames, false)) {
+    LogError("Error in hiprtc: unable to fill mangled names");
+    return false;
+  }
+
+  if (!getDemangledNames(mangledNames, demangled_names_)) {
+    LogError("Error in hiprtc: unable to get demangled names");
+    return false;
+  }
+
+  return true;
+}
+
+// Normalises a name expression in place so it can be used as a lookup key:
+//  - if it ends with ')', that character is removed and everything before the first '(' is
+//    erased (the whole string is erased when no '(' is left);
+//  - a leading '&' is removed;
+//  - all whitespace characters are removed.
+// An empty string is accepted and left empty.
+void RTCCompileProgram::stripNamedExpression(std::string& strippedName) {
+  // back()/front() are undefined on an empty string, so both checks are guarded; the string can
+  // also become empty after the first step (e.g. for the input ")").
+  if (!strippedName.empty() && strippedName.back() == ')') {
+    strippedName.pop_back();
+    strippedName.erase(0, strippedName.find('('));
+  }
+  if (!strippedName.empty() && strippedName.front() == '&') {
+    strippedName.erase(0, 1);
+  }
+  // Removes the spaces from strippedName if present
+  strippedName.erase(std::remove_if(strippedName.begin(),
+                                    strippedName.end(),
+                                    [](unsigned char c) {
+                                      return std::isspace(c);
+                                    }), strippedName.end());
+}
+
+// Registers a name expression whose lowered (mangled) name should be retrievable after compile.
+// Records name -> stripped form in stripped_names_, adds the stripped form to demangled_names_
+// with an empty mangled name, and appends an `extern "C" constexpr auto __hiprtc_<N> = <name>;`
+// definition to the program source. Existing map entries are not overwritten.
+// Returns false for an empty name. Serialised through the class-wide lock_.
+bool RTCCompileProgram::trackMangledName(std::string& name) {
+  amd::ScopedLock lock(lock_);
+
+  if (name.size() == 0) return false;
+
+  std::string strippedNameNoSpace = name;
+  stripNamedExpression(strippedNameNoSpace);
+
+  stripped_names_.emplace(name, strippedNameNoSpace);
+  demangled_names_.emplace(strippedNameNoSpace, "");
+
+  // The variable suffix is the number of tracked names after this insertion.
+  const auto var{"__hiprtc_" + std::to_string(stripped_names_.size())};
+  const auto code{"\nextern \"C\" constexpr auto " + var + " = " + name + ";\n"};
+
+  source_code_ += code;
+  return true;
+}
+
+bool 
RTCCompileProgram::getMangledName(const char* name_expression, const char** loweredName) { + + std::string strippedName = name_expression; + stripNamedExpression(strippedName); + + if (auto dres = demangled_names_.find(strippedName); dres != demangled_names_.end()) { + if (dres->second.size() != 0) { + *loweredName = dres->second.c_str(); + return true; + } else + return false; + } + return false; +} + +bool RTCCompileProgram::GetBitcode(char* bitcode) { + + if (!fgpu_rdc_ || LLVMBitcode_.size() <= 0) { + return false; + } + + std::copy(LLVMBitcode_.begin(), LLVMBitcode_.end(), bitcode); + return true; +} + +bool RTCCompileProgram::GetBitcodeSize(size_t* bitcode_size) { + if (!fgpu_rdc_ || LLVMBitcode_.size() <= 0) { + return false; + } + + *bitcode_size = LLVMBitcode_.size(); + return true; +} + +//RTC Link Program Member Functions +RTCLinkProgram::RTCLinkProgram(std::string name) : RTCProgram(name) { + if (amd::Comgr::create_data_set(&link_input_) != AMD_COMGR_STATUS_SUCCESS) { + crashWithMessage("Failed to allocate internal hiprtc structure"); + } +} + +bool RTCLinkProgram::AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr, + void** options_vals_ptr) { + + for (size_t opt_idx = 0; opt_idx < num_options; ++opt_idx) { + + if (options_vals_ptr[opt_idx] == nullptr) { + crashWithMessage("JIT Options value ptr cannot be null"); + return false; + } + + switch(options_ptr[opt_idx]) { + case HIPRTC_JIT_MAX_REGISTERS: + link_args_.max_registers_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_THREADS_PER_BLOCK: + link_args_.threads_per_block_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_WALL_TIME: + link_args_.wall_time_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_INFO_LOG_BUFFER: + link_args_.info_log_ = (reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES: + link_args_.info_log_size_ = 
(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_ERROR_LOG_BUFFER: + link_args_.error_log_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: + link_args_.error_log_size_ = (reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_OPTIMIZATION_LEVEL: + link_args_.optimization_level_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_TARGET_FROM_HIPCONTEXT: + link_args_.target_from_hip_context_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_TARGET: + link_args_.jit_target_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_FALLBACK_STRATEGY: + link_args_.fallback_strategy_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_GENERATE_DEBUG_INFO: + link_args_.generate_debug_info_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_LOG_VERBOSE: + link_args_.log_verbose_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_GENERATE_LINE_INFO: + link_args_.generate_line_info_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_CACHE_MODE: + link_args_.cache_mode_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_NEW_SM3X_OPT: + link_args_.sm3x_opt_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_FAST_COMPILE: + link_args_.fast_compile_ = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_NAMES: + link_args_.global_symbol_names_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_ADDRESS: + link_args_.global_symbol_addresses_ = reinterpret_cast(options_vals_ptr[opt_idx]); + break; + case HIPRTC_JIT_GLOBAL_SYMBOL_COUNT: + link_args_.global_symbol_count_ + = *(reinterpret_cast(options_vals_ptr[opt_idx])); + break; + case HIPRTC_JIT_LTO: + link_args_.lto_ = 
*(reinterpret_cast(options_vals_ptr[opt_idx]));
+        break;
+      case HIPRTC_JIT_FTZ:
+        link_args_.ftz_ = *(reinterpret_cast(options_vals_ptr[opt_idx]));
+        break;
+      case HIPRTC_JIT_PREC_DIV:
+        link_args_.prec_div_ = *(reinterpret_cast(options_vals_ptr[opt_idx]));
+        break;
+      case HIPRTC_JIT_PREC_SQRT:
+        link_args_.prec_sqrt_ = *(reinterpret_cast(options_vals_ptr[opt_idx]));
+        break;
+      case HIPRTC_JIT_FMA:
+        link_args_.fma_ = *(reinterpret_cast(options_vals_ptr[opt_idx]));
+        break;
+      default:
+        break;
+    }
+  }
+
+  return true;
+}
+
+// Maps a hiprtc JIT input type to the COMGR data kind used when the input is added to a data set.
+// Bundled bitcode maps to plain bitcode when HIPRTC_USE_RUNTIME_UNBUNDLER is set, because the
+// callers unbundle it themselves in that mode. Unsupported input types are logged and yield
+// AMD_COMGR_DATA_KIND_UNDEF, which callers treat as an error.
+amd_comgr_data_kind_t RTCLinkProgram::GetCOMGRDataKind(hiprtcJITInputType input_type) {
+  amd_comgr_data_kind_t data_kind = AMD_COMGR_DATA_KIND_UNDEF;
+
+  // Map the hiprtc input type to comgr data kind
+  switch (input_type) {
+    case HIPRTC_JIT_INPUT_LLVM_BITCODE :
+      data_kind = AMD_COMGR_DATA_KIND_BC;
+      break;
+    case HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE :
+      data_kind = HIPRTC_USE_RUNTIME_UNBUNDLER
+                  ? AMD_COMGR_DATA_KIND_BC : AMD_COMGR_DATA_KIND_BC_BUNDLE;
+      break;
+    case HIPRTC_JIT_INPUT_LLVM_ARCHIVES_OF_BUNDLED_BITCODE :
+      data_kind = AMD_COMGR_DATA_KIND_AR_BUNDLE;
+      break;
+    default :
+      LogError("Cannot find the corresponding comgr data kind");
+      break;
+  }
+
+  return data_kind;
+}
+
+// Reads the file at file_path and adds its contents to the link inputs as input_type.
+// Returns false if the file cannot be stat'ed, opened or read completely, so that a missing or
+// truncated input is reported to the caller instead of being silently skipped.
+bool RTCLinkProgram::AddLinkerFile(std::string file_path, hiprtcJITInputType input_type) {
+  std::vector<char> llvm_bitcode;
+
+  // Get the file size.
+  struct stat stat_buf;
+  if (stat(file_path.c_str(), &stat_buf)) {
+    return false;
+  }
+
+  // Read the file contents
+  std::string link_file_name("Linker Program");
+  std::vector<char> link_file_info(stat_buf.st_size);
+  std::ifstream bc_file(file_path, std::ios_base::in | std::ios_base::binary);
+  if (!bc_file.good()) {
+    // Previously reported success here although nothing had been added to the link inputs.
+    LogError("Error in hiprtc: unable to open the linker input file");
+    return false;
+  }
+
+  // A short read (e.g. the file shrank after stat) would leave part of the buffer zero-filled.
+  if (!bc_file.read(link_file_info.data(), stat_buf.st_size)) {
+    LogError("Error in hiprtc: unable to read the linker input file");
+    return false;
+  }
+  bc_file.close();
+
+  // If this is bundled bitcode then unbundle this. 
+ if (HIPRTC_USE_RUNTIME_UNBUNDLER && input_type == HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE) { + if (!findIsa()) { + return false; + } + + size_t co_offset = 0; + size_t co_size = 0; + if(!UnbundleBitCode(link_file_info, isa_, co_offset, co_size)) { + LogError("Error in hiprtc: unable to unbundle the llvm bitcode"); + return false; + } + + llvm_bitcode.assign(link_file_info.begin() + co_offset, + link_file_info.begin() + co_offset + co_size); + } else { + llvm_bitcode.assign(link_file_info.begin(), link_file_info.end()); + } + + amd_comgr_data_kind_t data_kind; + if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) { + LogError("Cannot find the correct COMGR data kind"); + return false; + } + + if (!addCodeObjData(link_input_, llvm_bitcode, link_file_name, data_kind)) { + LogError("Error in hiprtc: unable to add linked code object"); + return false; + } + + return true; +} + +bool RTCLinkProgram::AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name, + hiprtcJITInputType input_type) { + char* image_char_buf = reinterpret_cast(image_ptr); + std::vector llvm_bitcode; + + if (HIPRTC_USE_RUNTIME_UNBUNDLER && input_type == HIPRTC_JIT_INPUT_LLVM_BUNDLED_BITCODE) { + std::vector bundled_llvm_bitcode(image_char_buf, image_char_buf + image_size); + + if (!findIsa()) { + return false; + } + + size_t co_offset = 0; + size_t co_size = 0; + if(!UnbundleBitCode(bundled_llvm_bitcode, isa_, co_offset, co_size)) { + LogError("Error in hiprtc: unable to unbundle the llvm bitcode"); + return false; + } + + llvm_bitcode.assign(bundled_llvm_bitcode.begin() + co_offset, + bundled_llvm_bitcode.begin() + co_offset + co_size); + } else { + llvm_bitcode.assign(image_char_buf, image_char_buf + image_size); + } + + amd_comgr_data_kind_t data_kind; + if((data_kind = GetCOMGRDataKind(input_type)) == AMD_COMGR_DATA_KIND_UNDEF) { + LogError("Cannot find the correct COMGR data kind"); + return false; + } + + if(!addCodeObjData(link_input_, llvm_bitcode , 
link_file_name, data_kind)) { + LogError("Error in hiprtc: unable to add linked code object"); + return false; + } + return true; +} + +bool RTCLinkProgram::LinkComplete(void** bin_out, size_t* size_out) { + + if (!findIsa()) { + return false; + } + + std::vector linked_llvm_bitcode; + if (!linkLLVMBitcode(link_input_, isa_, link_options_, build_log_, linked_llvm_bitcode)) { + LogError("Error in hiprtc: unable to add device libs to linked bitcode"); + return false; + } + + std::string linkedFileName = "LLVMBitcode.bc"; + if (!addCodeObjData(exec_input_, linked_llvm_bitcode, linkedFileName, AMD_COMGR_DATA_KIND_BC)) { + LogError("Error in hiprtc: unable to add linked bitcode"); + return false; + } + + std::vector exe_options; + exe_options.push_back("-O3"); + if (!createExecutable(exec_input_, isa_, exe_options, build_log_, executable_)) { + LogError("Error in hiprtc: unable to create exectuable"); + return false; + } + + *size_out = executable_.size(); + char* bin_out_c = new char[*size_out]; + std::copy(executable_.begin(), executable_.end(), bin_out_c); + *bin_out = reinterpret_cast(bin_out_c); + + return true; +} + +} // namespace hiprtc diff --git a/projects/clr/hipamd/src/hiprtc/hiprtcInternal.hpp b/projects/clr/hipamd/src/hiprtc/hiprtcInternal.hpp new file mode 100644 index 0000000000..28fcc90feb --- /dev/null +++ b/projects/clr/hipamd/src/hiprtc/hiprtcInternal.hpp @@ -0,0 +1,258 @@ +/* +Copyright (c) 2022 - Present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include + + +#ifdef HIPRTC_USE_EXCEPTIONS +#include +#endif +#include +#include +#include +#include + +#include "top.hpp" +#include "utils/debug.hpp" +#include "utils/flags.hpp" +#include "utils/macros.hpp" + +#ifdef __HIP_ENABLE_RTC +extern "C" { +extern const char __hipRTC_header[]; +extern unsigned __hipRTC_header_size; +} +#endif + +#include "hiprtcComgrHelper.hpp" + +namespace hiprtc { +namespace internal { +template inline std::string ToString(T v) { + std::ostringstream ss; + ss << v; + return ss.str(); +} + +inline std::string ToString() { return (""); } + +template inline std::string ToString(T first, Args... args) { + return ToString(first) + ", " + ToString(args...); +} +} // namespace internal +} // namespace hiprtc + +static amd::Monitor g_hiprtcInitlock {"hiprtcInit lock"}; +#define HIPRTC_INIT_API_INTERNAL(...) 
\ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread)) { \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "Failed to create thread"); \ + HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ + } \ + amd::ScopedLock lock(g_hiprtcInitlock); \ + if (!amd::Flag::init()) { \ + HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ + } \ + +#define HIPRTC_INIT_API(...) \ + HIPRTC_INIT_API_INTERNAL(0, __VA_ARGS__) \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%s ( %s )", __func__, \ + hiprtc::internal::ToString(__VA_ARGS__).c_str()); + +#define HIPRTC_RETURN(ret) \ + hiprtc::tls.last_rtc_error_ = (ret); \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%s: Returned %s", __func__, \ + hiprtcGetErrorString(hiprtc::tls.last_rtc_error_)); \ + return hiprtc::tls.last_rtc_error_; + +namespace hiprtc { + +static void crashWithMessage(std::string message) { +#ifdef HIPRTC_USE_EXCEPTIONS + throw std::runtime_error(message); +#else + guarantee(false, message.c_str()); +#endif +} + +struct Settings { + bool dumpISA{false}; + bool offloadArchProvided{false}; +}; + +class RTCProgram { +protected: + // Lock and control variables + static amd::Monitor lock_; + static std::once_flag initialized_; + + RTCProgram(std::string name); + ~RTCProgram() { + amd::Comgr::destroy_data_set(exec_input_); + } + + // Member Functions + bool findIsa(); + + // Data Members + std::string name_; + std::string isa_; + std::string build_log_; + std::vector executable_; + + amd_comgr_data_set_t exec_input_; + std::vector exe_options_; +}; + +class RTCCompileProgram : public RTCProgram { + + // Private Data Members + Settings settings_; + + std::string source_code_; + std::string source_name_; + std::map stripped_names_; + std::map demangled_names_; + + std::vector compile_options_; + std::vector link_options_; + + amd_comgr_data_set_t compile_input_; + amd_comgr_data_set_t link_input_; + + bool fgpu_rdc_; + std::vector LLVMBitcode_; + + // Private Member functions + bool addSource_impl(); + bool addBuiltinHeader(); 
+ bool transformOptions(std::vector& compile_options); + bool findLLVMOptions(const std::vector& options, + std::vector& llvm_options); + + RTCCompileProgram() = delete; + RTCCompileProgram(RTCCompileProgram&) = delete; + RTCCompileProgram& operator=(RTCCompileProgram&) = delete; + + public: + RTCCompileProgram(std::string); + ~RTCCompileProgram() { + amd::Comgr::destroy_data_set(compile_input_); + amd::Comgr::destroy_data_set(link_input_); + } + + // Converters + inline static hiprtcProgram as_hiprtcProgram(RTCCompileProgram* p) { + return reinterpret_cast(p); + } + inline static RTCCompileProgram* as_RTCCompileProgram(hiprtcProgram& p) { + return reinterpret_cast(p); + } + + // Public Member Functions + bool addSource(const std::string& source, const std::string& name); + bool addHeader(const std::string& source, const std::string& name); + bool compile(const std::vector& options, bool fgpu_rdc); + bool getMangledName(const char* name_expression, const char** loweredName); + bool trackMangledName(std::string& name); + void stripNamedExpression(std::string& named_expression); + + bool GetBitcode(char* bitcode); + bool GetBitcodeSize(size_t* bitcode_size); + // Public Getter/Setters + const std::vector& getExec() const { return executable_; } + size_t getExecSize() const { return executable_.size(); } + const std::string& getLog() const { return build_log_; } + size_t getLogSize() const { return build_log_.size(); } +}; + +// Linker Arguments passed via hipLinkCreate +struct LinkArguments { + unsigned int max_registers_; + unsigned int threads_per_block_; + float wall_time_; + size_t info_log_size_; + char* info_log_; + size_t error_log_size_; + char* error_log_; + unsigned int optimization_level_; + unsigned int target_from_hip_context_; + unsigned int jit_target_; + unsigned int fallback_strategy_; + int generate_debug_info_; + long log_verbose_; + int generate_line_info_; + unsigned int cache_mode_; + bool sm3x_opt_; + bool fast_compile_; + const char** 
global_symbol_names_; + void** global_symbol_addresses_; + unsigned int global_symbol_count_; + int lto_; + int ftz_; + int prec_div_; + int prec_sqrt_; + int fma_; +}; + +class RTCLinkProgram : public RTCProgram { + + // Private Member Functions (forbid these function calls) + RTCLinkProgram() = delete; + RTCLinkProgram(RTCLinkProgram&) = delete; + RTCLinkProgram& operator=(RTCLinkProgram&) = delete; + + amd_comgr_data_kind_t GetCOMGRDataKind(hiprtcJITInputType input_type); + + // Linker Argumenets at hipLinkCreate + LinkArguments link_args_; + + // Private Data Members + amd_comgr_data_set_t link_input_; + std::vector link_options_; +public: + RTCLinkProgram(std::string name); + ~RTCLinkProgram() { + amd::Comgr::destroy_data_set(link_input_); + } + // Public Member Functions + bool AddLinkerOptions(unsigned int num_options, hiprtcJIT_option* options_ptr, + void** options_vals_ptr); + bool AddLinkerFile(std::string file_path, hiprtcJITInputType input_type); + bool AddLinkerData(void* image_ptr, size_t image_size, std::string link_file_name, + hiprtcJITInputType input_type); + bool LinkComplete(void** bin_out, size_t* size_out); +}; + +// Thread Local Storage Variables Aggregator Class +class TlsAggregator { +public: + hiprtcResult last_rtc_error_; + + TlsAggregator(): last_rtc_error_(HIPRTC_SUCCESS) { + } + ~TlsAggregator() { + } +}; +extern thread_local TlsAggregator tls; +} // namespace hiprtc diff --git a/projects/clr/hipamd/src/hiprtc_internal.hpp b/projects/clr/hipamd/src/hiprtc_internal.hpp new file mode 100644 index 0000000000..0d50300459 --- /dev/null +++ b/projects/clr/hipamd/src/hiprtc_internal.hpp @@ -0,0 +1,65 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef HIPRTC_SRC_HIP_INTERNAL_H +#define HIPRTC_SRC_HIP_INTERNAL_H + +#include "hip_internal.hpp" + +#if __linux__ +#include + +#if HIPRTC_USE_CXXABI +#include + +#define DEMANGLE abi::__cxa_demangle + +#else +extern "C" char * __cxa_demangle(const char *mangled_name, char *output_buffer, + size_t *length, int *status); + +#define DEMANGLE __cxa_demangle +#endif //HIPRTC_USE_CXXABI +// Either way DEMANGLE now names a __cxa_demangle entry point: abi::__cxa_demangle when +// HIPRTC_USE_CXXABI is set, otherwise the extern "C" declaration above. + +#elif defined(_WIN32) +#include +#include + +// NOTE(review): presumably the buffer size for undecorated symbol names - confirm at the use site. +#define UNDECORATED_SIZE 4096 + +#endif // __linux__ +// NOTE(review): the bare #include directives above have lost their header names in this listing; +// restore them from the upstream file. + +// This macro should be called at the beginning of every HIP RTC API. +// It logs the call with its stringified arguments, declares a local `thread` holding +// amd::Thread::current(), leaves through HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR) when +// VDI_CHECK_THREAD(thread) is false, and finally runs HIP_INIT_VOID(). It expands to a declaration +// plus several statements, so it can appear only once per scope. +// HIPRTC_RETURN(ret), defined right after it, stores `ret` in hiprtc::tls.last_rtc_error_, logs the +// hiprtcGetErrorString text for it and returns the stored value. That expansion is three separate +// statements, so under an unbraced `if` only the first one would be conditional. +#define HIPRTC_INIT_API(...) 
\ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%s ( %s )", __func__, ToString( __VA_ARGS__ ).c_str()); \ + amd::Thread* thread = amd::Thread::current(); \ + if (!VDI_CHECK_THREAD(thread)) { \ + HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ + } \ + HIP_INIT_VOID(); + +#define HIPRTC_RETURN(ret) \ + hiprtc::tls.last_rtc_error_ = ret; \ + ClPrint(amd::LOG_INFO, amd::LOG_API, "%s: Returned %s", __func__, \ + hiprtcGetErrorString(hiprtc::tls.last_rtc_error_)); \ + return hiprtc::tls.last_rtc_error_; + + +#endif // HIPRTC_SRC_HIP_INTERNAL_H diff --git a/projects/clr/hipamd/src/trace_helper.h b/projects/clr/hipamd/src/trace_helper.h new file mode 100644 index 0000000000..aed87dd214 --- /dev/null +++ b/projects/clr/hipamd/src/trace_helper.h @@ -0,0 +1,246 @@ +/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/ + +#pragma once + +#include +#include +#include +#include +//--- +// Helper functions to convert HIP function arguments into strings. +// Handles POD data types as well as enumerations (ie hipMemcpyKind). +// The implementation uses C++11 variadic templates and template specialization. +// The hipMemcpyKind example below is a good example that shows how to implement conversion for a +// new HSA type. + + +// Handy macro to convert an enumeration to a stringified version of same: +#define CASE_STR(x) \ + case x: \ + return #x; + +inline const char* ihipErrorString(hipError_t hip_error) { + switch (hip_error) { + CASE_STR(hipSuccess); + CASE_STR(hipErrorOutOfMemory); + CASE_STR(hipErrorNotInitialized); + CASE_STR(hipErrorDeinitialized); + CASE_STR(hipErrorProfilerDisabled); + CASE_STR(hipErrorProfilerNotInitialized); + CASE_STR(hipErrorProfilerAlreadyStarted); + CASE_STR(hipErrorProfilerAlreadyStopped); + CASE_STR(hipErrorInvalidImage); + CASE_STR(hipErrorInvalidContext); + CASE_STR(hipErrorContextAlreadyCurrent); + CASE_STR(hipErrorMapFailed); + CASE_STR(hipErrorUnmapFailed); + CASE_STR(hipErrorArrayIsMapped); + CASE_STR(hipErrorAlreadyMapped); + CASE_STR(hipErrorNoBinaryForGpu); + CASE_STR(hipErrorAlreadyAcquired); + CASE_STR(hipErrorNotMapped); + CASE_STR(hipErrorNotMappedAsArray); + CASE_STR(hipErrorNotMappedAsPointer); + CASE_STR(hipErrorECCNotCorrectable); + CASE_STR(hipErrorUnsupportedLimit); + CASE_STR(hipErrorContextAlreadyInUse); + CASE_STR(hipErrorPeerAccessUnsupported); + CASE_STR(hipErrorInvalidKernelFile); + CASE_STR(hipErrorInvalidGraphicsContext); + CASE_STR(hipErrorInvalidSource); + CASE_STR(hipErrorFileNotFound); + CASE_STR(hipErrorSharedObjectSymbolNotFound); + CASE_STR(hipErrorSharedObjectInitFailed); + CASE_STR(hipErrorOperatingSystem); + CASE_STR(hipErrorSetOnActiveProcess); + CASE_STR(hipErrorInvalidHandle); + CASE_STR(hipErrorNotFound); + CASE_STR(hipErrorIllegalAddress); + CASE_STR(hipErrorMissingConfiguration); + 
CASE_STR(hipErrorLaunchFailure); + CASE_STR(hipErrorPriorLaunchFailure); + CASE_STR(hipErrorLaunchTimeOut); + CASE_STR(hipErrorLaunchOutOfResources); + CASE_STR(hipErrorInvalidDeviceFunction); + CASE_STR(hipErrorInvalidConfiguration); + CASE_STR(hipErrorInvalidDevice); + CASE_STR(hipErrorInvalidValue); + CASE_STR(hipErrorInvalidPitchValue); + CASE_STR(hipErrorInvalidDevicePointer); + CASE_STR(hipErrorInvalidMemcpyDirection); + CASE_STR(hipErrorUnknown); + CASE_STR(hipErrorNotReady); + CASE_STR(hipErrorNoDevice); + CASE_STR(hipErrorPeerAccessAlreadyEnabled); + CASE_STR(hipErrorPeerAccessNotEnabled); + CASE_STR(hipErrorRuntimeMemory); + CASE_STR(hipErrorRuntimeOther); + CASE_STR(hipErrorHostMemoryAlreadyRegistered); + CASE_STR(hipErrorHostMemoryNotRegistered); + CASE_STR(hipErrorTbd); + default: + return "hipErrorUnknown"; + }; +}; + +// Building block functions: +template +inline std::string ToHexString(T v) { + std::ostringstream ss; + ss << "0x" << std::hex << v; + return ss.str(); +}; + +template +inline std::string ToString(T* v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +template +inline std::string ToString(T** v) { + std::ostringstream ss; + if (v == NULL) { + ss << "char array:"; + } else { + ss << v; + } + return ss.str(); +}; + +//--- +// Template overloads for ToString to handle specific types + +// This is the default which works for most types: +template +inline std::string ToString(T v) { + std::ostringstream ss; + ss << v; + return ss.str(); +}; + +template <> +inline std::string ToString(hipFunction_t v) { + std::ostringstream ss; + ss << "0x" << std::hex << static_cast(v); + return ss.str(); +}; + +// hipEvent_t specialization. TODO - maybe add an event ID for debug? 
+template <> +inline std::string ToString(hipEvent_t v) { + std::ostringstream ss; + ss << "event:" << std::hex << static_cast(v); + return ss.str(); +}; +// hipStream_t +template <> +inline std::string ToString(hipStream_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "stream:"; + } else { + ss << "stream:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipCtx_t +template <> +inline std::string ToString(hipCtx_t v) { + std::ostringstream ss; + if (v == NULL) { + ss << "context:"; + } else { + ss << "context:" << std::hex << static_cast(v); + } + + return ss.str(); +}; + +// hipPitchedPtr +template <> +inline std::string ToString(hipPitchedPtr v) { + std::ostringstream ss; + ss << "pitchPtr:" << std::hex << static_cast(v.ptr); + return ss.str(); +}; + +// hipMemcpyKind specialization +template <> +inline std::string ToString(hipMemcpyKind v) { + switch (v) { + CASE_STR(hipMemcpyHostToHost); + CASE_STR(hipMemcpyHostToDevice); + CASE_STR(hipMemcpyDeviceToHost); + CASE_STR(hipMemcpyDeviceToDevice); + CASE_STR(hipMemcpyDefault); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipFuncCache_t v) { + switch (v) { + CASE_STR(hipFuncCachePreferNone); + CASE_STR(hipFuncCachePreferShared); + CASE_STR(hipFuncCachePreferL1); + CASE_STR(hipFuncCachePreferEqual); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipSharedMemConfig v) { + switch (v) { + CASE_STR(hipSharedMemBankSizeDefault); + CASE_STR(hipSharedMemBankSizeFourByte); + CASE_STR(hipSharedMemBankSizeEightByte); + default: + return ToHexString(v); + }; +}; + +template <> +inline std::string ToString(hipError_t v) { + return ihipErrorString(v); +}; + +// Catch empty arguments case +inline std::string ToString() { return (""); } + + +//--- +// C++11 variadic template - peels off first argument, converts to string, and calls itself again to +// peel the next arg. 
// Strings are automatically separated by comma+space.
// `first` is converted through a single-argument ToString overload and the remaining arguments
// through another ToString call. For the last remaining argument overload resolution prefers the
// non-variadic overloads, so the result carries no trailing separator.
template <typename T, typename... Args>
inline std::string ToString(T first, Args... args) {
  return ToString(first) + ", " + ToString(args...);
}
