From 4cd9afebe90a43c9319b7ca980751bcaadab549a Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Fri, 28 May 2021 23:15:18 +0000 Subject: [PATCH] SWDEV-288707 - Merge HIP refactored code to staging These changes move- - rocclr based implementation inside src/hipamd/src - platform specific (both nvidia and amd) headers inside src/hipamd/include/hip Change-Id: Ia29791a727244952591fe1d813dcef0303b73a9e [ROCm/hip commit: ce76ec8b885e0925d221d3d6789d757b9a3f7c00] --- projects/hip/CMakeLists.txt | 4 +- projects/hip/bin/hip_embed_pch.sh | 19 +- .../hip/include/hip/amd_detail/cuda/cuda.h | 1 - .../hip/amd_detail/cuda/math_functions.h | 1 - projects/hip/include/hip/channel_descriptor.h | 4 +- projects/hip/include/hip/device_functions.h | 2 +- projects/hip/include/hip/hip_complex.h | 4 +- .../hip/include/hip/hip_cooperative_groups.h | 4 +- projects/hip/include/hip/hip_fp16.h | 2 +- projects/hip/include/hip/hip_runtime.h | 4 +- projects/hip/include/hip/hip_runtime_api.h | 3848 ++++++++++++++++- projects/hip/include/hip/hip_texture_types.h | 2 +- projects/hip/include/hip/hip_vector_types.h | 2 +- projects/hip/include/hip/hiprtc.h | 2 +- projects/hip/include/hip/library_types.h | 2 +- projects/hip/include/hip/math_functions.h | 2 +- projects/hip/include/hip/texture_types.h | 2 +- projects/hip/packaging/hip-rocclr.txt | 2 +- .../hip/{rocclr => src/hipamd}/CMakeLists.txt | 74 +- .../hipamd}/cmake/FindROCclr.cmake | 0 .../hip/amd_detail/amd_channel_descriptor.h} | 4 +- .../hip/amd_detail/amd_device_functions.h} | 0 .../hip/amd_detail/amd_driver_types.h} | 0 .../include/hip/amd_detail/amd_hip_atomic.h} | 2 +- .../include/hip/amd_detail/amd_hip_common.h} | 0 .../include/hip/amd_detail/amd_hip_complex.h} | 2 +- .../amd_detail/amd_hip_cooperative_groups.h} | 0 .../include/hip/amd_detail/amd_hip_fp16.h} | 5 +- .../include/hip/amd_detail/amd_hip_runtime.h} | 10 +- .../hip/amd_detail/amd_hip_surface_types.h} | 2 +- .../hip/amd_detail/amd_hip_texture_types.h} | 4 +- .../hip/amd_detail/amd_hip_vector_types.h} | 0 .../include/hip/amd_detail/amd_hiprtc.h} | 0 .../hip/amd_detail/amd_library_types.h} | 0 .../hip/amd_detail/amd_math_functions.h} | 4 +- .../hip/amd_detail/amd_surface_functions.h} | 2 +- .../hip/amd_detail/amd_texture_types.h} | 2 +- .../include/hip/amd_detail/concepts.hpp | 0 .../hip/amd_detail/device_library_decls.h | 0 .../hip/amd_detail/functional_grid_launch.hpp | 0 .../include/hip/amd_detail/grid_launch.h | 0 .../include/hip/amd_detail/grid_launch.hpp | 0 .../hip/amd_detail/grid_launch_GGL.hpp | 0 .../include/hip/amd_detail/helpers.hpp | 0 .../hip_cooperative_groups_helper.h | 3 +- .../include/hip/amd_detail/hip_fp16_gcc.h | 0 .../hip/amd_detail/hip_fp16_math_fwd.h | 0 .../hipamd}/include/hip/amd_detail/hip_ldg.h | 2 +- .../include/hip/amd_detail/hip_memory.h | 0 .../include/hip/amd_detail/hip_runtime_prof.h | 0 .../include/hip/amd_detail/host_defines.h | 0 .../include/hip/amd_detail/hsa_helpers.hpp | 0 .../include/hip/amd_detail/llvm_intrinsics.h | 0 .../amd_detail/macro_based_grid_launch.hpp | 0 .../hipamd}/include/hip/amd_detail/math_fwd.h | 0 .../include/hip/amd_detail/ockl_image.h | 0 .../include/hip/amd_detail/program_state.hpp | 0 .../hip/amd_detail/texture_fetch_functions.h | 0 .../hip/amd_detail/texture_functions.h | 4 +- .../amd_detail/texture_indirect_functions.h | 0 .../{ => src/hipamd}/include/hip/hcc_detail | 0 .../hip/src/hipamd/include/hip/nvcc_detail | 1 + .../nvidia_channel_descriptor.h} | 0 .../hip/nvidia_detail/nvidia_hip_complex.h} | 0 .../nvidia_hip_cooperative_groups.h} | 0 .../hip/nvidia_detail/nvidia_hip_runtime.h} | 0 .../nvidia_detail/nvidia_hip_runtime_api.h} | 0 .../nvidia_detail/nvidia_hip_texture_types.h} | 0 .../hip/nvidia_detail/nvidia_hiprtc.h} | 0 .../hipamd/src}/amd_hsa_elf.hpp | 0 .../{rocclr => src/hipamd/src}/cl_d3d10.cpp | 0 .../{rocclr => src/hipamd/src}/cl_d3d11.cpp | 0 .../{rocclr => src/hipamd/src}/cl_d3d9.cpp | 0 .../hip/{rocclr => src/hipamd/src}/cl_gl.cpp | 0 .../{rocclr => src/hipamd/src}/cl_gl_amd.hpp | 0 .../hipamd/src}/cl_lqdflash_amd.cpp | 0 .../hipamd/src}/cl_lqdflash_amd.h | 0 .../hip/{rocclr => src/hipamd/src}/fixme.cpp | 0 .../hipamd/src}/hip_activity.cpp | 0 .../hipamd/src}/hip_code_object.cpp | 2 +- .../hipamd/src}/hip_code_object.hpp | 0 .../hipamd/src}/hip_context.cpp | 0 .../hipamd/src}/hip_conversions.hpp | 4 +- .../{rocclr => src/hipamd/src}/hip_device.cpp | 0 .../hipamd/src}/hip_device_runtime.cpp | 0 .../{rocclr => src/hipamd/src}/hip_error.cpp | 0 .../{rocclr => src/hipamd/src}/hip_event.cpp | 0 .../{rocclr => src/hipamd/src}/hip_event.hpp | 0 .../{rocclr => src/hipamd/src}/hip_fatbin.cpp | 0 .../{rocclr => src/hipamd/src}/hip_fatbin.hpp | 0 .../hipamd/src}/hip_formatting.hpp | 0 .../{rocclr => src/hipamd/src}/hip_global.cpp | 0 .../{rocclr => src/hipamd/src}/hip_global.hpp | 0 .../{rocclr => src/hipamd/src}/hip_graph.cpp | 0 .../hipamd/src}/hip_graph_capture.hpp | 0 .../hipamd/src}/hip_graph_helper.hpp | 0 .../hipamd/src}/hip_graph_internal.cpp | 0 .../hipamd/src}/hip_graph_internal.hpp | 0 .../{rocclr => src/hipamd/src}/hip_hcc.def.in | 0 .../{rocclr => src/hipamd/src}/hip_hcc.map.in | 0 .../hip/{rocclr => src/hipamd/src}/hip_hcc.rc | 0 .../{rocclr => src/hipamd/src}/hip_hmm.cpp | 0 .../hipamd/src}/hip_intercept.cpp | 0 .../hipamd/src}/hip_internal.hpp | 0 .../{rocclr => src/hipamd/src}/hip_memory.cpp | 0 .../{rocclr => src/hipamd/src}/hip_module.cpp | 0 .../{rocclr => src/hipamd/src}/hip_peer.cpp | 0 .../hipamd/src}/hip_platform.cpp | 2 +- .../hipamd/src}/hip_platform.hpp | 0 .../{rocclr => src/hipamd/src}/hip_prof_api.h | 0 .../hipamd/src}/hip_prof_gen.py | 0 .../hipamd/src}/hip_profile.cpp | 0 .../{rocclr => src/hipamd/src}/hip_rtc.cpp | 0 .../{rocclr => src/hipamd/src}/hip_stream.cpp | 0 .../hipamd/src}/hip_stream_ops.cpp | 0 .../hipamd/src}/hip_surface.cpp | 2 +- .../hipamd/src}/hip_texture.cpp | 2 +- .../hipamd/src}/hiprtc_internal.hpp | 0 .../{rocclr => src/hipamd/src}/trace_helper.h | 0 119 files changed, 3943 insertions(+), 97 deletions(-) delete mode 100644 projects/hip/include/hip/amd_detail/cuda/cuda.h delete mode 100644 projects/hip/include/hip/amd_detail/cuda/math_functions.h rename projects/hip/{rocclr => src/hipamd}/CMakeLists.txt (85%) rename projects/hip/{rocclr => src/hipamd}/cmake/FindROCclr.cmake (100%) rename projects/hip/{include/hip/amd_detail/channel_descriptor.h => src/hipamd/include/hip/amd_detail/amd_channel_descriptor.h} (99%) rename projects/hip/{include/hip/amd_detail/device_functions.h => src/hipamd/include/hip/amd_detail/amd_device_functions.h} (100%) rename projects/hip/{include/hip/amd_detail/driver_types.h => src/hipamd/include/hip/amd_detail/amd_driver_types.h} (100%) rename projects/hip/{include/hip/amd_detail/hip_atomic.h => src/hipamd/include/hip/amd_detail/amd_hip_atomic.h} (99%) rename projects/hip/{include/hip/amd_detail/hip_common.h => src/hipamd/include/hip/amd_detail/amd_hip_common.h} (100%) rename projects/hip/{include/hip/amd_detail/hip_complex.h => src/hipamd/include/hip/amd_detail/amd_hip_complex.h} (99%) rename projects/hip/{include/hip/amd_detail/hip_cooperative_groups.h => src/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h} (100%) rename projects/hip/{include/hip/amd_detail/hip_fp16.h => src/hipamd/include/hip/amd_detail/amd_hip_fp16.h} (99%) rename projects/hip/{include/hip/amd_detail/hip_runtime.h => src/hipamd/include/hip/amd_detail/amd_hip_runtime.h} (98%) rename projects/hip/{include/hip/amd_detail/hip_surface_types.h => src/hipamd/include/hip/amd_detail/amd_hip_surface_types.h} (97%) rename projects/hip/{include/hip/amd_detail/hip_texture_types.h => src/hipamd/include/hip/amd_detail/amd_hip_texture_types.h} (97%) rename projects/hip/{include/hip/amd_detail/hip_vector_types.h => src/hipamd/include/hip/amd_detail/amd_hip_vector_types.h} (100%) rename projects/hip/{include/hip/amd_detail/hiprtc.h => src/hipamd/include/hip/amd_detail/amd_hiprtc.h} (100%) rename projects/hip/{include/hip/amd_detail/library_types.h => src/hipamd/include/hip/amd_detail/amd_library_types.h} (100%) rename projects/hip/{include/hip/amd_detail/math_functions.h => src/hipamd/include/hip/amd_detail/amd_math_functions.h} (99%) rename projects/hip/{include/hip/amd_detail/surface_functions.h => src/hipamd/include/hip/amd_detail/amd_surface_functions.h} (97%) rename projects/hip/{include/hip/amd_detail/texture_types.h => src/hipamd/include/hip/amd_detail/amd_texture_types.h} (98%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/concepts.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/device_library_decls.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/functional_grid_launch.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/grid_launch.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/grid_launch.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/grid_launch_GGL.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/helpers.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/hip_cooperative_groups_helper.h (98%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/hip_fp16_gcc.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/hip_fp16_math_fwd.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/hip_ldg.h (99%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/hip_memory.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/hip_runtime_prof.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/host_defines.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/hsa_helpers.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/llvm_intrinsics.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/macro_based_grid_launch.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/math_fwd.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/ockl_image.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/program_state.hpp (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/texture_fetch_functions.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/texture_functions.h (99%) rename projects/hip/{ => src/hipamd}/include/hip/amd_detail/texture_indirect_functions.h (100%) rename projects/hip/{ => src/hipamd}/include/hip/hcc_detail (100%) create mode 120000 projects/hip/src/hipamd/include/hip/nvcc_detail rename projects/hip/{include/hip/nvidia_detail/channel_descriptor.h => src/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h} (100%) rename projects/hip/{include/hip/nvidia_detail/hip_complex.h => src/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h} (100%) rename projects/hip/{include/hip/nvidia_detail/hip_cooperative_groups.h => src/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h} (100%) rename projects/hip/{include/hip/nvidia_detail/hip_runtime.h => src/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h} (100%) rename projects/hip/{include/hip/nvidia_detail/hip_runtime_api.h => src/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h} (100%) rename projects/hip/{include/hip/nvidia_detail/hip_texture_types.h => src/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h} (100%) rename projects/hip/{include/hip/nvidia_detail/hiprtc.h => src/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h} (100%) rename projects/hip/{rocclr => src/hipamd/src}/amd_hsa_elf.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/cl_d3d10.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/cl_d3d11.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/cl_d3d9.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/cl_gl.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/cl_gl_amd.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/cl_lqdflash_amd.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/cl_lqdflash_amd.h (100%) rename projects/hip/{rocclr => src/hipamd/src}/fixme.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_activity.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_code_object.cpp (99%) rename projects/hip/{rocclr => src/hipamd/src}/hip_code_object.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_context.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_conversions.hpp (99%) rename projects/hip/{rocclr => src/hipamd/src}/hip_device.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_device_runtime.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_error.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_event.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_event.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_fatbin.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_fatbin.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_formatting.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_global.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_global.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_graph.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_graph_capture.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_graph_helper.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_graph_internal.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_graph_internal.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_hcc.def.in (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_hcc.map.in (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_hcc.rc (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_hmm.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_intercept.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_internal.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_memory.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_module.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_peer.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_platform.cpp (99%) rename projects/hip/{rocclr => src/hipamd/src}/hip_platform.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_prof_api.h (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_prof_gen.py (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_profile.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_rtc.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_stream.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_stream_ops.cpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/hip_surface.cpp (96%) rename projects/hip/{rocclr => src/hipamd/src}/hip_texture.cpp (99%) rename projects/hip/{rocclr => src/hipamd/src}/hiprtc_internal.hpp (100%) rename projects/hip/{rocclr => src/hipamd/src}/trace_helper.h (100%) diff --git a/projects/hip/CMakeLists.txt b/projects/hip/CMakeLists.txt index 1021b16873..9153f08d93 100755 --- a/projects/hip/CMakeLists.txt +++ b/projects/hip/CMakeLists.txt @@ -285,7 +285,7 @@ set(_versionInfoHeader file(WRITE "${PROJECT_BINARY_DIR}/include/hip/hip_version.h" ${_versionInfoHeader}) if(HIP_RUNTIME STREQUAL "rocclr") - add_subdirectory(rocclr) + add_subdirectory(src/hipamd) endif() # Generate .hipInfo @@ -322,6 +322,7 @@ if(NOT ${INSTALL_SOURCE} EQUAL 0) install(CODE "file(REMOVE_RECURSE ${CMAKE_INSTALL_PREFIX}/include)") install(DIRECTORY include DESTINATION .) + install(DIRECTORY src/hipamd/include/hip/ DESTINATION include/hip/) install(DIRECTORY cmake DESTINATION .) endif() @@ -478,6 +479,7 @@ set(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}) if(HIP_PLATFORM STREQUAL "nvidia") execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/include" "${HIP_ROOT_DIR}/include" RESULT_VARIABLE RUN_HIT ERROR_QUIET) endif() +execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/src/hipamd/include/hip/" "${HIP_ROOT_DIR}/include/hip/" RESULT_VARIABLE RUN_HIT ERROR_QUIET) execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/cmake" "${HIP_ROOT_DIR}/cmake" RESULT_VARIABLE RUN_HIT ERROR_QUIET) if(${RUN_HIT} EQUAL 0) execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/bin" "${HIP_ROOT_DIR}/bin" RESULT_VARIABLE RUN_HIT ERROR_QUIET) diff --git a/projects/hip/bin/hip_embed_pch.sh b/projects/hip/bin/hip_embed_pch.sh index 0ea905a268..b8ececf36c 100755 --- a/projects/hip/bin/hip_embed_pch.sh +++ b/projects/hip/bin/hip_embed_pch.sh @@ -21,7 +21,7 @@ printUsage() { echo - echo "Usage: $(basename "$0") HIP_BUILD_INC_DIR HIP_INC_DIR LLVM_DIR [option] [RTC_LIB_OUTPUT]" + echo "Usage: $(basename "$0") HIP_BUILD_INC_DIR HIP_INC_DIR HIP_AMD_INC_DIR LLVM_DIR [option] [RTC_LIB_OUTPUT]" echo echo "Options:" echo " -p, --generate_pch Generate pre-compiled header (default)" @@ -39,13 +39,14 @@ fi HIP_BUILD_INC_DIR="$1" HIP_INC_DIR="$2" -LLVM_DIR="$3" +HIP_AMD_INC_DIR="$3" +LLVM_DIR="$4" # By default, generate pch TARGET="generatepch" -while [ "$4" != "" ]; +while [ "$5" != "" ]; do - case "$4" in + case "$5" in -h | --help ) printUsage ; exit 0 ;; -p | --generate_pch ) @@ -58,9 +59,9 @@ do shift 1 done -# Allow hiprtc lib name to be set by argument 6 -if [[ "$5" != "" ]]; then - rtc_shared_lib_out="$5" +# Allow hiprtc lib name to be set by argument 7 +if [[ "$6" != "" ]]; then + rtc_shared_lib_out="$6" else if [[ "$OSTYPE" == cygwin ]]; then rtc_shared_lib_out=hiprtc-builtins64.dll @@ -124,7 +125,7 @@ EOF set -x - $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. -std=c++17 -nogpulib -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR --cuda-device-only -x hip $tmp/hip_pch.h -E >$tmp/pch.cui && + $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. -std=c++17 -nogpulib -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -x hip $tmp/hip_pch.h -E >$tmp/pch.cui && cat $tmp/hip_macros.h >> $tmp/pch.cui && @@ -174,7 +175,7 @@ __hipRTC_header_size: EOF set -x - $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. -std=c++14 -nogpulib --hip-version=4.4 -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR --cuda-device-only -D__HIPCC_RTC__ -x hip $tmp/hipRTC_header.h -E -o $tmp/hiprtc && + $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. -std=c++14 -nogpulib --hip-version=4.4 -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -D__HIPCC_RTC__ -x hip $tmp/hipRTC_header.h -E -o $tmp/hiprtc && cat $macroFile >> $tmp/hiprtc && $LLVM_DIR/bin/llvm-mc -o $tmp/hiprtc_header.o $tmp/hipRTC_header.mcin --filetype=obj && $LLVM_DIR/bin/clang $tmp/hiprtc_header.o -o $rtc_shared_lib_out -shared && diff --git a/projects/hip/include/hip/amd_detail/cuda/cuda.h b/projects/hip/include/hip/amd_detail/cuda/cuda.h deleted file mode 100644 index 8b13789179..0000000000 --- a/projects/hip/include/hip/amd_detail/cuda/cuda.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/projects/hip/include/hip/amd_detail/cuda/math_functions.h b/projects/hip/include/hip/amd_detail/cuda/math_functions.h deleted file mode 100644 index 8b13789179..0000000000 --- a/projects/hip/include/hip/amd_detail/cuda/math_functions.h +++ /dev/null @@ -1 +0,0 @@ - diff --git a/projects/hip/include/hip/channel_descriptor.h b/projects/hip/include/hip/channel_descriptor.h index 47d842fd3e..e012bce469 100644 --- a/projects/hip/include/hip/channel_descriptor.h +++ b/projects/hip/include/hip/channel_descriptor.h @@ -29,9 +29,9 @@ THE SOFTWARE. #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #else #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif diff --git a/projects/hip/include/hip/device_functions.h b/projects/hip/include/hip/device_functions.h index 585d986c7d..96bbb05036 100644 --- a/projects/hip/include/hip/device_functions.h +++ b/projects/hip/include/hip/device_functions.h @@ -26,7 +26,7 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #include #else diff --git a/projects/hip/include/hip/hip_complex.h b/projects/hip/include/hip/hip_complex.h index 89943a6cc0..c58e8c77c8 100644 --- a/projects/hip/include/hip/hip_complex.h +++ b/projects/hip/include/hip/hip_complex.h @@ -26,9 +26,9 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #else #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif diff --git a/projects/hip/include/hip/hip_cooperative_groups.h b/projects/hip/include/hip/hip_cooperative_groups.h index cff88d4217..afc1fe799d 100644 --- a/projects/hip/include/hip/hip_cooperative_groups.h +++ b/projects/hip/include/hip/hip_cooperative_groups.h @@ -35,10 +35,10 @@ THE SOFTWARE. #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #if __cplusplus && defined(__clang__) && defined(__HIP__) -#include +#include #endif #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #else #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif diff --git a/projects/hip/include/hip/hip_fp16.h b/projects/hip/include/hip/hip_fp16.h index 626ce64c7d..332be4d263 100644 --- a/projects/hip/include/hip/hip_fp16.h +++ b/projects/hip/include/hip/hip_fp16.h @@ -26,7 +26,7 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #include "cuda_fp16.h" #else diff --git a/projects/hip/include/hip/hip_runtime.h b/projects/hip/include/hip/hip_runtime.h index 73dd87226f..00fe21daf7 100644 --- a/projects/hip/include/hip/hip_runtime.h +++ b/projects/hip/include/hip/hip_runtime.h @@ -59,9 +59,9 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #else #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif diff --git a/projects/hip/include/hip/hip_runtime_api.h b/projects/hip/include/hip/hip_runtime_api.h index 7cd26bc1e4..6657c265a1 100644 --- a/projects/hip/include/hip/hip_runtime_api.h +++ b/projects/hip/include/hip/hip_runtime_api.h @@ -456,9 +456,3852 @@ enum hipComputeMode { */ #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include "hip/amd_detail/hip_runtime_api.h" + +#include +#include +#ifndef GENERIC_GRID_LAUNCH +#define GENERIC_GRID_LAUNCH 1 +#endif +#include +#include +#include +#include +#if defined(_MSC_VER) +#define DEPRECATED(msg) __declspec(deprecated(msg)) +#else // !defined(_MSC_VER) +#define DEPRECATED(msg) __attribute__ ((deprecated(msg))) +#endif // !defined(_MSC_VER) +#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases. For more details please refer https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_deprecated_api_list.md" +#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) +#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) +#define HIP_LAUNCH_PARAM_END ((void*)0x03) +#ifdef __cplusplus + #define __dparm(x) \ + = x +#else + #define __dparm(x) +#endif +#ifdef __GNUC__ +#pragma GCC visibility push (default) +#endif +#ifdef __cplusplus +namespace hip_impl { +hipError_t hip_init(); +} // namespace hip_impl +#endif +// Structure definitions: +#ifdef __cplusplus +extern "C" { +#endif +//--- +// API-visible structures +typedef struct ihipCtx_t* hipCtx_t; +// Note many APIs also use integer deviceIds as an alternative to the device pointer: +typedef int hipDevice_t; +typedef enum hipDeviceP2PAttr { + hipDevP2PAttrPerformanceRank = 0, + hipDevP2PAttrAccessSupported, + hipDevP2PAttrNativeAtomicSupported, + hipDevP2PAttrHipArrayAccessSupported +} hipDeviceP2PAttr; +typedef struct ihipStream_t* hipStream_t; +#define hipIpcMemLazyEnablePeerAccess 0 +#define HIP_IPC_HANDLE_SIZE 64 +typedef struct hipIpcMemHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcMemHandle_t; +typedef struct hipIpcEventHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcEventHandle_t; +typedef struct ihipModule_t* hipModule_t; +typedef struct ihipModuleSymbol_t* hipFunction_t; +typedef struct hipFuncAttributes { + int binaryVersion; + int cacheModeCA; + size_t constSizeBytes; + size_t localSizeBytes; + int maxDynamicSharedSizeBytes; + int maxThreadsPerBlock; + int numRegs; + int preferredShmemCarveout; + int ptxVersion; + size_t sharedSizeBytes; +} hipFuncAttributes; +typedef struct ihipEvent_t* hipEvent_t; +enum hipLimit_t { + hipLimitMallocHeapSize = 0x02, +}; +/** + * @addtogroup GlobalDefs More + * @{ + */ +//! Flags that can be used with hipStreamCreateWithFlags +#define hipStreamDefault \ + 0x00 ///< Default stream creation flags. These are used with hipStreamCreate(). +#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream +//! Flags that can be used with hipEventCreateWithFlags: +#define hipEventDefault 0x0 ///< Default flags +#define hipEventBlockingSync \ + 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency. +#define hipEventDisableTiming \ + 0x2 ///< Disable event's capability to record timing information. May improve performance. +#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP. +#define hipEventReleaseToDevice \ + 0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to + /// obtain more precise timings of commands between events. The flag is a no-op on + /// CUDA platforms. +#define hipEventReleaseToSystem \ + 0x80000000 /// < Use a system-scope release when recording this event. This flag is + /// useful to make non-coherent host memory visible to the host. The flag is a + /// no-op on CUDA platforms. +//! Flags that can be used with hipHostMalloc +#define hipHostMallocDefault 0x0 +#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts. +#define hipHostMallocMapped \ + 0x2 ///< Map the allocation into the address space for the current device. The device pointer + ///< can be obtained with #hipHostGetDevicePointer. +#define hipHostMallocWriteCombined 0x4 +#define hipHostMallocNumaUser \ + 0x20000000 ///< Host memory allocation will follow numa policy set by user +#define hipHostMallocCoherent \ + 0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific + ///< allocation. +#define hipHostMallocNonCoherent \ + 0x80000000 ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific + ///< allocation. +#define hipMemAttachGlobal 0x01 ///< Memory can be accessed by any stream on any device +#define hipMemAttachHost 0x02 ///< Memory cannot be accessed by any stream on any device +#define hipMemAttachSingle 0x04 ///< Memory can only be accessed by a single stream on + ///< the associated device +#define hipDeviceMallocDefault 0x0 +#define hipDeviceMallocFinegrained 0x1 ///< Memory is allocated in fine grained region of device. +#define hipMallocSignalMemory 0x2 ///< Memory represents a HSA signal. +//! Flags that can be used with hipHostRegister +#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable +#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts. +#define hipHostRegisterMapped \ + 0x2 ///< Map the allocation into the address space for the current device. The device pointer + ///< can be obtained with #hipHostGetDevicePointer. +#define hipHostRegisterIoMemory 0x4 ///< Not supported. +#define hipExtHostRegisterCoarseGrained 0x8 ///< Coarse Grained host memory lock +#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield +#define hipDeviceScheduleSpin \ + 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and + ///< may consume more power. +#define hipDeviceScheduleYield \ + 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers + ///< power and is friendlier to other threads in the system. +#define hipDeviceScheduleBlockingSync 0x4 +#define hipDeviceScheduleMask 0x7 +#define hipDeviceMapHost 0x8 +#define hipDeviceLmemResizeToMax 0x16 +#define hipArrayDefault 0x00 ///< Default HIP array allocation flag +#define hipArrayLayered 0x01 +#define hipArraySurfaceLoadStore 0x02 +#define hipArrayCubemap 0x04 +#define hipArrayTextureGather 0x08 +#define hipOccupancyDefault 0x00 +#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01 +#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02 +#define hipCpuDeviceId ((int)-1) +#define hipInvalidDeviceId ((int)-2) +// Flags that can be used with hipExtLaunch Set of APIs +#define hipExtAnyOrderLaunch 0x01 ///< AnyOrderLaunch of kernels +// Flags to be used with hipStreamWaitValue32 and hipStreamWaitValue64 +#define hipStreamWaitValueGte 0x0 +#define hipStreamWaitValueEq 0x1 +#define hipStreamWaitValueAnd 0x2 +#define hipStreamWaitValueNor 0x3 +/* + * @brief HIP Memory Advise values + * @enum + * @ingroup Enumerations + */ +typedef enum hipMemoryAdvise { + hipMemAdviseSetReadMostly = 1, ///< Data will mostly be read and only occassionally + ///< be written to + hipMemAdviseUnsetReadMostly = 2, ///< Undo the effect of hipMemAdviseSetReadMostly + hipMemAdviseSetPreferredLocation = 3, ///< Set the preferred location for the data as + ///< the specified device + hipMemAdviseUnsetPreferredLocation = 4, ///< Clear the preferred location for the data + hipMemAdviseSetAccessedBy = 5, ///< Data will be accessed by the specified device, + ///< so prevent page faults as much as possible + hipMemAdviseUnsetAccessedBy = 6, ///< Let HIP to decide on the page faulting policy + ///< for the specified device + hipMemAdviseSetCoarseGrain = 100, ///< The default memory model is fine-grain. That allows + ///< coherent operations between host and device, while + ///< executing kernels. The coarse-grain can be used + ///< for data that only needs to be coherent at dispatch + ///< boundaries for better performance. + hipMemAdviseUnsetCoarseGrain = 101 ///< Restores cache coherency policy back to fine-grain +} hipMemoryAdvise; +/* + * @brief HIP range attributes + * @enum + * @ingroup Enumerations + */ +typedef enum hipMemRangeAttribute { + hipMemRangeAttributeReadMostly = 1, ///< Whether the range will mostly be read and + ///< only occassionally be written to + hipMemRangeAttributePreferredLocation = 2, ///< The preferred location of the range + hipMemRangeAttributeAccessedBy = 3, ///< Memory range has hipMemAdviseSetAccessedBy + ///< set for the specified device + hipMemRangeAttributeLastPrefetchLocation = 4,///< The last location to where the range was prefetched +} hipMemRangeAttribute; +/* + * @brief hipJitOption + * @enum + * @ingroup Enumerations + */ +typedef enum hipJitOption { + hipJitOptionMaxRegisters = 0, + hipJitOptionThreadsPerBlock, + hipJitOptionWallTime, + hipJitOptionInfoLogBuffer, + hipJitOptionInfoLogBufferSizeBytes, + hipJitOptionErrorLogBuffer, + hipJitOptionErrorLogBufferSizeBytes, + hipJitOptionOptimizationLevel, + hipJitOptionTargetFromContext, + hipJitOptionTarget, + hipJitOptionFallbackStrategy, + hipJitOptionGenerateDebugInfo, + hipJitOptionLogVerbose, + hipJitOptionGenerateLineInfo, + hipJitOptionCacheMode, + hipJitOptionSm3xOpt, + hipJitOptionFastCompile, + hipJitOptionNumOptions +} hipJitOption; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncAttribute { + hipFuncAttributeMaxDynamicSharedMemorySize = 8, + hipFuncAttributePreferredSharedMemoryCarveout = 9, + hipFuncAttributeMax +} hipFuncAttribute; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipFuncCache_t { + hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default) + hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache + hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory + hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory +} hipFuncCache_t; +/** + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. + */ +typedef enum hipSharedMemConfig { + hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking. + hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-bytes intervals and performs best + ///< when adjacent threads access data 4 bytes apart. + hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best + ///< when adjacent threads access data 4 bytes apart. +} hipSharedMemConfig; +/** + * Struct for data in 3D + * + */ +typedef struct dim3 { + uint32_t x; ///< x + uint32_t y; ///< y + uint32_t z; ///< z +#ifdef __cplusplus + constexpr __host__ __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){}; +#endif +} dim3; +typedef struct hipLaunchParams_t { + void* func; ///< Device function symbol + dim3 gridDim; ///< Grid dimentions + dim3 blockDim; ///< Block dimentions + void **args; ///< Arguments + size_t sharedMem; ///< Shared memory + hipStream_t stream; ///< Stream identifier +} hipLaunchParams; +typedef enum hipExternalMemoryHandleType_enum { + hipExternalMemoryHandleTypeOpaqueFd = 1, + hipExternalMemoryHandleTypeOpaqueWin32 = 2, + hipExternalMemoryHandleTypeOpaqueWin32Kmt = 3, + hipExternalMemoryHandleTypeD3D12Heap = 4, + hipExternalMemoryHandleTypeD3D12Resource = 5, + hipExternalMemoryHandleTypeD3D11Resource = 6, + hipExternalMemoryHandleTypeD3D11ResourceKmt = 7, +} hipExternalMemoryHandleType; +typedef struct hipExternalMemoryHandleDesc_st { + hipExternalMemoryHandleType type; + union { + int fd; + struct { + void *handle; + const void *name; + } win32; + } handle; + unsigned long long size; + unsigned int flags; +} hipExternalMemoryHandleDesc; +typedef struct hipExternalMemoryBufferDesc_st { + unsigned long long offset; + unsigned long long size; + unsigned int flags; +} hipExternalMemoryBufferDesc; +typedef void* hipExternalMemory_t; +typedef enum hipExternalSemaphoreHandleType_enum { + hipExternalSemaphoreHandleTypeOpaqueFd = 1, + hipExternalSemaphoreHandleTypeOpaqueWin32 = 2, + hipExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3, + hipExternalSemaphoreHandleTypeD3D12Fence = 4 +} hipExternalSemaphoreHandleType; +typedef struct hipExternalSemaphoreHandleDesc_st { + hipExternalSemaphoreHandleType type; + union { + int fd; + struct { + void* handle; + const void* name; + } win32; + } handle; + unsigned int flags; +} hipExternalSemaphoreHandleDesc; +typedef void* hipExternalSemaphore_t; +typedef struct hipExternalSemaphoreSignalParams_st { + struct { + struct { + unsigned long long value; + } fence; + struct { + unsigned long long key; + } keyedMutex; + unsigned int reserved[12]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreSignalParams; +/** + * External semaphore wait parameters, compatible with driver type + */ +typedef struct hipExternalSemaphoreWaitParams_st { + struct { + struct { + unsigned long long value; + } fence; + struct { + unsigned long long key; + unsigned int timeoutMs; + } keyedMutex; + unsigned int reserved[10]; + } params; + unsigned int flags; + unsigned int reserved[16]; +} hipExternalSemaphoreWaitParams; + +#if __HIP_HAS_GET_PCH +/** + * Internal use only. This API may change in the future + * Pre-Compiled header for online compilation + * + */ + void __hipGetPCH(const char** pch, unsigned int*size); +#endif +// Doxygen end group GlobalDefs +/** @} */ +//------------------------------------------------------------------------------------------------- +// The handle allows the async commands to use the stream even if the parent hipStream_t goes +// out-of-scope. +// typedef class ihipStream_t * hipStream_t; +/* + * Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the + * surrounding hipEvent_t goes out-of-scope. This is handy for cases where the hipEvent_t goes + * out-of-scope but the true event is being written by some async queue or device */ +// typedef struct hipEvent_t { +// struct ihipEvent_t *_handle; +//} hipEvent_t; +/** + * @defgroup API HIP API + * @{ + * + * Defines the HIP API. See the individual sections for more information. + */ +/** + * @defgroup Driver Initialization and Version + * @{ + * This section describes the initializtion and version functions of HIP runtime API. + * + */ +/** + * @brief Explicitly initializes the HIP runtime. + * + * Most HIP APIs implicitly initialize the HIP runtime. + * This API provides control over the timing of the initialization. + */ +// TODO-ctx - more description on error codes. +hipError_t hipInit(unsigned int flags); +/** + * @brief Returns the approximate HIP driver version. + * + * @param [out] driverVersion + * + * @returns #hipSuccess, #hipErrorInavlidValue + * + * @warning The HIP feature set does not correspond to an exact CUDA SDK driver revision. + * This function always set *driverVersion to 4 as an approximation though HIP supports + * some features which were introduced in later CUDA SDK revisions. + * HIP apps code should not rely on the driver revision number here and should + * use arch feature flags to test device capabilities or conditional compilation. + * + * @see hipRuntimeGetVersion + */ +hipError_t hipDriverGetVersion(int* driverVersion); +/** + * @brief Returns the approximate HIP Runtime version. + * + * @param [out] runtimeVersion + * + * @returns #hipSuccess, #hipErrorInavlidValue + * + * @warning On HIP/HCC path this function returns HIP runtime patch version however on + * HIP/NVCC path this function return CUDA runtime version. + * + * @see hipDriverGetVersion + */ +hipError_t hipRuntimeGetVersion(int* runtimeVersion); +/** + * @brief Returns a handle to a compute device + * @param [out] device + * @param [in] ordinal + * + * @returns #hipSuccess, #hipErrorInavlidDevice + */ +hipError_t hipDeviceGet(hipDevice_t* device, int ordinal); +/** + * @brief Returns the compute capability of the device + * @param [out] major + * @param [out] minor + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInavlidDevice + */ +hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); +/** + * @brief Returns an identifer string for the device. + * @param [out] name + * @param [in] len + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInavlidDevice + */ +hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); +/** + * @brief Returns a value for attr of link between two devices + * @param [out] value + * @param [in] attr + * @param [in] srcDevice + * @param [in] dstDevice + * + * @returns #hipSuccess, #hipErrorInavlidDevice + */ +hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, + int srcDevice, int dstDevice); +/** + * @brief Returns a PCI Bus Id string for the device, overloaded to take int device ID. + * @param [out] pciBusId + * @param [in] len + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInavlidDevice + */ +hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device); +/** + * @brief Returns a handle to a compute device. + * @param [out] device handle + * @param [in] PCI Bus ID + * + * @returns #hipSuccess, #hipErrorInavlidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId); +/** + * @brief Returns the total amount of memory on the device. + * @param [out] bytes + * @param [in] device + * + * @returns #hipSuccess, #hipErrorInavlidDevice + */ +hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device); +// doxygen end initialization +/** + * @} + */ +/** + * @defgroup Device Device Management + * @{ + * This section describes the device management functions of HIP runtime API. + */ +/** + * @brief Waits on all active streams on current device + * + * When this command is invoked, the host thread gets blocked until all the commands associated + * with streams associated with the device. HIP does not support multiple blocking modes (yet!). + * + * @returns #hipSuccess + * + * @see hipSetDevice, hipDeviceReset + */ +hipError_t hipDeviceSynchronize(void); +/** + * @brief The state of current device is discarded and updated to a fresh state. + * + * Calling this function deletes all streams created, memory allocated, kernels running, events + * created. Make sure that no other thread is using the device or streams, memory, kernels, events + * associated with the current device. + * + * @returns #hipSuccess + * + * @see hipDeviceSynchronize + */ +hipError_t hipDeviceReset(void); +/** + * @brief Set default device to be used for subsequent hip API calls from this thread. + * + * @param[in] deviceId Valid device in range 0...hipGetDeviceCount(). + * + * Sets @p device as the default device for the calling host thread. Valid device id's are 0... + * (hipGetDeviceCount()-1). + * + * Many HIP APIs implicitly use the "default device" : + * + * - Any device memory subsequently allocated from this host thread (using hipMalloc) will be + * allocated on device. + * - Any streams or events created from this host thread will be associated with device. + * - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device + * (unless a specific stream is specified, in which case the device associated with that stream will + * be used). + * + * This function may be called from any host thread. Multiple host threads may use the same device. + * This function does no synchronization with the previous or new device, and has very little + * runtime overhead. Applications can use hipSetDevice to quickly switch the default device before + * making a HIP runtime call which uses the default device. + * + * The default device is stored in thread-local-storage for each thread. + * Thread-pool implementations may inherit the default device of the previous thread. A good + * practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known + * standard device. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorDeviceAlreadyInUse + * + * @see hipGetDevice, hipGetDeviceCount + */ +hipError_t hipSetDevice(int deviceId); +/** + * @brief Return the default device id for the calling host thread. + * + * @param [out] device *device is written with the default device + * + * HIP maintains an default device for each thread using thread-local-storage. + * This device is used implicitly for HIP runtime APIs called by this thread. + * hipGetDevice returns in * @p device the default device for the calling host thread. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipSetDevice, hipGetDevicesizeBytes + */ +hipError_t hipGetDevice(int* deviceId); +/** + * @brief Return number of compute-capable devices. + * + * @param [output] count Returns number of compute-capable devices. + * + * @returns #hipSuccess, #hipErrorNoDevice + * + * + * Returns in @p *count the number of devices that have ability to run compute commands. If there + * are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. If 1 or more + * devices can be found, then hipGetDeviceCount returns #hipSuccess. + */ +hipError_t hipGetDeviceCount(int* count); +/** + * @brief Query for a specific device attribute. + * + * @param [out] pi pointer to value to return + * @param [in] attr attribute to query + * @param [in] deviceId which device to query for information + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId); +/** + * @brief Returns device properties. + * + * @param [out] prop written with device properties + * @param [in] deviceId which device to query for information + * + * @return #hipSuccess, #hipErrorInvalidDevice + * @bug HCC always returns 0 for maxThreadsPerMultiProcessor + * @bug HCC always returns 0 for regsPerBlock + * @bug HCC always returns 0 for l2CacheSize + * + * Populates hipGetDeviceProperties with information for the specified device. + */ +hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); +/** + * @brief Set L1/Shared cache partition. + * + * @param [in] cacheConfig + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. + * + */ +hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Set Cache configuration for a specific function + * + * @param [in] cacheConfig + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. + * + */ +hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Get Resource limits of current device + * + * @param [out] pValue + * @param [in] limit + * + * @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue + * Note: Currently, only hipLimitMallocHeapSize is available + * + */ +hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit); +/** + * @brief Returns bank width of shared memory for current device + * + * @param [out] pConfig + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Gets the flags set for current device + * + * @param [out] flags + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + */ +hipError_t hipGetDeviceFlags(unsigned int* flags); +/** + * @brief The bank width of shared memory on current device is set + * + * @param [in] config + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief The current device behavior is changed according the flags passed. + * + * @param [in] flags + * + * The schedule flags impact how HIP waits for the completion of a command running on a device. + * hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted the + * work until the command completes. This offers the lowest latency, but will consume a CPU core + * and may increase power. hipDeviceScheduleYield : The HIP runtime will yield the CPU to + * system so that other tasks can use it. This may increase latency to detect the completion but + * will consume less power and is friendlier to other tasks in the system. + * hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield. + * hipDeviceScheduleAuto : Use a hueristic to select between Spin and Yield modes. If the + * number of HIP contexts is greater than the number of logical processors in the system, use Spin + * scheduling. Else use Yield scheduling. + * + * + * hipDeviceMapHost : Allow mapping host memory. On ROCM, this is always allowed and + * the flag is ignored. hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag. + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess + * + * + */ +hipError_t hipSetDeviceFlags(unsigned flags); +/** + * @brief Device which matches hipDeviceProp_t is returned + * + * @param [out] device ID + * @param [in] device properties pointer + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop); +/** + * @brief Returns the link type and hop count between two devices + * + * @param [in] device1 Ordinal for device1 + * @param [in] device2 Ordinal for device2 + * @param [out] linktype Returns the link type (See hsa_amd_link_info_type_t) between the two devices + * @param [out] hopcount Returns the hop count between the two devices + * + * Queries and returns the HSA link type and the hop count between the two specified devices. + * + * @returns #hipSuccess, #hipInvalidDevice, #hipErrorRuntimeOther + */ +hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount); +// TODO: implement IPC apis +/** + * @brief Gets an interprocess memory handle for an existing device memory + * allocation + * + * Takes a pointer to the base of an existing device memory allocation created + * with hipMalloc and exports it for use in another process. This is a + * lightweight operation and may be called multiple times on an allocation + * without adverse effects. + * + * If a region of memory is freed with hipFree and a subsequent call + * to hipMalloc returns memory with the same device address, + * hipIpcGetMemHandle will return a unique handle for the + * new memory. + * + * @param handle - Pointer to user allocated hipIpcMemHandle to return + * the handle in. + * @param devPtr - Base pointer to previously allocated device memory + * + * @returns + * hipSuccess, + * hipErrorInvalidHandle, + * hipErrorOutOfMemory, + * hipErrorMapFailed, + * + */ +hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); +/** + * @brief Opens an interprocess memory handle exported from another process + * and returns a device pointer usable in the local process. + * + * Maps memory exported from another process with hipIpcGetMemHandle into + * the current device address space. For contexts on different devices + * hipIpcOpenMemHandle can attempt to enable peer access between the + * devices as if the user called hipDeviceEnablePeerAccess. This behavior is + * controlled by the hipIpcMemLazyEnablePeerAccess flag. + * hipDeviceCanAccessPeer can determine if a mapping is possible. + * + * Contexts that may open hipIpcMemHandles are restricted in the following way. + * hipIpcMemHandles from each device in a given process may only be opened + * by one context per device per other process. + * + * Memory returned from hipIpcOpenMemHandle must be freed with + * hipIpcCloseMemHandle. + * + * Calling hipFree on an exported memory region before calling + * hipIpcCloseMemHandle in the importing context will result in undefined + * behavior. + * + * @param devPtr - Returned device pointer + * @param handle - hipIpcMemHandle to open + * @param flags - Flags for this operation. Must be specified as hipIpcMemLazyEnablePeerAccess + * + * @returns + * hipSuccess, + * hipErrorMapFailed, + * hipErrorInvalidHandle, + * hipErrorTooManyPeers + * + * @note No guarantees are made about the address returned in @p *devPtr. + * In particular, multiple processes may not receive the same address for the same @p handle. + * + */ +hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); +/** + * @brief Close memory mapped with hipIpcOpenMemHandle + * + * Unmaps memory returnd by hipIpcOpenMemHandle. The original allocation + * in the exporting process as well as imported mappings in other processes + * will be unaffected. + * + * Any resources used to enable peer access will be freed if this is the + * last mapping using them. + * + * @param devPtr - Device pointer returned by hipIpcOpenMemHandle + * + * @returns + * hipSuccess, + * hipErrorMapFailed, + * hipErrorInvalidHandle, + * + */ +hipError_t hipIpcCloseMemHandle(void* devPtr); +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); +// end doxygen Device +/** + * @} + */ +/** + * + * @defgroup Execution Execution Control + * @{ + * This section describes the execution control functions of HIP runtime API. + * + */ +/** + * @brief Set attribute for a specific function + * + * @param [in] func; + * @param [in] attr; + * @param [in] value; + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value); +/** + * @brief Set Cache configuration for a specific function + * + * @param [in] config; + * + * @returns #hipSuccess, #hipErrorNotInitialized + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored + * on those architectures. + * + */ +hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config); +/** + * @brief Set shared memory configuation for a specific function + * + * @param [in] func + * @param [in] config + * + * @returns #hipSuccess, #hipErrorInvalidDeviceFunction, #hipErrorInvalidValue + * + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + */ +hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config); +//doxygen end execution +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Error Error Handling + * @{ + * This section describes the error handling functions of HIP runtime API. + */ +/** + * @brief Return last error returned by any HIP runtime API call and resets the stored error code to + * #hipSuccess + * + * @returns return code from last HIP called from the active host thread + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread, and then resets the saved error to #hipSuccess. + * + * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t + */ +hipError_t hipGetLastError(void); +/** + * @brief Return last error returned by any HIP runtime API call. + * + * @return #hipSuccess + * + * Returns the last error that has been returned by any of the runtime calls in the same host + * thread. Unlike hipGetLastError, this function does not reset the saved error code. + * + * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t + */ +hipError_t hipPeekAtLastError(void); +/** + * @brief Return name of the specified error code in text form. + * + * @param hip_error Error code to convert to name. + * @return const char pointer to the NULL-terminated error name + * + * @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t + */ +const char* hipGetErrorName(hipError_t hip_error); +/** + * @brief Return handy text string message to explain the error which occurred + * + * @param hipError Error code to convert to string. + * @return const char pointer to the NULL-terminated error string + * + * @warning : on HCC, this function returns the name of the error (same as hipGetErrorName) + * + * @see hipGetErrorName, hipGetLastError, hipPeakAtLastError, hipError_t + */ +const char* hipGetErrorString(hipError_t hipError); +// end doxygen Error +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Stream Stream Management + * @{ + * This section describes the stream management functions of HIP runtime API. + * The following Stream APIs are not (yet) supported in HIP: + * - hipStreamAttachMemAsync is a nop + */ +/** + * @brief Create an asynchronous stream. + * + * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the + * newly created stream. + * @return #hipSuccess, #hipErrorInvalidValue + * + * Create a new asynchronous stream. @p stream returns an opaque handle that can be used to + * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on + * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory + * used by the stream, applicaiton must call hipStreamDestroy. + * + * @return #hipSuccess, #hipErrorInvalidValue + * + * @see hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipStreamCreate(hipStream_t* stream); +/** + * @brief Create an asynchronous stream. + * + * @param[in, out] stream Pointer to new stream + * @param[in ] flags to control stream creation. + * @return #hipSuccess, #hipErrorInvalidValue + * + * Create a new asynchronous stream. @p stream returns an opaque handle that can be used to + * reference the newly created stream in subsequent hipStream* commands. The stream is allocated on + * the heap and will remain allocated even if the handle goes out-of-scope. To release the memory + * used by the stream, applicaiton must call hipStreamDestroy. Flags controls behavior of the + * stream. See #hipStreamDefault, #hipStreamNonBlocking. + * + * + * @see hipStreamCreate, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags); +/** + * @brief Create an asynchronous stream with the specified priority. + * + * @param[in, out] stream Pointer to new stream + * @param[in ] flags to control stream creation. + * @param[in ] priority of the stream. Lower numbers represent higher priorities. + * @return #hipSuccess, #hipErrorInvalidValue + * + * Create a new asynchronous stream with the specified priority. @p stream returns an opaque handle + * that can be used to reference the newly created stream in subsequent hipStream* commands. The + * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope. + * To release the memory used by the stream, applicaiton must call hipStreamDestroy. Flags controls + * behavior of the stream. See #hipStreamDefault, #hipStreamNonBlocking. + * + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority); +/** + * @brief Returns numerical values that correspond to the least and greatest stream priority. + * + * @param[in, out] leastPriority pointer in which value corresponding to least priority is returned. + * @param[in, out] greatestPriority pointer in which value corresponding to greatest priority is returned. + * + * Returns in *leastPriority and *greatestPriority the numerical values that correspond to the least + * and greatest stream priority respectively. Stream priorities follow a convention where lower numbers + * imply greater priorities. The range of meaningful stream priorities is given by + * [*greatestPriority, *leastPriority]. If the user attempts to create a stream with a priority value + * that is outside the the meaningful range as specified by this API, the priority is automatically + * clamped to within the valid range. + */ +hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority); +/** + * @brief Destroys the specified stream. + * + * @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the + * newly created stream. + * @return #hipSuccess #hipErrorInvalidHandle + * + * Destroys the specified stream. + * + * If commands are still executing on the specified stream, some may complete execution before the + * queue is deleted. + * + * The queue may be destroyed while some commands are still inflight, or may wait for all commands + * queued to the stream before destroying it. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamQuery, hipStreamWaitEvent, + * hipStreamSynchronize + */ +hipError_t hipStreamDestroy(hipStream_t stream); +/** + * @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or + * #hipErrorNotReady if not. + * + * @param[in] stream stream to query + * + * @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle + * + * This is thread-safe and returns a snapshot of the current state of the queue. However, if other + * host threads are sending work to the stream, the status may change immediately after the function + * is called. It is typically used for debug. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamSynchronize, + * hipStreamDestroy + */ +hipError_t hipStreamQuery(hipStream_t stream); +/** + * @brief Wait for all commands in stream to complete. + * + * @param[in] stream stream identifier. + * + * @return #hipSuccess, #hipErrorInvalidHandle + * + * This command is host-synchronous : the host will block until the specified stream is empty. + * + * This command follows standard null-stream semantics. Specifically, specifying the null stream + * will cause the command to wait for other streams on the same device to complete all pending + * operations. + * + * This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active + * or blocking. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamWaitEvent, hipStreamDestroy + * + */ +hipError_t hipStreamSynchronize(hipStream_t stream); +/** + * @brief Make the specified compute stream wait for an event + * + * @param[in] stream stream to make wait. + * @param[in] event event to wait on + * @param[in] flags control operation [must be 0] + * + * @return #hipSuccess, #hipErrorInvalidHandle + * + * This function inserts a wait operation into the specified stream. + * All future work submitted to @p stream will wait until @p event reports completion before + * beginning execution. + * + * This function only waits for commands in the current stream to complete. Notably,, this function + * does not impliciy wait for commands in the default stream to complete, even if the specified + * stream is created with hipStreamNonBlocking = 0. + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipStreamSynchronize, hipStreamDestroy + */ +hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags); +/** + * @brief Return flags associated with this stream. + * + * @param[in] stream stream to be queried + * @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle + * + * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidHandle + * + * Return flags associated with this stream in *@p flags. + * + * @see hipStreamCreateWithFlags + */ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags); +/** + * @brief Query the priority of a stream. + * + * @param[in] stream stream to be queried + * @param[in,out] priority Pointer to an unsigned integer in which the stream's priority is returned + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidHandle + * + * @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidHandle + * + * Query the priority of a stream. The priority is returned in in priority. + * + * @see hipStreamCreateWithFlags + */ +hipError_t hipStreamGetPriority(hipStream_t stream, int* priority); +/** + * @brief Create an asynchronous stream with the specified CU mask. + * + * @param[in, out] stream Pointer to new stream + * @param[in ] cuMaskSize Size of CU mask bit array passed in. + * @param[in ] cuMask Bit-vector representing the CU mask. Each active bit represents using one CU. + * The first 32 bits represent the first 32 CUs, and so on. If its size is greater than physical + * CU number (i.e., multiProcessorCount member of hipDeviceProp_t), the extra elements are ignored. + * It is user's responsibility to make sure the input is meaningful. + * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * Create a new asynchronous stream with the specified CU mask. @p stream returns an opaque handle + * that can be used to reference the newly created stream in subsequent hipStream* commands. The + * stream is allocated on the heap and will remain allocated even if the handle goes out-of-scope. + * To release the memory used by the stream, application must call hipStreamDestroy. + * + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamCreateWithCUMask(hipStream_t* stream, uint32_t cuMaskSize, const uint32_t* cuMask); +/** + * @brief Get CU mask associated with an asynchronous stream + * + * @param[in] stream stream to be queried + * @param[in] cuMaskSize number of the block of memories (uint32_t *) allocated by user + * @param[out] cuMask Pointer to a pre-allocated block of memories (uint32_t *) in which + * the stream's CU mask is returned. The CU mask is returned in a chunck of 32 bits where + * each active bit represents one active CU + * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorInvalidValue + * + * @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy + */ +hipError_t hipExtStreamGetCUMask(hipStream_t stream, uint32_t cuMaskSize, uint32_t* cuMask); +/** + * Stream CallBack struct + */ +typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); +/** + * @brief Adds a callback to be called on the host after all currently enqueued + * items in the stream have completed. For each + * hipStreamAddCallback call, a callback will be executed exactly once. + * The callback will block later work in the stream until it is finished. + * @param[in] stream - Stream to add callback to + * @param[in] callback - The function to call once preceding stream operations are complete + * @param[in] userData - User specified data to be passed to the callback function + * @param[in] flags - Reserved for future use, must be 0 + * @return #hipSuccess, #hipErrorInvalidHandle, #hipErrorNotSupported + * + * @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize, + * hipStreamWaitEvent, hipStreamDestroy, hipStreamCreateWithPriority + * + */ +hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, + unsigned int flags); +// end doxygen Stream +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Stream Memory Operations + * @{ + * This section describes Stream Memory Wait and Write functions of HIP runtime API. + */ +/** + * @brief Enqueues a wait command to the stream. + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are hipStreamWaitValueGte + * hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor + * @param [in] mask - Mask to be applied on value at memory before it is compared with value, + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream, all operations enqueued on this stream after this, will + * not execute until the defined wait condition is true. + * + * hipStreamWaitValueGte: waits until *ptr&mask >= value + * hipStreamWaitValueEq : waits until *ptr&mask == value + * hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using 'hipStreamWaitValueNor', mask is applied on both 'value' and '*ptr'. + * + * @note Support for hipStreamWaitValue32 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue64, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ +hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags, + uint32_t mask __dparm(0xFFFFFFFF)); +/** + * @brief Enqueues a wait command to the stream. + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag + * @param [in] value - Value to be used in compare operation + * @param [in] flags - Defines the compare operation, supported values are hipStreamWaitValueGte + * hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor. + * @param [in] mask - Mask to be applied on value at memory before it is compared with value + * default value is set to enable every bit + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a wait command to the stream, all operations enqueued on this stream after this, will + * not execute until the defined wait condition is true. + * + * hipStreamWaitValueGte: waits until *ptr&mask >= value + * hipStreamWaitValueEq : waits until *ptr&mask == value + * hipStreamWaitValueAnd: waits until ((*ptr&mask) & value) != 0 + * hipStreamWaitValueNor: waits until ~((*ptr&mask) | (value&mask)) != 0 + * + * @note when using 'hipStreamWaitValueNor', mask is applied on both 'value' and '*ptr'. + * + * @note Support for hipStreamWaitValue64 can be queried using 'hipDeviceGetAttribute()' and + * 'hipDeviceAttributeCanUseStreamWaitValue' flag. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWaitValue32, hipStreamWriteValue64, + * hipStreamWriteValue32, hipDeviceGetAttribute + */ +hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags, + uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)); +/** + * @brief Enqueues a write command to the stream. + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ +hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags); +/** + * @brief Enqueues a write command to the stream. + * + * @param [in] stream - Stream identifier + * @param [in] ptr - Pointer to a GPU accessible memory object + * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * Enqueues a write command to the stream, write operation is performed after all earlier commands + * on this stream have completed the execution. + * + * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, + * hipStreamWaitValue64 + */ +hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags); +// end doxygen Stream Memory Operations +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Event Event Management + * @{ + * This section describes the event management functions of HIP runtime API. + */ +/** + * @brief Create an event with the specified flags + * + * @param[in,out] event Returns the newly created event. + * @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault, + #hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess + * #hipEventDefault : Default flag. The event will use active synchronization and will support + timing. Blocking synchronization provides lowest possible latency at the expense of dedicating a + CPU to poll on the event. + * #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is + called on this event, the thread will block until the event completes. This can increase latency + for the synchroniation but can result in lower power and more resources for other CPU threads. + * #hipEventDisableTiming : Disable recording of timing information. Events created with this flag + would not record profiling data and provide best performance if used for synchronization. + * @warning On AMD platform, hipEventInterprocess support is under development. Use of this flag + will return an error. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags); +/** + * Create an event + * + * @param[in,out] event Returns the newly created event. + * + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure, #hipErrorOutOfMemory + * + * @see hipEventCreateWithFlags, hipEventRecord, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + */ +hipError_t hipEventCreate(hipEvent_t* event); +/** + * @brief Record an event in the specified stream. + * + * @param[in] event event to record. + * @param[in] stream stream in which to record event. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * hipEventQuery() or hipEventSynchronize() must be used to determine when the event + * transitions from "recording" (after hipEventRecord() is called) to "recorded" + * (when timestamps are set, if requested). + * + * Events which are recorded in a non-NULL stream will transition to + * from recording to "recorded" state when they reach the head of + * the specified stream, after all previous + * commands in that stream have completed executing. + * + * If hipEventRecord() has been previously called on this event, then this call will overwrite any + * existing state in event. + * + * If this function is called on an event that is currently being recorded, results are undefined + * - either outstanding recording may save state into the event, and the order is not guaranteed. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, + * hipEventDestroy, hipEventElapsedTime + * + */ +#ifdef __cplusplus +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream = NULL); +#else +hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); +#endif +/** + * @brief Destroy the specified event. + * + * @param[in] event Event to destroy. + * @returns #hipSuccess, #hipErrorNotInitialized, #hipErrorInvalidValue, + * #hipErrorLaunchFailure + * + * Releases memory associated with the event. If the event is recording but has not completed + * recording when hipEventDestroy() is called, the function will return immediately and the + * completion_future resources will be released later, when the hipDevice is synchronized. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventSynchronize, hipEventRecord, + * hipEventElapsedTime + * + * @returns #hipSuccess + */ +hipError_t hipEventDestroy(hipEvent_t event); +/** + * @brief Wait for an event to complete. + * + * This function will block until the event is ready, waiting for all previous work in the stream + * specified when event was recorded with hipEventRecord(). + * + * If hipEventRecord() has not been called on @p event, this function returns immediately. + * + * TODO-hip- This function needs to support hipEventBlockingSync parameter. + * + * @param[in] event Event on which to wait. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized, + * #hipErrorInvalidHandle, #hipErrorLaunchFailure + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventElapsedTime + */ +hipError_t hipEventSynchronize(hipEvent_t event); +/** + * @brief Return the elapsed time between two events. + * + * @param[out] ms : Return time between start and stop in ms. + * @param[in] start : Start event. + * @param[in] stop : Stop event. + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorNotReady, #hipErrorInvalidHandle, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Computes the elapsed time between two events. Time is computed in ms, with + * a resolution of approximately 1 us. + * + * Events which are recorded in a NULL stream will block until all commands + * on all other streams complete execution, and then record the timestamp. + * + * Events which are recorded in a non-NULL stream will record their timestamp + * when they reach the head of the specified stream, after all previous + * commands in that stream have completed executing. Thus the time that + * the event recorded may be significantly after the host calls hipEventRecord(). + * + * If hipEventRecord() has not been called on either event, then #hipErrorInvalidHandle is + * returned. If hipEventRecord() has been called on both events, but the timestamp has not yet been + * recorded on one or both events (that is, hipEventQuery() would return #hipErrorNotReady on at + * least one of the events), then #hipErrorNotReady is returned. + * + * Note, for HIP Events used in kernel dispatch using hipExtLaunchKernelGGL/hipExtLaunchKernel, + * events passed in hipExtLaunchKernelGGL/hipExtLaunchKernel are not explicitly recorded and should + * only be used to get elapsed time for that specific launch. In case events are used across + * multiple dispatches, for example, start and stop events from different hipExtLaunchKernelGGL/ + * hipExtLaunchKernel calls, they will be treated as invalid unrecorded events, HIP will throw + * error "hipErrorInvalidHandle" from hipEventElapsedTime. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventQuery, hipEventDestroy, hipEventRecord, + * hipEventSynchronize + */ +hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop); +/** + * @brief Query event status + * + * @param[in] event Event to query. + * @returns #hipSuccess, #hipErrorNotReady, #hipErrorInvalidHandle, #hipErrorInvalidValue, + * #hipErrorNotInitialized, #hipErrorLaunchFailure + * + * Query the status of the specified event. This function will return #hipErrorNotReady if all + * commands in the appropriate stream (specified to hipEventRecord()) have completed. If that work + * has not completed, or if hipEventRecord() was not called on the event, then #hipSuccess is + * returned. + * + * @see hipEventCreate, hipEventCreateWithFlags, hipEventRecord, hipEventDestroy, + * hipEventSynchronize, hipEventElapsedTime + */ +hipError_t hipEventQuery(hipEvent_t event); +// end doxygen Events +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Memory Memory Management + * @{ + * This section describes the memory management functions of HIP runtime API. + * The following CUDA APIs are not currently supported: + * - cudaMalloc3D + * - cudaMalloc3DArray + * - TODO - more 2D, 3D, array APIs here. + * + * + */ +/** + * @brief Return attributes for the specified pointer + * + * @param[out] attributes for the specified pointer + * @param[in] pointer to get attributes for + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see hipGetDeviceCount, hipGetDevice, hipSetDevice, hipChooseDevice + */ +hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr); + +/** + * @brief Imports an external semaphore. + * + * @param[out] extSem_out External semaphores to be waited on + * @param[in] semHandleDesc Semaphore import handle descriptor + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t* extSem_out, + const hipExternalSemaphoreHandleDesc* semHandleDesc); +/** + * @brief Signals a set of external semaphore objects. + * + * @param[in] extSem_out External semaphores to be waited on + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to wait on + * @param[in] stream Stream to enqueue the wait operations in + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipSignalExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Waits on a set of external semaphore objects + * + * @param[in] extSem_out External semaphores to be waited on + * @param[in] paramsArray Array of semaphore parameters + * @param[in] numExtSems Number of semaphores to wait on + * @param[in] stream Stream to enqueue the wait operations in + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, + const hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream); +/** + * @brief Destroys an external semaphore object and releases any references to the underlying resource. Any outstanding signals or waits must have completed before the semaphore is destroyed. + * + * @param[in] extSem handle to an external memory object + * + * @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * + * @see + */ +hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extSem); + +/** +* @brief Imports an external memory object. +* +* @param[out] extMem_out Returned handle to an external memory object +* @param[in] memHandleDesc Memory import handle descriptor +* +* @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue +* +* @see +*/ +hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out, const hipExternalMemoryHandleDesc* memHandleDesc); +/** +* @brief Maps a buffer onto an imported memory object. +* +* @param[out] devPtr Returned device pointer to buffer +* @param[in] extMem Handle to external memory object +* @param[in] bufferDesc Buffer descriptor +* +* @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue +* +* @see +*/ +hipError_t hipExternalMemoryGetMappedBuffer(void **devPtr, hipExternalMemory_t extMem, const hipExternalMemoryBufferDesc *bufferDesc); +/** +* @brief Destroys an external memory object. +* +* @param[in] extMem External memory object to be destroyed +* +* @return #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue +* +* @see +*/ +hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem); +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipMalloc(void** ptr, size_t size); +/** + * @brief Allocate memory on the default accelerator + * + * @param[out] ptr Pointer to the allocated memory + * @param[in] size Requested memory size + * @param[in] flags Type of memory allocation + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorOutOfMemory, #hipErrorInvalidValue (bad context, null *ptr) + * + * @see hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, hipMalloc3DArray, + * hipHostFree, hipHostMalloc + */ +hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags); +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorOutOfMemory + * + * @deprecated use hipHostMalloc() instead + */ +DEPRECATED("use hipHostMalloc instead") +hipError_t hipMallocHost(void** ptr, size_t size); +/** + * @brief Allocate pinned host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorOutOfMemory + * + * @deprecated use hipHostMalloc() instead + */ +DEPRECATED("use hipHostMalloc instead") +hipError_t hipMemAllocHost(void** ptr, size_t size); +/** + * @brief Allocate device accessible page locked host memory + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * @param[in] flags Type of host memory allocation + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorOutOfMemory + * + * @see hipSetDeviceFlags, hipHostFree + */ +hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags); +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @addtogroup Memory Managed Memory + * @{ + * @ingroup Memory + * This section describes the managed memory management functions of HIP runtime API. + * + */ +/** + * @brief Allocates memory that will be automatically managed by HIP. + * + * @param [out] dev_ptr - pointer to allocated device memory + * @param [in] size - requested allocation size in bytes + * @param [in] flags - must be either hipMemAttachGlobal or hipMemAttachHost + * (defaults to hipMemAttachGlobal) + * + * @returns #hipSuccess, #hipErrorMemoryAllocation, #hipErrorNotSupported, #hipErrorInvalidValue + */ +hipError_t hipMallocManaged(void** dev_ptr, + size_t size, + unsigned int flags __dparm(hipMemAttachGlobal)); +/** + * @brief Prefetches memory to the specified destination device using HIP. + * + * @param [in] dev_ptr pointer to be prefetched + * @param [in] count size in bytes for prefetching + * @param [in] device destination device to prefetch to + * @param [in] stream stream to enqueue prefetch operation + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemPrefetchAsync(const void* dev_ptr, + size_t count, + int device, + hipStream_t stream __dparm(0)); +/** + * @brief Advise about the usage of a given memory range to HIP. + * + * @param [in] dev_ptr pointer to memory to set the advice for + * @param [in] count size in bytes of the memory range + * @param [in] advice advice to be applied for the specified memory range + * @param [in] device device to apply the advice for + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemAdvise(const void* dev_ptr, + size_t count, + hipMemoryAdvise advice, + int device); +/** + * @brief Query an attribute of a given memory range in HIP. + * + * @param [in/out] data a pointer to a memory location where the result of each + * attribute query will be written to + * @param [in] data_size the size of data + * @param [in] attribute the attribute to query + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemRangeGetAttribute(void* data, + size_t data_size, + hipMemRangeAttribute attribute, + const void* dev_ptr, + size_t count); +/** + * @brief Query attributes of a given memory range in HIP. + * + * @param [in/out] data a two-dimensional array containing pointers to memory locations + * where the result of each attribute query will be written to + * @param [in] data_sizes an array, containing the sizes of each result + * @param [in] attributes the attribute to query + * @param [in] num_attributes an array of attributes to query (numAttributes and the number + * of attributes in this array should match) + * @param [in] dev_ptr start of the range to query + * @param [in] count size of the range to query + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipMemRangeGetAttributes(void** data, + size_t* data_sizes, + hipMemRangeAttribute* attributes, + size_t num_attributes, + const void* dev_ptr, + size_t count); +/** + * @brief Attach memory to a stream asynchronously in HIP. + * + * @param [in] stream - stream in which to enqueue the attach operation + * @param [in] dev_ptr - pointer to memory (must be a pointer to managed memory or + * to a valid host-accessible region of system-allocated memory) + * @param [in] length - length of memory (defaults to zero) + * @param [in] flags - must be one of hipMemAttachGlobal, hipMemAttachHost or + * hipMemAttachSingle (defaults to hipMemAttachSingle) + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipStreamAttachMemAsync(hipStream_t stream, + hipDeviceptr_t* dev_ptr, + size_t length __dparm(0), + unsigned int flags __dparm(hipMemAttachSingle)); +// end doxygen Managed Memory +/** + * @} + */ +/** + * @brief Allocate device accessible page locked host memory [Deprecated] + * + * @param[out] ptr Pointer to the allocated host pinned memory + * @param[in] size Requested memory size + * @param[in] flags Type of host memory allocation + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return #hipSuccess, #hipErrorOutOfMemory + * + * @deprecated use hipHostMalloc() instead + */ +DEPRECATED("use hipHostMalloc instead") +hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags); +/** + * @brief Get Device pointer from Host Pointer allocated through hipHostMalloc + * + * @param[out] dstPtr Device Pointer mapped to passed host pointer + * @param[in] hstPtr Host Pointer allocated through hipHostMalloc + * @param[in] flags Flags to be passed for extension + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorOutOfMemory + * + * @see hipSetDeviceFlags, hipHostMalloc + */ +hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags); +/** + * @brief Return flags associated with host pointer + * + * @param[out] flagsPtr Memory location to store flags + * @param[in] hostPtr Host Pointer allocated through hipHostMalloc + * @return #hipSuccess, #hipErrorInvalidValue + * + * @see hipHostMalloc + */ +hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr); +/** + * @brief Register host memory so it can be accessed from the current device. + * + * @param[out] hostPtr Pointer to host memory to be registered. + * @param[in] sizeBytes size of the host memory + * @param[in] flags. See below. + * + * Flags: + * - #hipHostRegisterDefault Memory is Mapped and Portable + * - #hipHostRegisterPortable Memory is considered registered by all contexts. HIP only supports + * one context so this is always assumed true. + * - #hipHostRegisterMapped Map the allocation into the address space for the current device. + * The device pointer can be obtained with #hipHostGetDevicePointer. + * + * + * After registering the memory, use #hipHostGetDevicePointer to obtain the mapped device pointer. + * On many systems, the mapped device pointer will have a different value than the mapped host + * pointer. Applications must use the device pointer in device code, and the host pointer in device + * code. + * + * On some systems, registered memory is pinned. On some systems, registered memory may not be + * actually be pinned but uses OS or hardware facilities to all GPU access to the host memory. + * + * Developers are strongly encouraged to register memory blocks which are aligned to the host + * cache-line size. (typically 64-bytes but can be obtains from the CPUID instruction). + * + * If registering non-aligned pointers, the application must take care when register pointers from + * the same cache line on different devices. HIP's coarse-grained synchronization model does not + * guarantee correct results if different devices write to different parts of the same cache block - + * typically one of the writes will "win" and overwrite data from the other registered memory + * region. + * + * @return #hipSuccess, #hipErrorOutOfMemory + * + * @see hipHostUnregister, hipHostGetFlags, hipHostGetDevicePointer + */ +hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags); +/** + * @brief Un-register host pointer + * + * @param[in] hostPtr Host pointer previously registered with #hipHostRegister + * @return Error code + * + * @see hipHostRegister + */ +hipError_t hipHostUnregister(void* hostPtr); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alighnment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] ptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * + * @return Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height); +/** + * Allocates at least width (in bytes) * height bytes of linear memory + * Padding may occur to ensure alighnment requirements are met for the given row + * The change in width size due to padding will be returned in *pitch. + * Currently the alignment is set to 128 bytes + * + * @param[out] dptr Pointer to the allocated device memory + * @param[out] pitch Pitch for allocation (in bytes) + * @param[in] width Requested pitched allocation width (in bytes) + * @param[in] height Requested pitched allocation height + * + * If size is 0, no memory is allocated, *ptr returns nullptr, and hipSuccess is returned. + * The intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array. + * Given the row and column of an array element of type T, the address is computed as: + * T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + * + * @return Error code + * + * @see hipMalloc, hipFree, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipMemAllocPitch(hipDeviceptr_t* dptr, size_t* pitch, size_t widthInBytes, size_t height, unsigned int elementSizeBytes); +/** + * @brief Free memory allocated by the hcc hip memory allocation API. + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @return #hipSuccess + * @return #hipErrorInvalidDevicePointer (if pointer is invalid, including host pointers allocated + * with hipHostMalloc) + * + * @see hipMalloc, hipMallocPitch, hipMallocArray, hipFreeArray, hipHostFree, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipFree(void* ptr); +/** + * @brief Free memory allocated by the hcc hip host memory allocation API. [Deprecated] + * + * @param[in] ptr Pointer to memory to be freed + * @return #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with + hipMalloc) + * @deprecated use hipHostFree() instead + */ +DEPRECATED("use hipHostFree instead") +hipError_t hipFreeHost(void* ptr); +/** + * @brief Free memory allocated by the hcc hip host memory allocation API + * This API performs an implicit hipDeviceSynchronize() call. + * If pointer is NULL, the hip runtime is initialized and hipSuccess is returned. + * + * @param[in] ptr Pointer to memory to be freed + * @return #hipSuccess, + * #hipErrorInvalidValue (if pointer is invalid, including device pointers allocated with + * hipMalloc) + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipFreeArray, hipMalloc3D, + * hipMalloc3DArray, hipHostMalloc + */ +hipError_t hipHostFree(void* ptr); +/** + * @brief Copy data from src to dst. + * + * It supports memory from host to device, + * device to host, device to device and host to host + * The src and dst must not overlap. + * + * For hipMemcpy, the copy is always performed by the current device (set by hipSetDevice). + * For multi-gpu or peer-to-peer configurations, it is recommended to set the current device to the + * device where the src data is physically located. For optimal peer-to-peer copies, the copy device + * must be able to access the src and dst pointers (by calling hipDeviceEnablePeerAccess with copy + * agent as the current device and src/dest as the peerDevice argument. if this is not done, the + * hipMemcpy will still work, but will perform the copy using a staging buffer on the host. + * Calling hipMemcpy with dst and src pointers that do not match the hipMemcpyKind results in + * undefined behavior. + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] copyType Memory copy type + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknowni + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); +// TODO: Add description +hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, + hipMemcpyKind kind, hipStream_t stream); +/** + * @brief Copy data from Host to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes); +/** + * @brief Copy data from Device to Host + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copy data from Device to Device + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes); +/** + * @brief Copy data from Host to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream); +/** + * @brief Copy data from Device to Host asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream); +/** + * @brief Copy data from Device to Device asynchronously + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * + * @return #hipSuccess, #hipErrorDeInitialized, #hipErrorNotInitialized, #hipErrorInvalidContext, + * #hipErrorInvalidValue + * + * @see hipArrayCreate, hipArrayDestroy, hipArrayGetDescriptor, hipMemAlloc, hipMemAllocHost, + * hipMemAllocPitch, hipMemcpy2D, hipMemcpy2DAsync, hipMemcpy2DUnaligned, hipMemcpyAtoA, + * hipMemcpyAtoD, hipMemcpyAtoH, hipMemcpyAtoHAsync, hipMemcpyDtoA, hipMemcpyDtoD, + * hipMemcpyDtoDAsync, hipMemcpyDtoH, hipMemcpyDtoHAsync, hipMemcpyHtoA, hipMemcpyHtoAAsync, + * hipMemcpyHtoDAsync, hipMemFree, hipMemFreeHost, hipMemGetAddressRange, hipMemGetInfo, + * hipMemHostAlloc, hipMemHostGetDevicePointer + */ +hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, + hipStream_t stream); +hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, + hipModule_t hmod, const char* name); +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol); +hipError_t hipGetSymbolSize(size_t* size, const void* symbol); +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, + size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)); +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)); +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, + size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)); +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, + size_t sizeBytes, size_t offset, + hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Copy data from src to dst asynchronously. + * + * @warning If host or dest are not pinned, the memory copy will be performed synchronously. For + * best performance, use hipHostMalloc to allocate host memory that is transferred asynchronously. + * + * @warning on HCC hipMemcpyAsync does not support overlapped H2D and D2H copies. + * For hipMemcpy, the copy is always performed by the device associated with the specified stream. + * + * For multi-gpu or peer-to-peer configurations, it is recommended to use a stream which is a + * attached to the device where the src data is physically located. For optimal peer-to-peer copies, + * the copy device must be able to access the src and dst pointers (by calling + * hipDeviceEnablePeerAccess with copy agent as the current device and src/dest as the peerDevice + * argument. if this is not done, the hipMemcpy will still work, but will perform the copy using a + * staging buffer on the host. + * + * @param[out] dst Data being copy to + * @param[in] src Data being copy from + * @param[in] sizeBytes Data size in bytes + * @param[in] accelerator_view Accelerator view which the copy is being enqueued + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyToSymbol, + * hipMemcpyFromSymbol, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync, + * hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync, + * hipMemcpyFromSymbolAsync + */ +hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * @param[out] dst Data being filled + * @param[in] constant value to be set + * @param[in] sizeBytes Data size in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemset(void* dst, int value, size_t sizeBytes); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * @param[out] dst Data ptr to be filled + * @param[in] constant value to be set + * @param[in] number of values to be set + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * byte value value. + * + * hipMemsetD8Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Data ptr to be filled + * @param[in] constant value to be set + * @param[in] number of values to be set + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t count, hipStream_t stream __dparm(0)); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * @param[out] dst Data ptr to be filled + * @param[in] constant value to be set + * @param[in] number of values to be set + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dest with the constant + * short value value. + * + * hipMemsetD16Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Data ptr to be filled + * @param[in] constant value to be set + * @param[in] number of values to be set + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t count, hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dest with the constant integer + * value for specified number of times. + * + * @param[out] dst Data being filled + * @param[in] constant value to be set + * @param[in] number of values to be set + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + */ +hipError_t hipMemsetD32(hipDeviceptr_t dest, int value, size_t count); +/** + * @brief Fills the first sizeBytes bytes of the memory area pointed to by dev with the constant + * byte value value. + * + * hipMemsetAsync() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value - Value to set for each byte of specified memory + * @param[in] sizeBytes - Size in bytes to set + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dev with the constant integer + * value for specified number of times. + * + * hipMemsetD32Async() is asynchronous with respect to the host, so the call may return before the + * memset is complete. The operation can optionally be associated to a stream by passing a non-zero + * stream argument. If stream is non-zero, the operation may overlap with operations in other + * streams. + * + * @param[out] dst Pointer to device memory + * @param[in] value - Value to set for each byte of specified memory + * @param[in] count - number of values to be set + * @param[in] stream - Stream identifier + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemsetD32Async(hipDeviceptr_t dst, int value, size_t count, + hipStream_t stream __dparm(0)); +/** + * @brief Fills the memory area pointed to by dst with the constant value. + * + * @param[out] dst Pointer to device memory + * @param[in] pitch - data size in bytes + * @param[in] value - constant value to be set + * @param[in] width + * @param[in] height + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height); +/** + * @brief Fills asynchronously the memory area pointed to by dst with the constant value. + * + * @param[in] dst Pointer to device memory + * @param[in] pitch - data size in bytes + * @param[in] value - constant value to be set + * @param[in] width + * @param[in] height + * @param[in] stream + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height,hipStream_t stream __dparm(0)); +/** + * @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr + * @param[in] value - constant value to be set + * @param[in] extent + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ); +/** + * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr + * @param[in] value - constant value to be set + * @param[in] extent + * @param[in] stream + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream __dparm(0)); +/** + * @brief Query memory info. + * Return snapshot of free memory, and total allocatable memory on the device. + * + * Returns in *free a snapshot of the current free memory. + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue + * @warning On HCC, the free memory only accounts for memory allocated by this process and may be + *optimistic. + **/ +hipError_t hipMemGetInfo(size_t* free, size_t* total); +hipError_t hipMemPtrGetInfo(void* ptr, size_t* size); +/** + * @brief Allocate an array on the device. + * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] width Requested array allocation width + * @param[in] height Requested array allocation height + * @param[in] flags Requested properties of allocated array + * @return #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, + size_t height __dparm(0), unsigned int flags __dparm(hipArrayDefault)); +hipError_t hipArrayCreate(hipArray** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray); +hipError_t hipArrayDestroy(hipArray* array); +hipError_t hipArray3DCreate(hipArray** array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray); +hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent); +/** + * @brief Frees an array on the device. + * + * @param[in] array Pointer to array to free + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorNotInitialized + * + * @see hipMalloc, hipMallocPitch, hipFree, hipMallocArray, hipHostMalloc, hipHostFree + */ +hipError_t hipFreeArray(hipArray* array); +/** + * @brief Frees a mipmapped array on the device + * + * @param[in] mipmappedArray - Pointer to mipmapped array to free + * + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipFreeMipmappedArray(hipMipmappedArray_t mipmappedArray); +/** + * @brief Allocate an array on the device. + * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] extent Requested array allocation width, height and depth + * @param[in] flags Requested properties of allocated array + * @return #hipSuccess, #hipErrorOutOfMemory + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ +hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* desc, + struct hipExtent extent, unsigned int flags); +/** + * @brief Allocate a mipmapped array on the device + * + * @param[out] mipmappedArray - Pointer to allocated mipmapped array in device memory + * @param[in] desc - Requested channel format + * @param[in] extent - Requested allocation size (width field in elements) + * @param[in] numLevels - Number of mipmap levels to allocate + * @param[in] flags - Flags for extensions + * + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryAllocation + */ +hipError_t hipMallocMipmappedArray( + hipMipmappedArray_t *mipmappedArray, + const struct hipChannelFormatDesc* desc, + struct hipExtent extent, + unsigned int numLevels, + unsigned int flags __dparm(0)); +/** + * @brief Gets a mipmap level of a HIP mipmapped array + * + * @param[out] levelArray - Returned mipmap level HIP array + * @param[in] mipmappedArray - HIP mipmapped array + * @param[in] level - Mipmap level + * + * @return #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipGetMipmappedArrayLevel( + hipArray_t *levelArray, + hipMipmappedArray_const_t mipmappedArray, + unsigned int level); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync +*/ +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy); +/** + * @brief Copies memory for 2D arrays. + * @param[in] pCopy Parameters for the memory copy + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync +*/ +hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] spitch Pitch of source memory + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpyToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t spitch, size_t width, size_t height, hipMemcpyKind kind, + hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] wOffset Destination starting X offset + * @param[in] hOffset Destination starting Y offset + * @param[in] src Source memory address + * @param[in] count size in bytes to copy + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] srcArray Source memory address + * @param[in] woffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] count Size in bytes to copy + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, size_t wOffset, size_t hOffset, + size_t count, hipMemcpyKind kind); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] dst Destination memory address + * @param[in] dpitch Pitch of destination memory + * @param[in] src Source memory address + * @param[in] wOffset Source starting X offset + * @param[in] hOffset Source starting Y offset + * @param[in] width Width of matrix transfer (columns in bytes) + * @param[in] height Height of matrix transfer (rows) + * @param[in] kind Type of transfer + * @param[in] stream Accelerator view which the copy is being enqueued + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] dst Destination memory address + * @param[in] srcArray Source array + * @param[in] srcoffset Offset in bytes of source array + * @param[in] count Size of memory copy in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] dstArray Destination memory address + * @param[in] dstOffset Offset in bytes of destination array + * @param[in] srcHost Source host pointer + * @param[in] count Size of memory copy in bytes + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, size_t count); +/** + * @brief Copies data between host and device. + * + * @param[in] p 3D memory copy parameters + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms* p); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] p 3D memory copy parameters + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms* p, hipStream_t stream __dparm(0)); +/** + * @brief Copies data between host and device. + * + * @param[in] pCopy 3D memory copy parameters + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy); +/** + * @brief Copies data between host and device asynchronously. + * + * @param[in] pCopy 3D memory copy parameters + * @param[in] stream Stream to use + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2DToArray, hipMemcpy2D, hipMemcpyFromArray, hipMemcpyToSymbol, + * hipMemcpyAsync + */ +hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream); +// doxygen end Memory +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup PeerToPeer PeerToPeer Device Memory Access + * @{ + * @warning PeerToPeer support is experimental. + * This section describes the PeerToPeer device memory access functions of HIP runtime API. + */ +/** + * @brief Determine if a device can access a peer's memory. + * + * @param [out] canAccessPeer Returns the peer access capability (0 or 1) + * @param [in] device - device from where memory may be accessed. + * @param [in] peerDevice - device where memory is physically located + * + * Returns "1" in @p canAccessPeer if the specified @p device is capable + * of directly accessing memory physically located on peerDevice , or "0" if not. + * + * Returns "0" in @p canAccessPeer if deviceId == peerDeviceId, and both are valid devices : a + * device is not a peer of itself. + * + * @returns #hipSuccess, + * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices + */ +hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId); +/** + * @brief Enable direct access from current device's virtual address space to memory allocations + * physically located on a peer device. + * + * Memory which already allocated on peer device will be mapped into the address space of the + * current device. In addition, all future memory allocations on peerDeviceId will be mapped into + * the address space of the current device when the memory is allocated. The peer memory remains + * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerDeviceId + * @param [in] flags + * + * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. + */ +hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags); +/** + * @brief Disable direct access from current device's virtual address space to memory allocations + * physically located on a peer device. + * + * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been + * enabled from the current device. + * + * @param [in] peerDeviceId + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + */ +hipError_t hipDeviceDisablePeerAccess(int peerDeviceId); +/** + * @brief Get information on memory allocations. + * + * @param [out] pbase - BAse pointer address + * @param [out] psize - Size of allocation + * @param [in] dptr- Device Pointer + * + * @returns #hipSuccess, #hipErrorInvalidDevicePointer + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, hipDeviceptr_t dptr); +#ifndef USE_PEER_NON_UNIFIED +#define USE_PEER_NON_UNIFIED 1 +#endif +#if USE_PEER_NON_UNIFIED == 1 +/** + * @brief Copies memory from one device to memory on another device. + * + * @param [out] dst - Destination device pointer. + * @param [in] dstDeviceId - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDeviceId - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, + size_t sizeBytes); +/** + * @brief Copies memory from one device to memory on another device. + * + * @param [out] dst - Destination device pointer. + * @param [in] dstDevice - Destination device + * @param [in] src - Source device pointer + * @param [in] srcDevice - Source device + * @param [in] sizeBytes - Size of memory copy in bytes + * @param [in] stream - Stream identifier + * + * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice + */ +hipError_t hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice, + size_t sizeBytes, hipStream_t stream __dparm(0)); +#endif +// doxygen end PeerToPeer +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Context Context Management + * @{ + * This section describes the context management functions of HIP runtime API. + */ +/** + * + * @addtogroup ContextD Context Management [Deprecated] + * @{ + * @ingroup Context + * This section describes the deprecated context management functions of HIP runtime API. + */ +/** + * @brief Create a context and set it as current/ default context + * + * @param [out] ctx + * @param [in] flags + * @param [in] associated device handle + * + * @return #hipSuccess + * + * @see hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxPushCurrent, + * hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device); +/** + * @brief Destroy a HIP context. + * + * @param [in] ctx Context to destroy + * + * @returns #hipSuccess, #hipErrorInvalidValue + * + * @see hipCtxCreate, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent,hipCtxSetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxDestroy(hipCtx_t ctx); +/** + * @brief Pop the current/default context and return the popped context. + * + * @param [out] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxSetCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxPopCurrent(hipCtx_t* ctx); +/** + * @brief Push the context to be set as current/ default context + * + * @param [in] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxPushCurrent(hipCtx_t ctx); +/** + * @brief Set the passed context as current/default + * + * @param [in] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize , hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSetCurrent(hipCtx_t ctx); +/** + * @brief Get the handle of the current/ default context + * + * @param [out] ctx + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetCurrent(hipCtx_t* ctx); +/** + * @brief Get the handle of the device associated with current/default context + * + * @param [out] device + * + * @returns #hipSuccess, #hipErrorInvalidContext + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetDevice(hipDevice_t* device); +/** + * @brief Returns the approximate HIP api version. + * + * @param [in] ctx Context to check + * @param [out] apiVersion + * + * @return #hipSuccess + * + * @warning The HIP feature set does not correspond to an exact CUDA SDK api revision. + * This function always set *apiVersion to 4 as an approximation though HIP supports + * some features which were introduced in later CUDA SDK revisions. + * HIP apps code should not rely on the api revision number here and should + * use arch feature flags to test device capabilities or conditional compilation. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetDevice, hipCtxGetFlags, hipCtxPopCurrent, + * hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion); +/** + * @brief Set Cache configuration for a specific function + * + * @param [out] cacheConfiguration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig); +/** + * @brief Set L1/Shared cache partition. + * + * @param [in] cacheConfiguration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig); +/** + * @brief Set Shared memory bank configuration. + * + * @param [in] sharedMemoryConfiguration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config); +/** + * @brief Get Shared memory bank configuration. + * + * @param [out] sharedMemoryConfiguration + * + * @return #hipSuccess + * + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is + * ignored on those architectures. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig); +/** + * @brief Blocks until the default context has completed all preceding requested tasks. + * + * @return #hipSuccess + * + * @warning This function waits for all streams on the default context to complete execution, and + * then returns. + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxSynchronize(void); +/** + * @brief Return flags used for creating default context. + * + * @param [out] flags + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxGetFlags(unsigned int* flags); +/** + * @brief Enables direct access to memory allocations in a peer context. + * + * Memory which already allocated on peer device will be mapped into the address space of the + * current device. In addition, all future memory allocations on peerDeviceId will be mapped into + * the address space of the current device when the memory is allocated. The peer memory remains + * accessible from the current device until a call to hipDeviceDisablePeerAccess or hipDeviceReset. + * + * + * @param [in] peerCtx + * @param [in] flags + * + * @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, + * #hipErrorPeerAccessAlreadyEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags); +/** + * @brief Disable direct access from current context's virtual address space to memory allocations + * physically located on a peer context.Disables direct access to memory allocations in a peer + * context and unregisters any registered allocations. + * + * Returns hipErrorPeerAccessNotEnabled if direct access to memory on peerDevice has not yet been + * enabled from the current device. + * + * @param [in] peerCtx + * + * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning PeerToPeer support is experimental. + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx); +// doxygen end Context deprecated +/** + * @} + */ +/** + * @brief Get the state of the primary context. + * + * @param [in] Device to get primary context flags for + * @param [out] Pointer to store flags + * @param [out] Pointer to store context state; 0 = inactive, 1 = active + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active); +/** + * @brief Release the primary context on the GPU. + * + * @param [in] Device which primary context is released + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + * @warning This function return #hipSuccess though doesn't release the primaryCtx by design on + * HIP/HCC path. + */ +hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev); +/** + * @brief Retain the primary context on the GPU. + * + * @param [out] Returned context handle of the new context + * @param [in] Device which primary context is released + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev); +/** + * @brief Resets the primary context on the GPU. + * + * @param [in] Device which primary context is reset + * + * @returns #hipSuccess + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev); +/** + * @brief Set flags for the primary context. + * + * @param [in] Device for which the primary context flags are set + * @param [in] New flags for the device + * + * @returns #hipSuccess, #hipErrorContextAlreadyInUse + * + * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, + * hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice + */ +hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags); +// doxygen end Context Management +/** + * @} + */ +/** + * + * @defgroup Module Module Management + * @{ + * This section describes the module management functions of HIP runtime API. + * + */ +/** + * @brief Loads code object from file into a hipModule_t + * + * @param [in] fname + * @param [out] module + * + * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound, + * hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized + * + * + */ +hipError_t hipModuleLoad(hipModule_t* module, const char* fname); +/** + * @brief Frees the module + * + * @param [in] module + * + * @returns hipSuccess, hipInvalidValue + * module is freed and the code objects associated with it are destroyed + * + */ +hipError_t hipModuleUnload(hipModule_t module); +/** + * @brief Function with kname will be extracted if present in module + * + * @param [in] module + * @param [in] kname + * @param [out] function + * + * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, + * hipErrorNotFound, + */ +hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); +/** + * @brief Find out attributes for a given function. + * + * @param [out] attr + * @param [in] func + * + * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func); +/** + * @brief Find out a specific attribute for a given function. + * + * @param [out] value + * @param [in] attrib + * @param [in] hfunc + * + * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidDeviceFunction + */ +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc); +/** + * @brief returns the handle of the texture reference with the name from the module. + * + * @param [in] hmod + * @param [in] name + * @param [out] texRef + * + * @returns hipSuccess, hipErrorNotInitialized, hipErrorNotFound, hipErrorInvalidValue + */ +hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name); +/** + * @brief builds module from code object which resides in host memory. Image is pointer to that + * location. + * + * @param [in] image + * @param [out] module + * + * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized + */ +hipError_t hipModuleLoadData(hipModule_t* module, const void* image); +/** + * @brief builds module from code object which resides in host memory. Image is pointer to that + * location. Options are not used. hipModuleLoadData is called. + * + * @param [in] image + * @param [out] module + * @param [in] number of options + * @param [in] options for JIT + * @param [in] option values for JIT + * + * @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized + */ +hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, + hipJitOption* options, void** optionValues); +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelparams or extra + * + * @param [in] f Kernel to launch. + * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. + * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. + * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. + * @param [in] blockDimX X block dimensions specified in work-items + * @param [in] blockDimY Y grid dimension specified in work-items + * @param [in] blockDimZ Z grid dimension specified in work-items + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th + * default stream is used with associated synchronization rules. + * @param [in] kernelParams + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and + * must be in the memory layout and alignment expected by the kernel. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please + * refer to hip_porting_driver_api.md for sample usage. + */ +hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ, + unsigned int sharedMemBytes, hipStream_t stream, + void** kernelParams, void** extra); +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute + * + * @param [in] f Kernel to launch. + * @param [in] gridDim Grid dimensions specified as multiple of blockDim. + * @param [in] blockDim Block dimensions specified in work-items + * @param [in] kernelParams A list of kernel arguments + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream); +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] hipLaunchParams List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge + */ +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags); +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other threads + * + * + * @param [in] hipLaunchParams List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + */ +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags); +// doxygen end Module +/** + * @} + */ +/** + * + * @defgroup Occupancy Occupancy + * @{ + * This section describes the occupancy functions of HIP runtime API. + * + */ +/** + * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calulated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue + */ +//TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit); +/** + * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calulated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * + * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue + */ +//TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function (hipFunction) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (only default supported) + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (currently ignored) + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); +/** + * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calulated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue + */ +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit); +// doxygen end Occupancy +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Profiler Profiler Control[Deprecated] + * @{ + * This section describes the profiler control functions of HIP runtime API. + * + * @warning The cudaProfilerInitialize API format for "configFile" is not supported. + * + */ +// TODO - expand descriptions: +/** + * @brief Start recording of profiling information + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @warning : hipProfilerStart API is under development. + */ +DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStart(); +/** + * @brief Stop recording of profiling information. + * When using this API, start the profiler with profiling disabled. (--startdisabled) + * @warning : hipProfilerStop API is under development. + */ +DEPRECATED("use roctracer/rocTX instead") +hipError_t hipProfilerStop(); +// doxygen end profiler +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Clang Launch API to support the triple-chevron syntax + * @{ + * This section describes the API to support the triple-chevron syntax. + */ +/** + * @brief Configure a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ +hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dparm(0), hipStream_t stream __dparm(0)); +/** + * @brief Set a kernel argument. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + * @param [in] arg Pointer the argument in host memory. + * @param [in] size Size of the argument. + * @param [in] offset Offset of the argument on the argument stack. + * + */ +hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset); +/** + * @brief Launch a kernel. + * + * @param [in] func Kernel to launch. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ +hipError_t hipLaunchByPtr(const void* func); +/** + * @brief Push configuration of a kernel launch. + * + * @param [in] gridDim grid dimension specified as multiple of blockDim. + * @param [in] blockDim block dimensions specified in work-items + * @param [in] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ +hipError_t __hipPushCallConfiguration(dim3 gridDim, + dim3 blockDim, + size_t sharedMem __dparm(0), + hipStream_t stream __dparm(0)); +/** + * @brief Pop configuration of a kernel launch. + * + * @param [out] gridDim grid dimension specified as multiple of blockDim. + * @param [out] blockDim block dimensions specified in work-items + * @param [out] sharedMem Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [out] stream Stream where the kernel should be dispatched. May be 0, in which case the + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + */ +hipError_t __hipPopCallConfiguration(dim3 *gridDim, + dim3 *blockDim, + size_t *sharedMem, + hipStream_t *stream); +/** + * @brief C compliant kernel launch API + * + * @param [in] function_address - kernel stub function pointer. + * @param [in] numBlocks - number of blocks + * @param [in] dimBlocks - dimension of a block + * @param [in] args - kernel arguments + * @param [in] sharedMemBytes - Amount of dynamic shared memory to allocate for this kernel. The + * HIP-Clang compiler provides support for extern shared declarations. + * @param [in] stream - Stream where the kernel should be dispatched. May be 0, in which case th + * default stream is used with associated synchronization rules. + * + * @returns #hipSuccess, #hipErrorInvalidValue, hipInvalidDevice + * + */ +hipError_t hipLaunchKernel(const void* function_address, + dim3 numBlocks, + dim3 dimBlocks, + void** args, + size_t sharedMemBytes __dparm(0), + hipStream_t stream __dparm(0)); +/** + * Copies memory for 2D arrays. + * + * @param pCopy - Parameters for the memory copy + * + * @returns #hipSuccess, #hipErrorInvalidValue + */ +hipError_t hipDrvMemcpy2DUnaligned(const hip_Memcpy2D* pCopy); +//TODO: Move this to hip_ext.h +hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks, + void** args, size_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, int flags); +// doxygen end Clang launch +/** + * @} + */ +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup Textur Texture Management + * @{ + * This section describes the texture management functions of HIP runtime API. + */ +/** + * + * @addtogroup TexturD Texture Management [Deprecated] + * @{ + * @ingroup Texture + * This section describes the deprecated texture management functions of HIP runtime API. + */ +DEPRECATED(DEPRECATED_MSG) +hipError_t hipBindTexture( + size_t* offset, + const textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size __dparm(UINT_MAX)); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipBindTexture2D( + size_t* offset, + const textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipBindTextureToArray( + const textureReference* tex, + hipArray_const_t array, + const hipChannelFormatDesc* desc); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipGetTextureAlignmentOffset( + size_t* offset, + const textureReference* texref); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipUnbindTexture(const textureReference* tex); +// doxygen end deprecated texture management +/** + * @} + */ +hipError_t hipBindTextureToMipmappedArray( + const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + hipError_t hipGetTextureReference( + const textureReference** texref, + const void* symbol); +hipError_t hipCreateTextureObject( + hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const struct hipResourceViewDesc* pResViewDesc); +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); +hipError_t hipGetChannelDesc( + hipChannelFormatDesc* desc, + hipArray_const_t array); +hipError_t hipGetTextureObjectResourceDesc( + hipResourceDesc* pResDesc, + hipTextureObject_t textureObject); +hipError_t hipGetTextureObjectResourceViewDesc( + struct hipResourceViewDesc* pResViewDesc, + hipTextureObject_t textureObject); +hipError_t hipGetTextureObjectTextureDesc( + hipTextureDesc* pTexDesc, + hipTextureObject_t textureObject); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetAddress( + hipDeviceptr_t* dev_ptr, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetAddressMode( + enum hipTextureAddressMode* pam, + const textureReference* texRef, + int dim); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetFilterMode( + enum hipTextureFilterMode* pfm, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetFlags( + unsigned int* pFlags, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetFormat( + hipArray_Format* pFormat, + int* pNumChannels, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetMaxAnisotropy( + int* pmaxAnsio, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapFilterMode( + enum hipTextureFilterMode* pfm, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelBias( + float* pbias, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetMipmapLevelClamp( + float* pminMipmapLevelClamp, + float* pmaxMipmapLevelClamp, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefGetMipMappedArray( + hipMipmappedArray_t* pArray, + const textureReference* texRef); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefSetAddress( + size_t* ByteOffset, + textureReference* texRef, + hipDeviceptr_t dptr, + size_t bytes); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefSetAddress2D( + textureReference* texRef, + const HIP_ARRAY_DESCRIPTOR* desc, + hipDeviceptr_t dptr, + size_t Pitch); +hipError_t hipTexRefSetAddressMode( + textureReference* texRef, + int dim, + enum hipTextureAddressMode am); +hipError_t hipTexRefSetArray( + textureReference* tex, + hipArray_const_t array, + unsigned int flags); +hipError_t hipTexRefSetFilterMode( + textureReference* texRef, + enum hipTextureFilterMode fm); +hipError_t hipTexRefSetFlags( + textureReference* texRef, + unsigned int Flags); +hipError_t hipTexRefSetFormat( + textureReference* texRef, + hipArray_Format fmt, + int NumPackedComponents); +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefSetMaxAnisotropy( + textureReference* texRef, + unsigned int maxAniso); +hipError_t hipTexObjectCreate( + hipTextureObject_t* pTexObject, + const HIP_RESOURCE_DESC* pResDesc, + const HIP_TEXTURE_DESC* pTexDesc, + const HIP_RESOURCE_VIEW_DESC* pResViewDesc); +hipError_t hipTexObjectDestroy( + hipTextureObject_t texObject); +hipError_t hipTexObjectGetResourceDesc( + HIP_RESOURCE_DESC* pResDesc, + hipTextureObject_t texObject); +hipError_t hipTexObjectGetResourceViewDesc( + HIP_RESOURCE_VIEW_DESC* pResViewDesc, + hipTextureObject_t texObject); +hipError_t hipTexObjectGetTextureDesc( + HIP_TEXTURE_DESC* pTexDesc, + hipTextureObject_t texObject); +// doxygen end Texture management +/** + * @} + */ +// The following are not supported. +DEPRECATED(DEPRECATED_MSG) +hipError_t hipTexRefSetBorderColor( + textureReference* texRef, + float* pBorderColor); +hipError_t hipTexRefSetMipmapFilterMode( + textureReference* texRef, + enum hipTextureFilterMode fm); +hipError_t hipTexRefSetMipmapLevelBias( + textureReference* texRef, + float bias); +hipError_t hipTexRefSetMipmapLevelClamp( + textureReference* texRef, + float minMipMapLevelClamp, + float maxMipMapLevelClamp); +hipError_t hipTexRefSetMipmappedArray( + textureReference* texRef, + struct hipMipmappedArray* mipmappedArray, + unsigned int Flags); +hipError_t hipMipmappedArrayCreate( + hipMipmappedArray_t* pHandle, + HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, + unsigned int numMipmapLevels); +hipError_t hipMipmappedArrayDestroy( + hipMipmappedArray_t hMipmappedArray); +hipError_t hipMipmappedArrayGetLevel( + hipArray_t* pLevelArray, + hipMipmappedArray_t hMipMappedArray, + unsigned int level); +/** + * Callback/Activity API + */ +hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg); +hipError_t hipRemoveApiCallback(uint32_t id); +hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg); +hipError_t hipRemoveActivityCallback(uint32_t id); +const char* hipApiName(uint32_t id); +const char* hipKernelNameRef(const hipFunction_t f); +const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream); +int hipGetStreamDeviceId(hipStream_t stream); +#ifdef __cplusplus +/** + * An opaque value that represents a hip graph + */ +class hipGraph; +typedef hipGraph* hipGraph_t; +/** + * An opaque value that represents a hip graph node + */ +class hipGraphNode; +typedef hipGraphNode* hipGraphNode_t; +/** + * An opaque value that represents a hip graph Exec + */ +class hipGraphExec; +typedef hipGraphExec* hipGraphExec_t; +typedef enum hipGraphNodeType { + hipGraphNodeTypeKernel = 1, ///< GPU kernel node + hipGraphNodeTypeMemcpy = 2, ///< Memcpy 3D node + hipGraphNodeTypeMemset = 3, ///< Memset 1D node + hipGraphNodeTypeHost = 4, ///< Host (executable) node + hipGraphNodeTypeGraph = 5, ///< Node which executes an embedded graph + hipGraphNodeTypeEmpty = 6, ///< Empty (no-op) node + hipGraphNodeTypeWaitEvent = 7, ///< External event wait node + hipGraphNodeTypeEventRecord = 8, ///< External event record node + hipGraphNodeTypeMemcpy1D = 9, ///< Memcpy 1D node + hipGraphNodeTypeMemcpyFromSymbol = 10, ///< MemcpyFromSymbol node + hipGraphNodeTypeMemcpyToSymbol = 11, ///< MemcpyToSymbol node + hipGraphNodeTypeCount +} hipGraphNodeType; +typedef void (*hipHostFn_t)(void* userData); +typedef struct hipHostNodeParams { + hipHostFn_t fn; + void* userData; +} hipHostNodeParams; +typedef struct hipKernelNodeParams { + dim3 blockDim; + void** extra; + void* func; + dim3 gridDim; + void** kernelParams; + unsigned int sharedMemBytes; +} hipKernelNodeParams; +typedef struct hipMemsetParams { + void* dst; + unsigned int elementSize; + size_t height; + size_t pitch; + unsigned int value; + size_t width; +} hipMemsetParams; +enum hipGraphExecUpdateResult { + hipGraphExecUpdateSuccess = 0x0, ///< The update succeeded + hipGraphExecUpdateError = 0x1, ///< The update failed for an unexpected reason which is described + ///< in the return value of the function + hipGraphExecUpdateErrorTopologyChanged = 0x2, ///< The update failed because the topology changed + hipGraphExecUpdateErrorNodeTypeChanged = 0x3, ///< The update failed because a node type changed + hipGraphExecUpdateErrorFunctionChanged = + 0x4, ///< The update failed because the function of a kernel node changed + hipGraphExecUpdateErrorParametersChanged = + 0x5, ///< The update failed because the parameters changed in a way that is not supported + hipGraphExecUpdateErrorNotSupported = + 0x6, ///< The update failed because something about the node is not supported + hipGraphExecUpdateErrorUnsupportedFunctionChange = 0x7 +}; +enum hipStreamCaptureMode { + hipStreamCaptureModeGlobal = 0, + hipStreamCaptureModeThreadLocal, + hipStreamCaptureModeRelaxed +}; +enum hipStreamCaptureStatus { + hipStreamCaptureStatusNone = 0, ///< Stream is not capturing + hipStreamCaptureStatusActive, ///< Stream is actively capturing + hipStreamCaptureStatusInvalidated ///< Stream is part of a capture sequence that has been + ///< invalidated, but not terminated +}; +hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode); +hipError_t hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph); +// Creates a graph. +hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags); +// Destroys a graph. +hipError_t hipGraphDestroy(hipGraph_t graph); +// Destroys an executable graph. +hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec); +// Creates an executable graph from a graph. +hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, + hipGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize); +// Launches an executable graph in a stream. +hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream); +// Creates a kernel execution node and adds it to a graph. +hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipKernelNodeParams* pNodeParams); +// Creates a memcpy node and adds it to a graph. +hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemcpy3DParms* pCopyParams); +// Creates a memset node and adds it to a graph. +hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipMemsetParams* pMemsetParams); +#endif +// doxygen end graph API +/** + * @} + */ +#ifdef __cplusplus +} /* extern "c" */ +#endif +#if USE_PROF_API +#include +#endif +#ifdef __cplusplus +#if defined(__clang__) && defined(__HIP__) +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); +} +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); +} +#endif // defined(__clang__) && defined(__HIP__) +template +hipError_t hipGetSymbolAddress(void** devPtr, const T &symbol) { + return ::hipGetSymbolAddress(devPtr, (const void *)&symbol); +} +template +hipError_t hipGetSymbolSize(size_t* size, const T &symbol) { + return ::hipGetSymbolSize(size, (const void *)&symbol); +} +template +hipError_t hipMemcpyToSymbol(const T& symbol, const void* src, size_t sizeBytes, + size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) { + return ::hipMemcpyToSymbol((const void*)&symbol, src, sizeBytes, offset, kind); +} +template +hipError_t hipMemcpyToSymbolAsync(const T& symbol, const void* src, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyToSymbolAsync((const void*)&symbol, src, sizeBytes, offset, kind, stream); +} +template +hipError_t hipMemcpyFromSymbol(void* dst, const T &symbol, + size_t sizeBytes, size_t offset __dparm(0), + hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { + return ::hipMemcpyFromSymbol(dst, (const void*)&symbol, sizeBytes, offset, kind); +} +template +hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes, size_t offset, + hipMemcpyKind kind, hipStream_t stream __dparm(0)) { + return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream); +} +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk) { + return hipOccupancyMaxActiveBlocksPerMultiprocessor( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk); +} +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk, flags); +} +template +inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) { +return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize,(hipFunction_t)kernel, dynSharedMemPerBlk, blockSizeLimit); +} +template +inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, + blockDim, kernelParams, sharedMemBytes, stream); +} +template +inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags = 0) { + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); +} +template +inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags = 0) { + return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags); +} +hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc); +hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject); +template +DEPRECATED(DEPRECATED_MSG) +static inline hipError_t hipBindTexture(size_t* offset, const struct texture& tex, + const void* devPtr, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &tex.channelDesc, size); +} +template +DEPRECATED(DEPRECATED_MSG) +static inline hipError_t + hipBindTexture(size_t* offset, const struct texture& tex, const void* devPtr, + const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { + return hipBindTexture(offset, &tex, devPtr, &desc, size); +} +template +DEPRECATED(DEPRECATED_MSG) +static inline hipError_t hipBindTexture2D( + size_t *offset, + const struct texture &tex, + const void *devPtr, + size_t width, + size_t height, + size_t pitch) +{ + return hipBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch); +} +template +DEPRECATED(DEPRECATED_MSG) +static inline hipError_t hipBindTexture2D( + size_t *offset, + const struct texture &tex, + const void *devPtr, + const struct hipChannelFormatDesc &desc, + size_t width, + size_t height, + size_t pitch) +{ + return hipBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch); +} +template +DEPRECATED(DEPRECATED_MSG) +static inline hipError_t hipBindTextureToArray( + const struct texture &tex, + hipArray_const_t array) +{ + struct hipChannelFormatDesc desc; + hipError_t err = hipGetChannelDesc(&desc, array); + return (err == hipSuccess) ? hipBindTextureToArray(&tex, array, &desc) : err; +} +template +DEPRECATED(DEPRECATED_MSG) +static inline hipError_t hipBindTextureToArray( + const struct texture &tex, + hipArray_const_t array, + const struct hipChannelFormatDesc &desc) +{ + return hipBindTextureToArray(&tex, array, &desc); +} +template +static inline hipError_t hipBindTextureToMipmappedArray( + const struct texture &tex, + hipMipmappedArray_const_t mipmappedArray) +{ + struct hipChannelFormatDesc desc; + hipArray_t levelArray; + hipError_t err = hipGetMipmappedArrayLevel(&levelArray, mipmappedArray, 0); + if (err != hipSuccess) { + return err; + } + err = hipGetChannelDesc(&desc, levelArray); + return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc) : err; +} +template +static inline hipError_t hipBindTextureToMipmappedArray( + const struct texture &tex, + hipMipmappedArray_const_t mipmappedArray, + const struct hipChannelFormatDesc &desc) +{ + return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); +} +template +DEPRECATED(DEPRECATED_MSG) +static inline hipError_t hipUnbindTexture( + const struct texture &tex) +{ + return hipUnbindTexture(&tex); +} +// doxygen end Texture +/** + * @} + */ +#endif // __cplusplus +#ifdef __GNUC__ +#pragma GCC visibility pop +#endif +// doxygen end HIP API +/** + * @} + */ + #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include "hip/nvidia_detail/hip_runtime_api.h" +#include "hip/nvidia_detail/nvidia_hip_runtime_api.h" #else #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif @@ -495,5 +4338,4 @@ static inline hipError_t hipMallocManaged(T** devPtr, size_t size, return hipMallocManaged((void**)devPtr, size, flags); } #endif - #endif diff --git a/projects/hip/include/hip/hip_texture_types.h b/projects/hip/include/hip/hip_texture_types.h index 308da167a0..7a03e32e51 100644 --- a/projects/hip/include/hip/hip_texture_types.h +++ b/projects/hip/include/hip/hip_texture_types.h @@ -25,7 +25,7 @@ THE SOFTWARE. #define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #include #else diff --git a/projects/hip/include/hip/hip_vector_types.h b/projects/hip/include/hip/hip_vector_types.h index 9aa27dbbf1..aba545e5d4 100644 --- a/projects/hip/include/hip/hip_vector_types.h +++ b/projects/hip/include/hip/hip_vector_types.h @@ -30,7 +30,7 @@ THE SOFTWARE. #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #if __cplusplus -#include +#include #endif #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #include diff --git a/projects/hip/include/hip/hiprtc.h b/projects/hip/include/hip/hiprtc.h index 6dd11de600..f52697b942 100644 --- a/projects/hip/include/hip/hiprtc.h +++ b/projects/hip/include/hip/hiprtc.h @@ -24,7 +24,7 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) - #include + #include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #include #else diff --git a/projects/hip/include/hip/library_types.h b/projects/hip/include/hip/library_types.h index 805a385644..a7f5177f5d 100644 --- a/projects/hip/include/hip/library_types.h +++ b/projects/hip/include/hip/library_types.h @@ -26,7 +26,7 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #include "library_types.h" #else diff --git a/projects/hip/include/hip/math_functions.h b/projects/hip/include/hip/math_functions.h index 7488052e73..efb704f876 100644 --- a/projects/hip/include/hip/math_functions.h +++ b/projects/hip/include/hip/math_functions.h @@ -30,7 +30,7 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) //#include #else diff --git a/projects/hip/include/hip/texture_types.h b/projects/hip/include/hip/texture_types.h index 4088d67af4..07cd9833a9 100644 --- a/projects/hip/include/hip/texture_types.h +++ b/projects/hip/include/hip/texture_types.h @@ -26,7 +26,7 @@ THE SOFTWARE. #include #if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) -#include +#include #elif !(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && (defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) #include "texture_types.h" #else diff --git a/projects/hip/packaging/hip-rocclr.txt b/projects/hip/packaging/hip-rocclr.txt index 8632a6f080..f2a65034ed 100644 --- a/projects/hip/packaging/hip-rocclr.txt +++ b/projects/hip/packaging/hip-rocclr.txt @@ -31,7 +31,7 @@ endif() install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) -install(FILES @PROJECT_BINARY_DIR@/rocclr/hip-lang-config.cmake @PROJECT_BINARY_DIR@/rocclr/hip-lang-config-version.cmake DESTINATION lib/cmake/hip-lang) +install(FILES @PROJECT_BINARY_DIR@/src/hipamd/hip-lang-config.cmake @PROJECT_BINARY_DIR@/src/hipamd/hip-lang-config-version.cmake DESTINATION lib/cmake/hip-lang) if(@__HIP_ENABLE_RTC@) install(FILES @PROJECT_BINARY_DIR@/lib/libhiprtc-builtins.so.@HIP_LIB_VERSION_MAJOR@.@HIP_LIB_VERSION_MINOR@ DESTINATION lib) diff --git a/projects/hip/rocclr/CMakeLists.txt b/projects/hip/src/hipamd/CMakeLists.txt similarity index 85% rename from projects/hip/rocclr/CMakeLists.txt rename to projects/hip/src/hipamd/CMakeLists.txt index 922a5ffe6a..a63f5b793e 100755 --- a/projects/hip/rocclr/CMakeLists.txt +++ b/projects/hip/src/hipamd/CMakeLists.txt @@ -82,51 +82,52 @@ if(NOT WIN32) endif() target_sources(amdhip64 PRIVATE - cl_gl.cpp - cl_lqdflash_amd.cpp - fixme.cpp - hip_activity.cpp - hip_code_object.cpp - hip_context.cpp - hip_device_runtime.cpp - hip_device.cpp - hip_error.cpp - hip_event.cpp - hip_fatbin.cpp - hip_global.cpp - hip_graph_internal.cpp - hip_graph.cpp - hip_hmm.cpp - hip_intercept.cpp - hip_memory.cpp - hip_module.cpp - hip_peer.cpp - hip_platform.cpp - hip_profile.cpp - hip_rtc.cpp - hip_stream_ops.cpp - hip_stream.cpp - hip_surface.cpp - hip_texture.cpp) + src/cl_gl.cpp + src/cl_lqdflash_amd.cpp + src/fixme.cpp + src/hip_activity.cpp + src/hip_code_object.cpp + src/hip_context.cpp + src/hip_device_runtime.cpp + src/hip_device.cpp + src/hip_error.cpp + src/hip_event.cpp + src/hip_fatbin.cpp + src/hip_global.cpp + src/hip_graph_internal.cpp + src/hip_graph.cpp + src/hip_hmm.cpp + src/hip_intercept.cpp + src/hip_memory.cpp + src/hip_module.cpp + src/hip_peer.cpp + src/hip_platform.cpp + src/hip_profile.cpp + src/hip_rtc.cpp + src/hip_stream_ops.cpp + src/hip_stream.cpp + src/hip_surface.cpp + src/hip_texture.cpp) if(WIN32) target_sources(amdhip64 PRIVATE - cl_d3d9.cpp - cl_d3d10.cpp - cl_d3d11.cpp) + src/cl_d3d9.cpp + src/cl_d3d10.cpp + src/cl_d3d11.cpp) endif() if(BUILD_SHARED_LIBS) if(WIN32) target_sources(amdhip64 PRIVATE amdhip.def) else() - target_link_libraries(amdhip64 PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") - set_target_properties(amdhip64 PROPERTIES LINK_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") + target_link_libraries(amdhip64 PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/src/hip_hcc.map.in") + set_target_properties(amdhip64 PROPERTIES LINK_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/src/hip_hcc.map.in") endif() endif() target_include_directories(amdhip64 PRIVATE + ${PROJECT_SOURCE_DIR}/src/hipamd/include ${PROJECT_SOURCE_DIR}/include ${PROJECT_BINARY_DIR}/include) @@ -156,7 +157,8 @@ if(__HIP_ENABLE_PCH) # find_package(LLVM) returns the lib/cmake/llvm location. We require the root. set(HIP_LLVM_ROOT "${LLVM_DIR}/../../..") - execute_process(COMMAND sh -c "${CMAKE_CURRENT_SOURCE_DIR}/../bin/hip_embed_pch.sh ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include ${HIP_LLVM_ROOT}" COMMAND_ECHO STDERR RESULT_VARIABLE EMBED_PCH_RC) + # execute_process(COMMAND sh -c "${CMAKE_CURRENT_SOURCE_DIR}/../bin/hip_embed_pch.sh ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include ${HIP_LLVM_ROOT}" COMMAND_ECHO STDERR RESULT_VARIABLE EMBED_PCH_RC) + execute_process(COMMAND sh -c "${CMAKE_CURRENT_SOURCE_DIR}/../../bin/hip_embed_pch.sh ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src/hipamd/include ${HIP_LLVM_ROOT}" COMMAND_ECHO STDERR RESULT_VARIABLE EMBED_PCH_RC) if (EMBED_PCH_RC AND NOT EMBED_PCH_RC EQUAL 0) message(FATAL_ERROR "Failed to embed PCH") endif() @@ -179,7 +181,7 @@ if(__HIP_ENABLE_RTC) set(HIPRTC_LIB_NAME "libhiprtc-builtins.so.${HIP_LIB_VERSION_MAJOR}.${HIP_LIB_VERSION_MINOR}") endif() execute_process( - COMMAND sh -c "mkdir -p ${PROJECT_BINARY_DIR}/lib; ${CMAKE_CURRENT_SOURCE_DIR}/../bin/hip_embed_pch.sh ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include ${HIP_LLVM_ROOT} -r ${PROJECT_BINARY_DIR}/lib/${HIPRTC_LIB_NAME}" + COMMAND sh -c "mkdir -p ${PROJECT_BINARY_DIR}/lib; ${CMAKE_CURRENT_SOURCE_DIR}/../../bin/hip_embed_pch.sh ${PROJECT_BINARY_DIR}/include ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src/hipamd/include ${HIP_LLVM_ROOT} -r ${PROJECT_BINARY_DIR}/lib/${HIPRTC_LIB_NAME}" COMMAND_ECHO STDERR RESULT_VARIABLE EMBED_RTC_RC ) @@ -194,9 +196,9 @@ endif() ############################# # Generate profiling API macros/structures header set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/amd_detail/hip_prof_str.h") -set(PROF_API_HDR "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_runtime_api.h") -set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}") -set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") +set(PROF_API_HDR "${PROJECT_SOURCE_DIR}/include/hip/hip_runtime_api.h") +set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src") +set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/src/hip_prof_gen.py") set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt") find_package(PythonInterp REQUIRED) diff --git a/projects/hip/rocclr/cmake/FindROCclr.cmake b/projects/hip/src/hipamd/cmake/FindROCclr.cmake similarity index 100% rename from projects/hip/rocclr/cmake/FindROCclr.cmake rename to projects/hip/src/hipamd/cmake/FindROCclr.cmake diff --git a/projects/hip/include/hip/amd_detail/channel_descriptor.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_channel_descriptor.h similarity index 99% rename from projects/hip/include/hip/amd_detail/channel_descriptor.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_channel_descriptor.h index d23f341ef9..7a282a61db 100644 --- a/projects/hip/include/hip/amd_detail/channel_descriptor.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_channel_descriptor.h @@ -24,8 +24,8 @@ THE SOFTWARE. #define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H #include -#include -#include +#include +#include #ifdef __cplusplus diff --git a/projects/hip/include/hip/amd_detail/device_functions.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_device_functions.h similarity index 100% rename from projects/hip/include/hip/amd_detail/device_functions.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_device_functions.h diff --git a/projects/hip/include/hip/amd_detail/driver_types.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_driver_types.h similarity index 100% rename from projects/hip/include/hip/amd_detail/driver_types.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_driver_types.h diff --git a/projects/hip/include/hip/amd_detail/hip_atomic.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_atomic.h similarity index 99% rename from projects/hip/include/hip/amd_detail/hip_atomic.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_atomic.h index 0c4bc80cf6..6a25533beb 100644 --- a/projects/hip/include/hip/amd_detail/hip_atomic.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_atomic.h @@ -1,6 +1,6 @@ -#include "device_functions.h" +#include "amd_device_functions.h" #if __has_builtin(__hip_atomic_compare_exchange_strong) diff --git a/projects/hip/include/hip/amd_detail/hip_common.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_common.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hip_common.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_common.h diff --git a/projects/hip/include/hip/amd_detail/hip_complex.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_complex.h similarity index 99% rename from projects/hip/include/hip/amd_detail/hip_complex.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_complex.h index db312780c1..214172cd5e 100644 --- a/projects/hip/include/hip/amd_detail/hip_complex.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_complex.h @@ -23,7 +23,7 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H -#include "hip/amd_detail/hip_vector_types.h" +#include "hip/amd_detail/amd_hip_vector_types.h" #if defined(__HIPCC_RTC__) #define __HOST_DEVICE__ __device__ diff --git a/projects/hip/include/hip/amd_detail/hip_cooperative_groups.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hip_cooperative_groups.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h diff --git a/projects/hip/include/hip/amd_detail/hip_fp16.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_fp16.h similarity index 99% rename from projects/hip/include/hip/amd_detail/hip_fp16.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_fp16.h index fb344aa7d5..2b553bada9 100644 --- a/projects/hip/include/hip/amd_detail/hip_fp16.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_fp16.h @@ -24,7 +24,8 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H -#include +#include + #include "hip/amd_detail/host_defines.h" #if defined(__HIPCC_RTC__) #define __HOST_DEVICE__ __device__ @@ -64,7 +65,7 @@ THE SOFTWARE. #if defined(__cplusplus) #include "hip_fp16_math_fwd.h" - #include "hip_vector_types.h" + #include "amd_hip_vector_types.h" #include "host_defines.h" namespace std diff --git a/projects/hip/include/hip/amd_detail/hip_runtime.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_runtime.h similarity index 98% rename from projects/hip/include/hip/amd_detail/hip_runtime.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_runtime.h index a3db57ffe3..15952722b8 100644 --- a/projects/hip/include/hip/amd_detail/hip_runtime.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_runtime.h @@ -29,7 +29,7 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_H -#include +#include //--- // Top part of file can be compiled with any compiler @@ -67,10 +67,10 @@ extern int HIP_TRACE_API; #ifdef __cplusplus #include #endif -#include +#include #include -#include -#include +#include +#include #include #include @@ -359,7 +359,7 @@ extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint); #define hipGridDim_y (__ockl_get_num_groups(1)) #define hipGridDim_z (__ockl_get_num_groups(2)) -#include +#include #if __HIP_HCC_COMPAT_MODE__ // Define HCC work item functions in terms of HIP builtin variables. diff --git a/projects/hip/include/hip/amd_detail/hip_surface_types.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_surface_types.h similarity index 97% rename from projects/hip/include/hip/amd_detail/hip_surface_types.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_surface_types.h index 2c53d19558..27bb84ad39 100644 --- a/projects/hip/include/hip/amd_detail/hip_surface_types.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_surface_types.h @@ -28,7 +28,7 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_SURFACE_TYPES_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_SURFACE_TYPES_H -#include +#include /** * An opaque value that represents a hip surface object diff --git a/projects/hip/include/hip/amd_detail/hip_texture_types.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_texture_types.h similarity index 97% rename from projects/hip/include/hip/amd_detail/hip_texture_types.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_texture_types.h index 0dc40ec0ed..f53362ae9e 100644 --- a/projects/hip/include/hip/amd_detail/hip_texture_types.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_texture_types.h @@ -35,9 +35,9 @@ THE SOFTWARE. *******************************************************************************/ #if !defined(__HIPCC_RTC__) #include -#include +#include #endif // !defined(__HIPCC_RTC__) -#include +#include #if __cplusplus diff --git a/projects/hip/include/hip/amd_detail/hip_vector_types.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_vector_types.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hip_vector_types.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hip_vector_types.h diff --git a/projects/hip/include/hip/amd_detail/hiprtc.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_hiprtc.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hiprtc.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_hiprtc.h diff --git a/projects/hip/include/hip/amd_detail/library_types.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_library_types.h similarity index 100% rename from projects/hip/include/hip/amd_detail/library_types.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_library_types.h diff --git a/projects/hip/include/hip/amd_detail/math_functions.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_math_functions.h similarity index 99% rename from projects/hip/include/hip/amd_detail/math_functions.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_math_functions.h index 2cbee4829a..24f6433b53 100644 --- a/projects/hip/include/hip/amd_detail/math_functions.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_math_functions.h @@ -23,7 +23,7 @@ THE SOFTWARE. #pragma once #include "hip_fp16_math_fwd.h" -#include "hip_vector_types.h" +#include "amd_hip_vector_types.h" #include "math_fwd.h" #include @@ -1499,4 +1499,4 @@ inline _Float16 pow(_Float16 base, int iexp) { // For backward compatibility. // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros // defined after including math_functions.h. -#include +#include diff --git a/projects/hip/include/hip/amd_detail/surface_functions.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_surface_functions.h similarity index 97% rename from projects/hip/include/hip/amd_detail/surface_functions.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_surface_functions.h index 51c32bf85d..9e595b6e1a 100644 --- a/projects/hip/include/hip/amd_detail/surface_functions.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_surface_functions.h @@ -23,7 +23,7 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H -#include +#include #define __SURFACE_FUNCTIONS_DECL__ static inline __device__ template diff --git a/projects/hip/include/hip/amd_detail/texture_types.h b/projects/hip/src/hipamd/include/hip/amd_detail/amd_texture_types.h similarity index 98% rename from projects/hip/include/hip/amd_detail/texture_types.h rename to projects/hip/src/hipamd/include/hip/amd_detail/amd_texture_types.h index 3cfb7dac9f..f9f0290e5b 100644 --- a/projects/hip/include/hip/amd_detail/texture_types.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/amd_texture_types.h @@ -24,7 +24,7 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_TEXTURE_TYPES_H #define HIP_INCLUDE_HIP_AMD_DETAIL_TEXTURE_TYPES_H -#include +#include #define hipTextureType1D 0x01 #define hipTextureType2D 0x02 diff --git a/projects/hip/include/hip/amd_detail/concepts.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/concepts.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/concepts.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/concepts.hpp diff --git a/projects/hip/include/hip/amd_detail/device_library_decls.h b/projects/hip/src/hipamd/include/hip/amd_detail/device_library_decls.h similarity index 100% rename from projects/hip/include/hip/amd_detail/device_library_decls.h rename to projects/hip/src/hipamd/include/hip/amd_detail/device_library_decls.h diff --git a/projects/hip/include/hip/amd_detail/functional_grid_launch.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/functional_grid_launch.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/functional_grid_launch.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/functional_grid_launch.hpp diff --git a/projects/hip/include/hip/amd_detail/grid_launch.h b/projects/hip/src/hipamd/include/hip/amd_detail/grid_launch.h similarity index 100% rename from projects/hip/include/hip/amd_detail/grid_launch.h rename to projects/hip/src/hipamd/include/hip/amd_detail/grid_launch.h diff --git a/projects/hip/include/hip/amd_detail/grid_launch.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/grid_launch.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/grid_launch.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/grid_launch.hpp diff --git a/projects/hip/include/hip/amd_detail/grid_launch_GGL.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/grid_launch_GGL.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/grid_launch_GGL.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/grid_launch_GGL.hpp diff --git a/projects/hip/include/hip/amd_detail/helpers.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/helpers.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/helpers.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/helpers.hpp diff --git a/projects/hip/include/hip/amd_detail/hip_cooperative_groups_helper.h b/projects/hip/src/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h similarity index 98% rename from projects/hip/include/hip/amd_detail/hip_cooperative_groups_helper.h rename to projects/hip/src/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h index 90463485b6..6ef96e9a17 100644 --- a/projects/hip/include/hip/amd_detail/hip_cooperative_groups_helper.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h @@ -32,8 +32,7 @@ THE SOFTWARE. #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H #if __cplusplus -#include -#include +#include #if !defined(__align__) #define __align__(x) __attribute__((aligned(x))) diff --git a/projects/hip/include/hip/amd_detail/hip_fp16_gcc.h b/projects/hip/src/hipamd/include/hip/amd_detail/hip_fp16_gcc.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hip_fp16_gcc.h rename to projects/hip/src/hipamd/include/hip/amd_detail/hip_fp16_gcc.h diff --git a/projects/hip/include/hip/amd_detail/hip_fp16_math_fwd.h b/projects/hip/src/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hip_fp16_math_fwd.h rename to projects/hip/src/hipamd/include/hip/amd_detail/hip_fp16_math_fwd.h diff --git a/projects/hip/include/hip/amd_detail/hip_ldg.h b/projects/hip/src/hipamd/include/hip/amd_detail/hip_ldg.h similarity index 99% rename from projects/hip/include/hip/amd_detail/hip_ldg.h rename to projects/hip/src/hipamd/include/hip/amd_detail/hip_ldg.h index 4b8b1227a1..91e76e6564 100644 --- a/projects/hip/include/hip/amd_detail/hip_ldg.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/hip_ldg.h @@ -24,7 +24,7 @@ THE SOFTWARE. #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H #if __HIP_CLANG_ONLY__ -#include "hip_vector_types.h" +#include "amd_hip_vector_types.h" #include "host_defines.h" __device__ inline static char __ldg(const char* ptr) { return *ptr; } diff --git a/projects/hip/include/hip/amd_detail/hip_memory.h b/projects/hip/src/hipamd/include/hip/amd_detail/hip_memory.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hip_memory.h rename to projects/hip/src/hipamd/include/hip/amd_detail/hip_memory.h diff --git a/projects/hip/include/hip/amd_detail/hip_runtime_prof.h b/projects/hip/src/hipamd/include/hip/amd_detail/hip_runtime_prof.h similarity index 100% rename from projects/hip/include/hip/amd_detail/hip_runtime_prof.h rename to projects/hip/src/hipamd/include/hip/amd_detail/hip_runtime_prof.h diff --git a/projects/hip/include/hip/amd_detail/host_defines.h b/projects/hip/src/hipamd/include/hip/amd_detail/host_defines.h similarity index 100% rename from projects/hip/include/hip/amd_detail/host_defines.h rename to projects/hip/src/hipamd/include/hip/amd_detail/host_defines.h diff --git a/projects/hip/include/hip/amd_detail/hsa_helpers.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/hsa_helpers.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/hsa_helpers.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/hsa_helpers.hpp diff --git a/projects/hip/include/hip/amd_detail/llvm_intrinsics.h b/projects/hip/src/hipamd/include/hip/amd_detail/llvm_intrinsics.h similarity index 100% rename from projects/hip/include/hip/amd_detail/llvm_intrinsics.h rename to projects/hip/src/hipamd/include/hip/amd_detail/llvm_intrinsics.h diff --git a/projects/hip/include/hip/amd_detail/macro_based_grid_launch.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/macro_based_grid_launch.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/macro_based_grid_launch.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/macro_based_grid_launch.hpp diff --git a/projects/hip/include/hip/amd_detail/math_fwd.h b/projects/hip/src/hipamd/include/hip/amd_detail/math_fwd.h similarity index 100% rename from projects/hip/include/hip/amd_detail/math_fwd.h rename to projects/hip/src/hipamd/include/hip/amd_detail/math_fwd.h diff --git a/projects/hip/include/hip/amd_detail/ockl_image.h b/projects/hip/src/hipamd/include/hip/amd_detail/ockl_image.h similarity index 100% rename from projects/hip/include/hip/amd_detail/ockl_image.h rename to projects/hip/src/hipamd/include/hip/amd_detail/ockl_image.h diff --git a/projects/hip/include/hip/amd_detail/program_state.hpp b/projects/hip/src/hipamd/include/hip/amd_detail/program_state.hpp similarity index 100% rename from projects/hip/include/hip/amd_detail/program_state.hpp rename to projects/hip/src/hipamd/include/hip/amd_detail/program_state.hpp diff --git a/projects/hip/include/hip/amd_detail/texture_fetch_functions.h b/projects/hip/src/hipamd/include/hip/amd_detail/texture_fetch_functions.h similarity index 100% rename from projects/hip/include/hip/amd_detail/texture_fetch_functions.h rename to projects/hip/src/hipamd/include/hip/amd_detail/texture_fetch_functions.h diff --git a/projects/hip/include/hip/amd_detail/texture_functions.h b/projects/hip/src/hipamd/include/hip/amd_detail/texture_functions.h similarity index 99% rename from projects/hip/include/hip/amd_detail/texture_functions.h rename to projects/hip/src/hipamd/include/hip/amd_detail/texture_functions.h index 5da388ce3c..5c2da3ef17 100644 --- a/projects/hip/include/hip/amd_detail/texture_functions.h +++ b/projects/hip/src/hipamd/include/hip/amd_detail/texture_functions.h @@ -22,8 +22,8 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_TEXTURE_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_TEXTURE_FUNCTIONS_H -#include -#include +#include +#include #pragma push_macro("TYPEDEF_VECTOR_VALUE_TYPE") #define TYPEDEF_VECTOR_VALUE_TYPE(SCALAR_TYPE) \ diff --git a/projects/hip/include/hip/amd_detail/texture_indirect_functions.h b/projects/hip/src/hipamd/include/hip/amd_detail/texture_indirect_functions.h similarity index 100% rename from projects/hip/include/hip/amd_detail/texture_indirect_functions.h rename to projects/hip/src/hipamd/include/hip/amd_detail/texture_indirect_functions.h diff --git a/projects/hip/include/hip/hcc_detail b/projects/hip/src/hipamd/include/hip/hcc_detail similarity index 100% rename from projects/hip/include/hip/hcc_detail rename to projects/hip/src/hipamd/include/hip/hcc_detail diff --git a/projects/hip/src/hipamd/include/hip/nvcc_detail b/projects/hip/src/hipamd/include/hip/nvcc_detail new file mode 120000 index 0000000000..e02ee85e4c --- /dev/null +++ b/projects/hip/src/hipamd/include/hip/nvcc_detail @@ -0,0 +1 @@ +nvidia_detail \ No newline at end of file diff --git a/projects/hip/include/hip/nvidia_detail/channel_descriptor.h b/projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h similarity index 100% rename from projects/hip/include/hip/nvidia_detail/channel_descriptor.h rename to projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h diff --git a/projects/hip/include/hip/nvidia_detail/hip_complex.h b/projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h similarity index 100% rename from projects/hip/include/hip/nvidia_detail/hip_complex.h rename to projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h diff --git a/projects/hip/include/hip/nvidia_detail/hip_cooperative_groups.h b/projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h similarity index 100% rename from projects/hip/include/hip/nvidia_detail/hip_cooperative_groups.h rename to projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h diff --git a/projects/hip/include/hip/nvidia_detail/hip_runtime.h b/projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h similarity index 100% rename from projects/hip/include/hip/nvidia_detail/hip_runtime.h rename to projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h diff --git a/projects/hip/include/hip/nvidia_detail/hip_runtime_api.h b/projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h similarity index 100% rename from projects/hip/include/hip/nvidia_detail/hip_runtime_api.h rename to projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h diff --git a/projects/hip/include/hip/nvidia_detail/hip_texture_types.h b/projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h similarity index 100% rename from projects/hip/include/hip/nvidia_detail/hip_texture_types.h rename to projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h diff --git a/projects/hip/include/hip/nvidia_detail/hiprtc.h b/projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h similarity index 100% rename from projects/hip/include/hip/nvidia_detail/hiprtc.h rename to projects/hip/src/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h diff --git a/projects/hip/rocclr/amd_hsa_elf.hpp b/projects/hip/src/hipamd/src/amd_hsa_elf.hpp similarity index 100% rename from projects/hip/rocclr/amd_hsa_elf.hpp rename to projects/hip/src/hipamd/src/amd_hsa_elf.hpp diff --git a/projects/hip/rocclr/cl_d3d10.cpp b/projects/hip/src/hipamd/src/cl_d3d10.cpp similarity index 100% rename from projects/hip/rocclr/cl_d3d10.cpp rename to projects/hip/src/hipamd/src/cl_d3d10.cpp diff --git a/projects/hip/rocclr/cl_d3d11.cpp b/projects/hip/src/hipamd/src/cl_d3d11.cpp similarity index 100% rename from projects/hip/rocclr/cl_d3d11.cpp rename to projects/hip/src/hipamd/src/cl_d3d11.cpp diff --git a/projects/hip/rocclr/cl_d3d9.cpp b/projects/hip/src/hipamd/src/cl_d3d9.cpp similarity index 100% rename from projects/hip/rocclr/cl_d3d9.cpp rename to projects/hip/src/hipamd/src/cl_d3d9.cpp diff --git a/projects/hip/rocclr/cl_gl.cpp b/projects/hip/src/hipamd/src/cl_gl.cpp similarity index 100% rename from projects/hip/rocclr/cl_gl.cpp rename to projects/hip/src/hipamd/src/cl_gl.cpp diff --git a/projects/hip/rocclr/cl_gl_amd.hpp b/projects/hip/src/hipamd/src/cl_gl_amd.hpp similarity index 100% rename from projects/hip/rocclr/cl_gl_amd.hpp rename to projects/hip/src/hipamd/src/cl_gl_amd.hpp diff --git a/projects/hip/rocclr/cl_lqdflash_amd.cpp b/projects/hip/src/hipamd/src/cl_lqdflash_amd.cpp similarity index 100% rename from projects/hip/rocclr/cl_lqdflash_amd.cpp rename to projects/hip/src/hipamd/src/cl_lqdflash_amd.cpp diff --git a/projects/hip/rocclr/cl_lqdflash_amd.h b/projects/hip/src/hipamd/src/cl_lqdflash_amd.h similarity index 100% rename from projects/hip/rocclr/cl_lqdflash_amd.h rename to projects/hip/src/hipamd/src/cl_lqdflash_amd.h diff --git a/projects/hip/rocclr/fixme.cpp b/projects/hip/src/hipamd/src/fixme.cpp similarity index 100% rename from projects/hip/rocclr/fixme.cpp rename to projects/hip/src/hipamd/src/fixme.cpp diff --git a/projects/hip/rocclr/hip_activity.cpp b/projects/hip/src/hipamd/src/hip_activity.cpp similarity index 100% rename from projects/hip/rocclr/hip_activity.cpp rename to projects/hip/src/hipamd/src/hip_activity.cpp diff --git a/projects/hip/rocclr/hip_code_object.cpp b/projects/hip/src/hipamd/src/hip_code_object.cpp similarity index 99% rename from projects/hip/rocclr/hip_code_object.cpp rename to projects/hip/src/hipamd/src/hip_code_object.cpp index a14cc8bcc6..8bd790f1f3 100755 --- a/projects/hip/rocclr/hip_code_object.cpp +++ b/projects/hip/src/hipamd/src/hip_code_object.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. #include -#include +#include #include "hip/hip_runtime_api.h" #include "hip/hip_runtime.h" #include "hip_internal.hpp" diff --git a/projects/hip/rocclr/hip_code_object.hpp b/projects/hip/src/hipamd/src/hip_code_object.hpp similarity index 100% rename from projects/hip/rocclr/hip_code_object.hpp rename to projects/hip/src/hipamd/src/hip_code_object.hpp diff --git a/projects/hip/rocclr/hip_context.cpp b/projects/hip/src/hipamd/src/hip_context.cpp similarity index 100% rename from projects/hip/rocclr/hip_context.cpp rename to projects/hip/src/hipamd/src/hip_context.cpp diff --git a/projects/hip/rocclr/hip_conversions.hpp b/projects/hip/src/hipamd/src/hip_conversions.hpp similarity index 99% rename from projects/hip/rocclr/hip_conversions.hpp rename to projects/hip/src/hipamd/src/hip_conversions.hpp index 35948703ad..03d6320b60 100644 --- a/projects/hip/rocclr/hip_conversions.hpp +++ b/projects/hip/src/hipamd/src/hip_conversions.hpp @@ -22,8 +22,8 @@ THE SOFTWARE. #pragma once -#include -#include +#include +#include namespace hip { diff --git a/projects/hip/rocclr/hip_device.cpp b/projects/hip/src/hipamd/src/hip_device.cpp similarity index 100% rename from projects/hip/rocclr/hip_device.cpp rename to projects/hip/src/hipamd/src/hip_device.cpp diff --git a/projects/hip/rocclr/hip_device_runtime.cpp b/projects/hip/src/hipamd/src/hip_device_runtime.cpp similarity index 100% rename from projects/hip/rocclr/hip_device_runtime.cpp rename to projects/hip/src/hipamd/src/hip_device_runtime.cpp diff --git a/projects/hip/rocclr/hip_error.cpp b/projects/hip/src/hipamd/src/hip_error.cpp similarity index 100% rename from projects/hip/rocclr/hip_error.cpp rename to projects/hip/src/hipamd/src/hip_error.cpp diff --git a/projects/hip/rocclr/hip_event.cpp b/projects/hip/src/hipamd/src/hip_event.cpp similarity index 100% rename from projects/hip/rocclr/hip_event.cpp rename to projects/hip/src/hipamd/src/hip_event.cpp diff --git a/projects/hip/rocclr/hip_event.hpp b/projects/hip/src/hipamd/src/hip_event.hpp similarity index 100% rename from projects/hip/rocclr/hip_event.hpp rename to projects/hip/src/hipamd/src/hip_event.hpp diff --git a/projects/hip/rocclr/hip_fatbin.cpp b/projects/hip/src/hipamd/src/hip_fatbin.cpp similarity index 100% rename from projects/hip/rocclr/hip_fatbin.cpp rename to projects/hip/src/hipamd/src/hip_fatbin.cpp diff --git a/projects/hip/rocclr/hip_fatbin.hpp b/projects/hip/src/hipamd/src/hip_fatbin.hpp similarity index 100% rename from projects/hip/rocclr/hip_fatbin.hpp rename to projects/hip/src/hipamd/src/hip_fatbin.hpp diff --git a/projects/hip/rocclr/hip_formatting.hpp b/projects/hip/src/hipamd/src/hip_formatting.hpp similarity index 100% rename from projects/hip/rocclr/hip_formatting.hpp rename to projects/hip/src/hipamd/src/hip_formatting.hpp diff --git a/projects/hip/rocclr/hip_global.cpp b/projects/hip/src/hipamd/src/hip_global.cpp similarity index 100% rename from projects/hip/rocclr/hip_global.cpp rename to projects/hip/src/hipamd/src/hip_global.cpp diff --git a/projects/hip/rocclr/hip_global.hpp b/projects/hip/src/hipamd/src/hip_global.hpp similarity index 100% rename from projects/hip/rocclr/hip_global.hpp rename to projects/hip/src/hipamd/src/hip_global.hpp diff --git a/projects/hip/rocclr/hip_graph.cpp b/projects/hip/src/hipamd/src/hip_graph.cpp similarity index 100% rename from projects/hip/rocclr/hip_graph.cpp rename to projects/hip/src/hipamd/src/hip_graph.cpp diff --git a/projects/hip/rocclr/hip_graph_capture.hpp b/projects/hip/src/hipamd/src/hip_graph_capture.hpp similarity index 100% rename from projects/hip/rocclr/hip_graph_capture.hpp rename to projects/hip/src/hipamd/src/hip_graph_capture.hpp diff --git a/projects/hip/rocclr/hip_graph_helper.hpp b/projects/hip/src/hipamd/src/hip_graph_helper.hpp similarity index 100% rename from projects/hip/rocclr/hip_graph_helper.hpp rename to projects/hip/src/hipamd/src/hip_graph_helper.hpp diff --git a/projects/hip/rocclr/hip_graph_internal.cpp b/projects/hip/src/hipamd/src/hip_graph_internal.cpp similarity index 100% rename from projects/hip/rocclr/hip_graph_internal.cpp rename to projects/hip/src/hipamd/src/hip_graph_internal.cpp diff --git a/projects/hip/rocclr/hip_graph_internal.hpp b/projects/hip/src/hipamd/src/hip_graph_internal.hpp similarity index 100% rename from projects/hip/rocclr/hip_graph_internal.hpp rename to projects/hip/src/hipamd/src/hip_graph_internal.hpp diff --git a/projects/hip/rocclr/hip_hcc.def.in b/projects/hip/src/hipamd/src/hip_hcc.def.in similarity index 100% rename from projects/hip/rocclr/hip_hcc.def.in rename to projects/hip/src/hipamd/src/hip_hcc.def.in diff --git a/projects/hip/rocclr/hip_hcc.map.in b/projects/hip/src/hipamd/src/hip_hcc.map.in similarity index 100% rename from projects/hip/rocclr/hip_hcc.map.in rename to projects/hip/src/hipamd/src/hip_hcc.map.in diff --git a/projects/hip/rocclr/hip_hcc.rc b/projects/hip/src/hipamd/src/hip_hcc.rc similarity index 100% rename from projects/hip/rocclr/hip_hcc.rc rename to projects/hip/src/hipamd/src/hip_hcc.rc diff --git a/projects/hip/rocclr/hip_hmm.cpp b/projects/hip/src/hipamd/src/hip_hmm.cpp similarity index 100% rename from projects/hip/rocclr/hip_hmm.cpp rename to projects/hip/src/hipamd/src/hip_hmm.cpp diff --git a/projects/hip/rocclr/hip_intercept.cpp b/projects/hip/src/hipamd/src/hip_intercept.cpp similarity index 100% rename from projects/hip/rocclr/hip_intercept.cpp rename to projects/hip/src/hipamd/src/hip_intercept.cpp diff --git a/projects/hip/rocclr/hip_internal.hpp b/projects/hip/src/hipamd/src/hip_internal.hpp similarity index 100% rename from projects/hip/rocclr/hip_internal.hpp rename to projects/hip/src/hipamd/src/hip_internal.hpp diff --git a/projects/hip/rocclr/hip_memory.cpp b/projects/hip/src/hipamd/src/hip_memory.cpp similarity index 100% rename from projects/hip/rocclr/hip_memory.cpp rename to projects/hip/src/hipamd/src/hip_memory.cpp diff --git a/projects/hip/rocclr/hip_module.cpp b/projects/hip/src/hipamd/src/hip_module.cpp similarity index 100% rename from projects/hip/rocclr/hip_module.cpp rename to projects/hip/src/hipamd/src/hip_module.cpp diff --git a/projects/hip/rocclr/hip_peer.cpp b/projects/hip/src/hipamd/src/hip_peer.cpp similarity index 100% rename from projects/hip/rocclr/hip_peer.cpp rename to projects/hip/src/hipamd/src/hip_peer.cpp diff --git a/projects/hip/rocclr/hip_platform.cpp b/projects/hip/src/hipamd/src/hip_platform.cpp similarity index 99% rename from projects/hip/rocclr/hip_platform.cpp rename to projects/hip/src/hipamd/src/hip_platform.cpp index 5fca5f21f2..80ffb2c657 100755 --- a/projects/hip/rocclr/hip_platform.cpp +++ b/projects/hip/src/hipamd/src/hip_platform.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. */ #include -#include +#include #include "hip_platform.hpp" #include "hip_internal.hpp" #include "platform/program.hpp" diff --git a/projects/hip/rocclr/hip_platform.hpp b/projects/hip/src/hipamd/src/hip_platform.hpp similarity index 100% rename from projects/hip/rocclr/hip_platform.hpp rename to projects/hip/src/hipamd/src/hip_platform.hpp diff --git a/projects/hip/rocclr/hip_prof_api.h b/projects/hip/src/hipamd/src/hip_prof_api.h similarity index 100% rename from projects/hip/rocclr/hip_prof_api.h rename to projects/hip/src/hipamd/src/hip_prof_api.h diff --git a/projects/hip/rocclr/hip_prof_gen.py b/projects/hip/src/hipamd/src/hip_prof_gen.py similarity index 100% rename from projects/hip/rocclr/hip_prof_gen.py rename to projects/hip/src/hipamd/src/hip_prof_gen.py diff --git a/projects/hip/rocclr/hip_profile.cpp b/projects/hip/src/hipamd/src/hip_profile.cpp similarity index 100% rename from projects/hip/rocclr/hip_profile.cpp rename to projects/hip/src/hipamd/src/hip_profile.cpp diff --git a/projects/hip/rocclr/hip_rtc.cpp b/projects/hip/src/hipamd/src/hip_rtc.cpp similarity index 100% rename from projects/hip/rocclr/hip_rtc.cpp rename to projects/hip/src/hipamd/src/hip_rtc.cpp diff --git a/projects/hip/rocclr/hip_stream.cpp b/projects/hip/src/hipamd/src/hip_stream.cpp similarity index 100% rename from projects/hip/rocclr/hip_stream.cpp rename to projects/hip/src/hipamd/src/hip_stream.cpp diff --git a/projects/hip/rocclr/hip_stream_ops.cpp b/projects/hip/src/hipamd/src/hip_stream_ops.cpp similarity index 100% rename from projects/hip/rocclr/hip_stream_ops.cpp rename to projects/hip/src/hipamd/src/hip_stream_ops.cpp diff --git a/projects/hip/rocclr/hip_surface.cpp b/projects/hip/src/hipamd/src/hip_surface.cpp similarity index 96% rename from projects/hip/rocclr/hip_surface.cpp rename to projects/hip/src/hipamd/src/hip_surface.cpp index c88e3ea3cf..c2b2525fac 100644 --- a/projects/hip/rocclr/hip_surface.cpp +++ b/projects/hip/src/hipamd/src/hip_surface.cpp @@ -21,7 +21,7 @@ #include #include "hip_internal.hpp" -#include +#include hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc) { diff --git a/projects/hip/rocclr/hip_texture.cpp b/projects/hip/src/hipamd/src/hip_texture.cpp similarity index 99% rename from projects/hip/rocclr/hip_texture.cpp rename to projects/hip/src/hipamd/src/hip_texture.cpp index 304cc818a1..2b97cea6d5 100755 --- a/projects/hip/rocclr/hip_texture.cpp +++ b/projects/hip/src/hipamd/src/hip_texture.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. */ #include -#include +#include #include "hip_internal.hpp" #include "hip_platform.hpp" #include "hip_conversions.hpp" diff --git a/projects/hip/rocclr/hiprtc_internal.hpp b/projects/hip/src/hipamd/src/hiprtc_internal.hpp similarity index 100% rename from projects/hip/rocclr/hiprtc_internal.hpp rename to projects/hip/src/hipamd/src/hiprtc_internal.hpp diff --git a/projects/hip/rocclr/trace_helper.h b/projects/hip/src/hipamd/src/trace_helper.h similarity index 100% rename from projects/hip/rocclr/trace_helper.h rename to projects/hip/src/hipamd/src/trace_helper.h