Remove hip-hcc codes: Part four

Remove __HCC__, __HCC_ONLY__, __HCC_CPP__, __HCC_C__,
__HCC_OR_HIP_CLANG__, __HIP_ROCclr__ and their guarded codes.

Remove Hcc codes from directed_tests and samples.

Remove __HIP_PLATFORM_HCC__ and __HIP_PLATFORM_NVCC__ from
some files where they are not necessary.

Add deprecation notice.

Change-Id: I1ae467eafd749d6c25bca204c1724b026be21fce
This commit is contained in:
Tao Sang
2021-01-04 19:58:20 -05:00
zatwierdzone przez Tao Sang
rodzic d1c921ffca
commit b34dd95124
61 zmienionych plików z 151 dodań i 1739 usunięć
-6
Wyświetl plik
@@ -112,10 +112,6 @@ $HSA_PATH = $hipvars::HSA_PATH;
$HIP_ROCCLR_HOME = $hipvars::HIP_ROCCLR_HOME;
# If using ROCclr runtime, need to find HIP_ROCCLR_HOME
if ($HIP_PLATFORM eq "amd") {
$HIPCXXFLAGS .= "-D__HIP_ROCclr__";
$HIPCFLAGS .= "-D__HIP_ROCclr__";
}
if (!defined $DEVICE_LIB_PATH and -e "$HIP_ROCCLR_HOME/lib/bitcode") {
$DEVICE_LIB_PATH = "$HIP_ROCCLR_HOME/lib/bitcode";
@@ -203,8 +199,6 @@ if ($HIP_PLATFORM eq "amd") {
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
$HIPCXXFLAGS .= " -isystem $HSA_PATH/include";
$HIPCFLAGS .= " -isystem $HSA_PATH/include";
$HIPCXXFLAGS .= " -D__HIP_ROCclr__";
$HIPCFLAGS .= " -D__HIP_ROCclr__";
} elsif ($HIP_PLATFORM eq "nvidia") {
$CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda';
+3 -5
Wyświetl plik
@@ -48,13 +48,11 @@ if ($HIP_COMPILER eq "clang") {
$HIP_CLANG_VERSION=~/.*clang version (\S+).*/;
$HIP_CLANG_VERSION=$1;
$CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__= -I$HIP_PATH/include -I$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION -I$HSA_PATH/include";
}
if ($HIP_RUNTIME eq "rocclr") {
$CPP_CONFIG .= " -D__HIP_ROCclr__";
$CPP_CONFIG = " -D__HIP_PLATFORM_AMD__= -I$HIP_PATH/include -I$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION -I$HSA_PATH/include";
}
if ($HIP_PLATFORM eq "nvidia") {
$CPP_CONFIG = " -D__HIP_PLATFORM_NVCC__= -D__HIP_PLATFORM_NVIDIA__= -I$HIP_PATH/include -I$CUDA_PATH/include";
$CPP_CONFIG = " -D__HIP_PLATFORM_NVIDIA__= -I$HIP_PATH/include -I$CUDA_PATH/include";
};
if ($p_help) {
+13 -35
Wyświetl plik
@@ -7,20 +7,18 @@ include(CheckCXXCompilerFlag)
###############################################################################
# User defined flags
set(HIP_HIPCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HIPCC")
set(HIP_HCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HCC")
set(HIP_CLANG_FLAGS "" CACHE STRING "Semicolon delimited flags for CLANG")
set(HIP_NVCC_FLAGS "" CACHE STRING "Semicolon delimted flags for NVCC")
mark_as_advanced(HIP_HIPCC_FLAGS HIP_HCC_FLAGS HIP_CLANG_FLAGS HIP_NVCC_FLAGS)
mark_as_advanced(HIP_HIPCC_FLAGS HIP_CLANG_FLAGS HIP_NVCC_FLAGS)
set(_hip_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo)
list(REMOVE_DUPLICATES _hip_configuration_types)
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
set(HIP_HIPCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HIPCC")
set(HIP_HCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HCC")
set(HIP_CLANG_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for CLANG")
set(HIP_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for NVCC")
mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_HCC_FLAGS_${config_upper} HIP_CLANG_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper})
mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_CLANG_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper})
endforeach()
option(HIP_HOST_COMPILATION_CPP "Host code compilation mode" ON)
option(HIP_VERBOSE_BUILD "Print out the commands run while compiling the HIP source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF)
@@ -211,11 +209,6 @@ if("${HIP_COMPILER}" STREQUAL "nvcc")
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <LINK_LIBRARIES> -shared" )
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
elseif("${HIP_COMPILER}" STREQUAL "hcc")
# Set the CMake Flags to use the hcc Compiler.
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <LINK_LIBRARIES> -shared" )
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
elseif("${HIP_COMPILER}" STREQUAL "clang")
#Set HIP_CLANG_PATH
if("x${HIP_CLANG_PATH}" STREQUAL "x")
@@ -290,13 +283,11 @@ hip_find_helper_file(run_hipcc cmake)
###############################################################################
macro(HIP_RESET_FLAGS)
unset(HIP_HIPCC_FLAGS)
unset(HIP_HCC_FLAGS)
unset(HIP_CLANG_FLAGS)
unset(HIP_NVCC_FLAGS)
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
unset(HIP_HIPCC_FLAGS_${config_upper})
unset(HIP_HCC_FLAGS_${config_upper})
unset(HIP_CLANG_FLAGS_${config_upper})
unset(HIP_NVCC_FLAGS_${config_upper})
endforeach()
@@ -305,11 +296,10 @@ endmacro()
###############################################################################
# MACRO: Separate the options from the sources
###############################################################################
macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options)
macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _clang_options _nvcc_options)
set(${_sources})
set(${_cmake_options})
set(${_hipcc_options})
set(${_hcc_options})
set(${_clang_options})
set(${_nvcc_options})
set(_hipcc_found_options FALSE)
@@ -323,6 +313,9 @@ macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_op
set(_clang_found_options FALSE)
set(_nvcc_found_options FALSE)
elseif("x${arg}" STREQUAL "xHCC_OPTIONS")
# To be removed after HCC_OPTIONS is removed from hip_add_executable()
# via upstream updation
message(WARNING, "Please remove obsolete HCC_OPTIONS from hip_add_executable()")
set(_hipcc_found_options FALSE)
set(_hcc_found_options TRUE)
set(_clang_found_options FALSE)
@@ -348,7 +341,7 @@ macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_op
if(_hipcc_found_options)
list(APPEND ${_hipcc_options} ${arg})
elseif(_hcc_found_options)
list(APPEND ${_hcc_options} ${arg})
message(WARNING, "Please remove obsolete HCC_OPTIONS ${arg} from hip_add_executable()")
elseif(_clang_found_options)
list(APPEND ${_clang_options} ${arg})
elseif(_nvcc_found_options)
@@ -484,9 +477,8 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files
endforeach()
endif()
HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options ${ARGN})
HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _clang_options _nvcc_options ${ARGN})
HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options})
HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options})
HIP_PARSE_HIPCC_OPTIONS(HIP_CLANG_FLAGS ${_clang_options})
HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options})
@@ -508,7 +500,6 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files
# If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS
if(_hip_build_shared_libs)
list(APPEND HIP_HCC_FLAGS "-fPIC")
list(APPEND HIP_CLANG_FLAGS "-fPIC")
list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'")
endif()
@@ -519,14 +510,12 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files
# Set compiler flags
set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})")
set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})")
set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})")
set(_HIP_CLANG_FLAGS "set(HIP_CLANG_FLAGS ${HIP_CLANG_FLAGS})")
set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})")
foreach(config ${_hip_configuration_types})
string(TOUPPER ${config} config_upper)
set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})")
set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})")
set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})")
set(_HIP_CLANG_FLAGS "${_HIP_CLANG_FLAGS}\nset(HIP_CLANG_FLAGS_${config_upper} ${HIP_CLANG_FLAGS_${config_upper}})")
set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})")
endforeach()
@@ -633,23 +622,12 @@ endmacro()
###############################################################################
macro(HIP_ADD_EXECUTABLE hip_target)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options})
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _clang_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
if("${HIP_COMPILER}" STREQUAL "hcc")
if("x${HCC_HOME}" STREQUAL "x")
if (DEFINED ENV{ROCM_PATH})
set(HCC_HOME "$ENV{ROCM_PATH}/hcc")
elseif(DEFINED ENV{HIP_PATH})
set(HCC_HOME "$ENV{HIP_PATH}/../hcc")
else()
set(HCC_HOME "/opt/rocm/hcc")
endif()
endif()
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
elseif("${HIP_COMPILER}" STREQUAL "clang")
if("${HIP_COMPILER}" STREQUAL "clang")
if("x${HIP_CLANG_PATH}" STREQUAL "x")
if(DEFINED ENV{HIP_CLANG_PATH})
set(HIP_CLANG_PATH $ENV{HIP_CLANG_PATH})
@@ -683,8 +661,8 @@ endmacro()
###############################################################################
macro(HIP_ADD_LIBRARY hip_target)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _clang_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options})
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _clang_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
+2 -10
Wyświetl plik
@@ -26,13 +26,11 @@ set(HIP_HIPCONFIG_EXECUTABLE "@HIP_HIPCONFIG_EXECUTABLE@") #path
set(HIP_HOST_COMPILER "@HIP_HOST_COMPILER@") # path
set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path
set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path
set(HCC_HOME "@HCC_HOME@") #path
set(HIP_CLANG_PATH "@HIP_CLANG_PATH@") #path
set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "@HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS@")
@HIP_HOST_FLAGS@
@_HIP_HIPCC_FLAGS@
@_HIP_HCC_FLAGS@
@_HIP_CLANG_FLAGS@
@_HIP_NVCC_FLAGS@
#Needed to bring the HIP_HIPCC_INCLUDE_ARGS variable in scope
@@ -49,17 +47,11 @@ execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --runtime OUTPUT_VARIABLE HI
if(NOT host_flag)
set(__CC ${HIP_HIPCC_EXECUTABLE})
if("${HIP_PLATFORM}" STREQUAL "amd")
if("${HIP_COMPILER}" STREQUAL "hcc")
if(NOT "x${HCC_HOME}" STREQUAL "x")
set(ENV{HCC_HOME} ${HCC_HOME})
endif()
set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_HCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_HCC_FLAGS_${build_configuration}})
elseif("${HIP_COMPILER}" STREQUAL "clang")
if("${HIP_COMPILER}" STREQUAL "clang")
if(NOT "x${HIP_CLANG_PATH}" STREQUAL "x")
set(ENV{HIP_CLANG_PATH} ${HIP_CLANG_PATH})
endif()
# Temporarily include HIP_HCC_FLAGS for HIP-Clang for PyTorch builds
set(__CC_FLAGS ${HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS} ${HIP_HIPCC_FLAGS} ${HIP_HCC_FLAGS} ${HIP_CLANG_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_HCC_FLAGS_${build_configuration}} ${HIP_CLANG_FLAGS_${build_configuration}})
set(__CC_FLAGS ${HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS} ${HIP_HIPCC_FLAGS} ${HIP_CLANG_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_CLANG_FLAGS_${build_configuration}})
endif()
else()
set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_NVCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_NVCC_FLAGS_${build_configuration}})
+1 -1
Wyświetl plik
@@ -2170,7 +2170,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED = __HCC__ __HIP_PLATFORM_HCC__ __HIP_PLATFORM_AMD__
PREDEFINED = __HIP_PLATFORM_AMD__
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
+1 -18
Wyświetl plik
@@ -88,8 +88,6 @@ if(HIP_COMPILER STREQUAL "clang")
find_dependency(AMDDeviceLibs)
set(AMDGPU_TARGETS "gfx900;gfx906;gfx908" CACHE STRING "AMD GPU targets to compile for")
set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for")
else()
find_dependency(hcc)
endif()
find_dependency(amd_comgr)
@@ -121,26 +119,11 @@ endif()
# Right now this is only supported for amd platforms
set_target_properties(hip::host PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_HCC__=1;__HIP_PLATFORM_AMD__=1"
INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_AMD__=1"
)
if(HIP_RUNTIME MATCHES "rocclr")
set_target_properties(hip::amdhip64 PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "__HIP_ROCclr__=1"
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}"
)
set_target_properties(hip::device PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "__HIP_ROCclr__=1"
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include"
)
else()
set_target_properties(hip::hip_hcc_static PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}")
set_target_properties(hip::hip_hcc PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}"
)
@@ -29,14 +29,8 @@ THE SOFTWARE.
#ifdef __cplusplus
#if __HIP_ROCclr__
extern "C" {
#endif
HIP_PUBLIC_API
extern "C" HIP_PUBLIC_API
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f);
#if __HIP_ROCclr__
}
#endif
static inline hipChannelFormatDesc hipCreateChannelDescHalf() {
int e = (int)sizeof(unsigned short) * 8;
+3 -122
Wyświetl plik
@@ -34,19 +34,12 @@ THE SOFTWARE.
#include <hip/amd_detail/device_library_decls.h>
#include <hip/amd_detail/llvm_intrinsics.h>
#if __HIP_CLANG_ONLY__ && __HIP_ROCclr__ && !_WIN32
#if __HIP_CLANG_ONLY__ && !_WIN32
extern "C" __device__ int printf(const char *fmt, ...);
#else
#if HC_FEATURE_PRINTF
template <typename... All>
static inline __device__ void printf(const char* format, All... all) {
hc::printf(format, all...);
}
#else
template <typename... All>
static inline __device__ void printf(const char* format, All... all) {}
#endif // HC_FEATURE_PRINTF
#endif // __HIP_CLANG_ONLY__ && __HIP_ROCclr__
#endif // __HIP_CLANG_ONLY__ && !_WIN32
/*
Integer Intrinsics
@@ -278,22 +271,14 @@ __device__ static inline float __hip_ds_permutef(int index, float src) {
template <int pattern>
__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
union { int i; unsigned u; float f; } tmp; tmp.u = src;
#if defined(__HCC__)
tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
#else
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
#endif
return tmp.u;
}
template <int pattern>
__device__ static inline float __hip_ds_swizzlef_N(float src) {
union { int i; unsigned u; float f; } tmp; tmp.f = src;
#if defined(__HCC__)
tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
#else
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
#endif
return tmp.f;
}
@@ -964,15 +949,7 @@ __device__ static inline float __ull2float_rn(unsigned long long int x) { return
__device__ static inline float __ull2float_ru(unsigned long long int x) { return (float)x; }
__device__ static inline float __ull2float_rz(unsigned long long int x) { return (float)x; }
#if defined(__HCC__)
#define __HCC_OR_HIP_CLANG__ 1
#elif defined(__clang__) && defined(__HIP__)
#define __HCC_OR_HIP_CLANG__ 1
#else
#define __HCC_OR_HIP_CLANG__ 0
#endif
#if __HCC_OR_HIP_CLANG__
#if __HIP_CLANG_ONLY__
// Clock functions
__device__ long long int __clock64();
@@ -985,10 +962,6 @@ __device__ void __named_sync(int a, int b);
#ifdef __HIP_DEVICE_COMPILE__
// Clock functions
#if __HCC__
extern "C" uint64_t __clock_u64() __HC__;
#endif
__device__
inline __attribute((always_inline))
long long int __clock64() {
@@ -1096,80 +1069,6 @@ void *__amdgcn_get_dynamicgroupbaseptr() {
return __get_dynamicgroupbaseptr();
}
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
// hip.amdgcn.bc - sync threads
#define __CLK_LOCAL_MEM_FENCE 0x01
typedef unsigned __cl_mem_fence_flags;
typedef enum __memory_scope {
__memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
__memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
__memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
__memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
} __memory_scope;
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum __memory_order
{
__memory_order_relaxed = __ATOMIC_RELAXED,
__memory_order_acquire = __ATOMIC_ACQUIRE,
__memory_order_release = __ATOMIC_RELEASE,
__memory_order_acq_rel = __ATOMIC_ACQ_REL,
__memory_order_seq_cst = __ATOMIC_SEQ_CST
} __memory_order;
__device__
inline
static void
__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
{
// We're tying global-happens-before and local-happens-before together as does HSA
if (order != __memory_order_relaxed) {
switch (scope) {
case __memory_scope_work_item:
break;
case __memory_scope_sub_group:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_sg(); break;
case __memory_order_release: __llvm_fence_rel_sg(); break;
case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
}
break;
case __memory_scope_work_group:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_wg(); break;
case __memory_order_release: __llvm_fence_rel_wg(); break;
case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
}
break;
case __memory_scope_device:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_dev(); break;
case __memory_order_release: __llvm_fence_rel_dev(); break;
case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
}
break;
case __memory_scope_all_svm_devices:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_sys(); break;
case __memory_order_release: __llvm_fence_rel_sys(); break;
case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
}
break;
}
}
}
#endif
// Memory Fence Functions
__device__
inline
@@ -1200,24 +1099,6 @@ void abort() {
return __builtin_trap();
}
#endif // __HCC_OR_HIP_CLANG__
#ifdef __HCC__
/**
* extern __shared__
*/
// Macro to replace extern __shared__ declarations
// to local variable definitions
#define HIP_DYNAMIC_SHARED(type, var) type* var = (type*)__get_dynamicgroupbaseptr();
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
#elif defined(__clang__) && defined(__HIP__)
// The noinline attribute helps encapsulate the printf expansion,
// which otherwise has a performance impact just by increasing the
// size of the calling function. Additionally, the weak attribute
@@ -86,29 +86,6 @@ extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_or_i32(int a)
__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
#endif //__HIP_DEVICE_COMPILE__
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
// __llvm_fence* functions from device-libs/irif/src/fence.ll
extern "C" __device__ void __llvm_fence_acq_sg(void);
extern "C" __device__ void __llvm_fence_acq_wg(void);
extern "C" __device__ void __llvm_fence_acq_dev(void);
extern "C" __device__ void __llvm_fence_acq_sys(void);
extern "C" __device__ void __llvm_fence_rel_sg(void);
extern "C" __device__ void __llvm_fence_rel_wg(void);
extern "C" __device__ void __llvm_fence_rel_dev(void);
extern "C" __device__ void __llvm_fence_rel_sys(void);
extern "C" __device__ void __llvm_fence_ar_sg(void);
extern "C" __device__ void __llvm_fence_ar_wg(void);
extern "C" __device__ void __llvm_fence_ar_dev(void);
extern "C" __device__ void __llvm_fence_ar_sys(void);
extern "C" __device__ void __llvm_fence_sc_sg(void);
extern "C" __device__ void __llvm_fence_sc_wg(void);
extern "C" __device__ void __llvm_fence_sc_dev(void);
extern "C" __device__ void __llvm_fence_sc_sys(void);
#else
// Using hip.amdgcn.bc - sync threads
#define __CLK_LOCAL_MEM_FENCE 0x01
typedef unsigned __cl_mem_fence_flags;
@@ -134,6 +111,5 @@ typedef enum __memory_order
// Linked from hip.amdgcn.bc
extern "C" __device__ void
__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope);
#endif
#endif
@@ -22,9 +22,5 @@ THE SOFTWARE.
#pragma once
#if GENERIC_GRID_LAUNCH == 1
#if __hcc_workweek__ >= 17481
#include "functional_grid_launch.hpp"
#else
#include "macro_based_grid_launch.hpp"
#endif
#endif // GENERIC_GRID_LAUNCH
+1 -9
Wyświetl plik
@@ -23,17 +23,9 @@ SOFTWARE.
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H
#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMMON_H
#if defined(__HCC__)
#define __HCC_OR_HIP_CLANG__ 1
#define __HCC_ONLY__ 1
#define __HIP_CLANG_ONLY__ 0
#elif defined(__clang__) && defined(__HIP__)
#define __HCC_OR_HIP_CLANG__ 1
#define __HCC_ONLY__ 0
#if defined(__clang__) && defined(__HIP__)
#define __HIP_CLANG_ONLY__ 1
#else
#define __HCC_OR_HIP_CLANG__ 0
#define __HCC_ONLY__ 0
#define __HIP_CLANG_ONLY__ 0
#endif
+2 -2
Wyświetl plik
@@ -34,7 +34,7 @@ THE SOFTWARE.
#include <utility>
#endif
#if __HCC_OR_HIP_CLANG__
#if __HIP_CLANG_ONLY__
typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
struct __half_raw {
@@ -1416,7 +1416,7 @@ THE SOFTWARE.
}
// Math functions
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
#if __HIP_CLANG_ONLY__
inline
__device__
float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) {
@@ -55,7 +55,7 @@ extern "C"
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
typedef short __2i16 __attribute__((ext_vector_type(2)));
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
#if __HIP_CLANG_ONLY__
__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s);
#endif
+2 -5
Wyświetl plik
@@ -23,8 +23,7 @@ THE SOFTWARE.
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H
#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_LDG_H
#if defined(__HCC_OR_HIP_CLANG__)
#if __hcc_workweek__ >= 16164 || __HIP_CLANG_ONLY__
#if __HIP_CLANG_ONLY__
#include "hip_vector_types.h"
#include "host_defines.h"
@@ -96,8 +95,6 @@ __device__ inline static double __ldg(const double* ptr) { return ptr[0]; }
__device__ inline static double2 __ldg(const double2* ptr) { return ptr[0]; }
#endif // __hcc_workweek__ || __HIP_CLANG_ONLY__
#endif // defined(__HCC_OR_HIP_CLANG__)
#endif // __HIP_CLANG_ONLY__
#endif // HIP_LDG_H
+2 -2
Wyświetl plik
@@ -27,7 +27,7 @@ THE SOFTWARE.
// HIP heap is implemented as a global array with fixed size. Users may define
// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap.
#if (__HCC__ || __HIP__) && __HIP_ENABLE_DEVICE_MALLOC__
#if __HIP__ && __HIP_ENABLE_DEVICE_MALLOC__
// Size of page in bytes.
#ifndef __HIP_SIZE_OF_PAGE
@@ -41,7 +41,7 @@ THE SOFTWARE.
#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE)
#if __HIP__ && __HIP_DEVICE_COMPILE__
#if __HIP_DEVICE_COMPILE__
__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
__attribute__((weak)) __device__
uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
+5 -221
Wyświetl plik
@@ -49,56 +49,16 @@ THE SOFTWARE.
#define __HIP_ENABLE_DEVICE_MALLOC__ 0
#endif
#if __HCC_OR_HIP_CLANG__
#if __HIP_CLANG_ONLY__
#if __HIP__
#if !defined(__align__)
#define __align__(x) __attribute__((aligned(x)))
#endif
#endif
#define CUDA_SUCCESS hipSuccess
#include <hip/hip_runtime_api.h>
#endif // __HCC_OR_HIP_CLANG__
#if __HCC__
// define HIP_ENABLE_PRINTF to enable printf
#ifdef HIP_ENABLE_PRINTF
#define HCC_ENABLE_ACCELERATOR_PRINTF 1
#endif
//---
// Remainder of this file only compiles with HCC
#if defined __HCC__
#include "grid_launch.h"
#include "hc_printf.hpp"
// TODO-HCC-GL - change this to typedef.
// typedef grid_launch_parm hipLaunchParm ;
#if GENERIC_GRID_LAUNCH == 0
#define hipLaunchParm grid_launch_parm
#else
namespace hip_impl {
struct Empty_launch_parm {};
} // namespace hip_impl
#define hipLaunchParm hip_impl::Empty_launch_parm
#endif // GENERIC_GRID_LAUNCH
#if defined(GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || GENERIC_GRID_LAUNCH == 1
#else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
#error(HCC must support GRID_LAUNCH_20)
#endif // GRID_LAUNCH_VERSION
#endif // HCC
#if GENERIC_GRID_LAUNCH == 1 && defined __HCC__
#include "grid_launch_GGL.hpp"
#endif // GENERIC_GRID_LAUNCH
#endif // HCC
#if __HCC_OR_HIP_CLANG__
extern int HIP_TRACE_API;
#ifdef __cplusplus
@@ -108,30 +68,14 @@ extern int HIP_TRACE_API;
#include <hip/amd_detail/host_defines.h>
#include <hip/amd_detail/device_functions.h>
#include <hip/amd_detail/surface_functions.h>
#if __HCC__
#include <hip/amd_detail/math_functions.h>
#include <hip/amd_detail/texture_functions.h>
#else
#include <hip/amd_detail/texture_fetch_functions.h>
#include <hip/amd_detail/texture_indirect_functions.h>
#endif
#include <hip/amd_detail/texture_fetch_functions.h>
#include <hip/amd_detail/texture_indirect_functions.h>
// TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
#if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
#define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
#endif
// TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call.
#if defined(__HCC__) && __HIP_DEVICE_COMPILE__ == 1
#undef assert
#define assert(COND) \
{ \
if (!(COND)) { \
abort(); \
} \
}
#endif
// Feature tests:
#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
// Device compile and not host compile:
@@ -177,13 +121,6 @@ extern int HIP_TRACE_API;
#define __launch_bounds__(...) \
select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
// Detect if we are compiling C++ mode or C mode
#if defined(__cplusplus)
#define __HCC_CPP__
#elif defined(__STDC_VERSION__)
#define __HCC_C__
#endif
__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
#if __HIP_ARCH_GFX701__ == 0
@@ -203,118 +140,7 @@ __device__ int __hip_move_dpp_N(int src);
#endif //__HIP_ARCH_GFX803__ == 1
#endif // __HCC_OR_HIP_CLANG__
#if defined __HCC__
namespace hip_impl {
struct GroupId {
using R = decltype(hc_get_group_id(0));
__device__
R operator()(std::uint32_t x) const noexcept { return hc_get_group_id(x); }
};
struct GroupSize {
using R = decltype(hc_get_group_size(0));
__device__
R operator()(std::uint32_t x) const noexcept {
return hc_get_group_size(x);
}
};
struct NumGroups {
using R = decltype(hc_get_num_groups(0));
__device__
R operator()(std::uint32_t x) const noexcept {
return hc_get_num_groups(x);
}
};
struct WorkitemId {
using R = decltype(hc_get_workitem_id(0));
__device__
R operator()(std::uint32_t x) const noexcept {
return hc_get_workitem_id(x);
}
};
} // Namespace hip_impl.
template <typename F>
struct Coordinates {
using R = decltype(F{}(0));
struct X { __device__ operator R() const noexcept { return F{}(0); } };
struct Y { __device__ operator R() const noexcept { return F{}(1); } };
struct Z { __device__ operator R() const noexcept { return F{}(2); } };
static constexpr X x{};
static constexpr Y y{};
static constexpr Z z{};
};
inline
__device__
std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::X,
Coordinates<hip_impl::GroupSize>::X) noexcept {
return hc_get_grid_size(0);
}
inline
__device__
std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::X,
Coordinates<hip_impl::NumGroups>::X) noexcept {
return hc_get_grid_size(0);
}
inline
__device__
std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Y,
Coordinates<hip_impl::GroupSize>::Y) noexcept {
return hc_get_grid_size(1);
}
inline
__device__
std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Y,
Coordinates<hip_impl::NumGroups>::Y) noexcept {
return hc_get_grid_size(1);
}
inline
__device__
std::uint32_t operator*(Coordinates<hip_impl::NumGroups>::Z,
Coordinates<hip_impl::GroupSize>::Z) noexcept {
return hc_get_grid_size(2);
}
inline
__device__
std::uint32_t operator*(Coordinates<hip_impl::GroupSize>::Z,
Coordinates<hip_impl::NumGroups>::Z) noexcept {
return hc_get_grid_size(2);
}
static constexpr Coordinates<hip_impl::GroupSize> blockDim{};
static constexpr Coordinates<hip_impl::GroupId> blockIdx{};
static constexpr Coordinates<hip_impl::NumGroups> gridDim{};
static constexpr Coordinates<hip_impl::WorkitemId> threadIdx{};
#define hipThreadIdx_x (hc_get_workitem_id(0))
#define hipThreadIdx_y (hc_get_workitem_id(1))
#define hipThreadIdx_z (hc_get_workitem_id(2))
#define hipBlockIdx_x (hc_get_group_id(0))
#define hipBlockIdx_y (hc_get_group_id(1))
#define hipBlockIdx_z (hc_get_group_id(2))
#define hipBlockDim_x (hc_get_group_size(0))
#define hipBlockDim_y (hc_get_group_size(1))
#define hipBlockDim_z (hc_get_group_size(2))
#define hipGridDim_x (hc_get_num_groups(0))
#define hipGridDim_y (hc_get_num_groups(1))
#define hipGridDim_z (hc_get_num_groups(2))
#endif // defined __HCC__
#ifndef __OPENMP_AMDGCN__
#if __HCC_OR_HIP_CLANG__
#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
#if __HIP_ENABLE_DEVICE_MALLOC__
extern "C" __device__ void* __hip_malloc(size_t);
@@ -326,48 +152,8 @@ static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nu
static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; }
#endif
#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
#endif //__HCC_OR_HIP_CLANG__
#endif // !__OPENMP_AMDGCN__
#ifdef __HCC__
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
#define HIP_KERNEL_NAME(...) (__VA_ARGS__)
#define HIP_SYMBOL(X) #X
#if defined __HCC_CPP__
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block,
grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block,
grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block,
grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block,
grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired = 0);
extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, grid_launch_parm& lp, bool unlockPostponed = 0);
#if GENERIC_GRID_LAUNCH == 0
//#warning "Original hipLaunchKernel defined"
// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be
// either size_t or dim3 types
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
do { \
grid_launch_parm lp; \
lp.dynamic_group_mem_bytes = _groupMemBytes; \
hipStream_t trueStream = \
(ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \
_kernelName(lp, ##__VA_ARGS__); \
ihipPostLaunchKernel(#_kernelName, trueStream, lp); \
} while (0)
#endif // GENERIC_GRID_LAUNCH
#elif defined(__HCC_C__)
// TODO - develop C interface.
#endif //__HCC_CPP__
// End doxygen API:
/**
* @}
@@ -376,8 +162,6 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri
//
// hip-clang functions
//
#elif defined(__clang__) && defined(__HIP__)
#define HIP_KERNEL_NAME(...) __VA_ARGS__
#define HIP_SYMBOL(X) X
@@ -605,7 +389,7 @@ hc_get_workitem_absolute_id(int dim)
#pragma pop_macro("__CUDA__")
#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
#endif // defined(__clang__) && defined(__HIP__)
#endif // __HIP_CLANG_ONLY__
#include <hip/amd_detail/hip_memory.h>
+21 -549
Wyświetl plik
@@ -35,20 +35,11 @@ THE SOFTWARE.
#define GENERIC_GRID_LAUNCH 1
#endif
#ifndef __HIP_ROCclr__
#define __HIP_ROCclr__ 0
#endif
#include <hip/amd_detail/host_defines.h>
#include <hip/amd_detail/driver_types.h>
#include <hip/amd_detail/hip_texture_types.h>
#include <hip/amd_detail/hip_surface_types.h>
#if !__HIP_ROCclr__ && defined(__cplusplus)
#include <hsa/hsa.h>
#include <hip/amd_detail/program_state.hpp>
#endif
#if defined(_MSC_VER)
#define DEPRECATED(msg) __declspec(deprecated(msg))
#else // !defined(_MSC_VER)
@@ -57,10 +48,6 @@ THE SOFTWARE.
#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases. For more details please refer https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_deprecated_api_list.md"
#if defined(__HCC__) && (__hcc_workweek__ < 16155)
#error("This version of HIP requires a newer version of HCC.");
#endif
#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
#define HIP_LAUNCH_PARAM_END ((void*)0x03)
@@ -77,7 +64,6 @@ THE SOFTWARE.
#endif
#ifdef __cplusplus
namespace hip_impl {
hipError_t hip_init();
} // namespace hip_impl
@@ -112,15 +98,10 @@ typedef struct hipIpcMemHandle_st {
char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcMemHandle_t;
#if __HIP_ROCclr__
// TODO: IPC event handle currently unsupported
struct ihipIpcEventHandle_t;
typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t;
#else
typedef struct hipIpcEventHandle_st {
char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcEventHandle_t;
#endif
typedef struct ihipModule_t* hipModule_t;
typedef struct ihipModuleSymbol_t* hipFunction_t;
@@ -1957,7 +1938,6 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes,
hipStream_t stream);
#if __HIP_ROCclr__
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
hipModule_t hmod, const char* name);
@@ -1976,205 +1956,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol,
size_t sizeBytes, size_t offset,
hipMemcpyKind kind,
hipStream_t stream __dparm(0));
#else
hipError_t hipModuleGetGlobal(void**, size_t*, hipModule_t, const char*);
#ifdef __cplusplus //Start : Not supported in gcc
namespace hip_impl {
inline
__attribute__((visibility("hidden")))
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
const char* name);
} // Namespace hip_impl.
/**
* @brief Copies the memory address of symbol @p symbolName to @p devPtr
*
* @param[in] symbolName - Symbol on device
* @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol
* @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
*
* @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
* hipMemcpyFromSymbolAsync
*/
inline
__attribute__((visibility("hidden")))
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
//HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName);
hip_impl::hip_init();
size_t size = 0;
return hip_impl::read_agent_global_from_process(devPtr, &size, (const char*)symbolName);
}
/**
* @brief Copies the size of symbol @p symbolName to @p size
*
* @param[in] symbolName - Symbol on device
* @param[out] size - Pointer to the size of the symbol
* @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
*
* @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
* hipMemcpyFromSymbolAsync
*/
inline
__attribute__((visibility("hidden")))
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
// HIP_INIT_API(hipGetSymbolSize, size, symbolName);
hip_impl::hip_init();
void* devPtr = nullptr;
return hip_impl::read_agent_global_from_process(&devPtr, size, (const char*)symbolName);
}
#endif // End : Not supported in gcc
#if defined(__cplusplus)
} // extern "C"
#endif
#ifdef __cplusplus
namespace hip_impl {
hipError_t hipMemcpyToSymbol(void*, const void*, size_t, size_t, hipMemcpyKind,
const char*);
} // Namespace hip_impl.
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/**
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
* pointed to by @p offset bytes from the start of symbol @p symbol.
*
* The memory areas may not overlap. Symbol can either be a variable that resides in global or
* constant memory space, or it can be a character string, naming a variable that resides in global
* or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
* TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use
* hipErrorUnknown for now.
*
* @param[in] symbolName - Symbol destination on device
* @param[in] src - Data being copy from
* @param[in] sizeBytes - Data size in bytes
* @param[in] offset - Offset from start of symbol in bytes
* @param[in] kind - Type of transfer
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
*
* @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
* hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol,
* hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
* hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
* hipMemcpyFromSymbolAsync
*/
#ifdef __cplusplus
inline
__attribute__((visibility("hidden")))
hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src,
size_t sizeBytes, size_t offset __dparm(0),
hipMemcpyKind kind __dparm(hipMemcpyHostToDevice)) {
if (!symbolName) return hipErrorInvalidSymbol;
hipDeviceptr_t dst = NULL;
hipGetSymbolAddress(&dst, (const char*)symbolName);
return hip_impl::hipMemcpyToSymbol(dst, src, sizeBytes, offset, kind,
(const char*)symbolName);
}
#endif
#if defined(__cplusplus)
} // extern "C"
#endif
#ifdef __cplusplus
namespace hip_impl {
hipError_t hipMemcpyToSymbolAsync(void*, const void*, size_t, size_t,
hipMemcpyKind, hipStream_t, const char*);
hipError_t hipMemcpyFromSymbol(void*, const void*, size_t, size_t,
hipMemcpyKind, const char*);
hipError_t hipMemcpyFromSymbolAsync(void*, const void*, size_t, size_t,
hipMemcpyKind, hipStream_t, const char*);
} // Namespace hip_impl.
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/**
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
* pointed to by @p offset bytes from the start of symbol @p symbol
*
* The memory areas may not overlap. Symbol can either be a variable that resides in global or
* constant memory space, or it can be a character string, naming a variable that resides in global
* or constant memory space. Kind can be either hipMemcpyHostToDevice or hipMemcpyDeviceToDevice
* hipMemcpyToSymbolAsync() is asynchronous with respect to the host, so the call may return before
* copy is complete.
* TODO: cudaErrorInvalidSymbol and cudaErrorInvalidMemcpyDirection is not supported, use
* hipErrorUnknown for now.
*
* @param[in] symbolName - Symbol destination on device
* @param[in] src - Data being copy from
* @param[in] sizeBytes - Data size in bytes
* @param[in] offset - Offset from start of symbol in bytes
* @param[in] kind - Type of transfer
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
*
* @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray,
* hipMemcpy2DFromArray, hipMemcpyArrayToArray, hipMemcpy2DArrayToArray, hipMemcpyFromSymbol,
* hipMemcpyAsync, hipMemcpy2DAsync, hipMemcpyToArrayAsync, hipMemcpy2DToArrayAsync,
* hipMemcpyFromArrayAsync, hipMemcpy2DFromArrayAsync, hipMemcpyToSymbolAsync,
* hipMemcpyFromSymbolAsync
*/
#ifdef __cplusplus //Start : Not supported in gcc
inline
__attribute__((visibility("hidden")))
hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src,
size_t sizeBytes, size_t offset,
hipMemcpyKind kind, hipStream_t stream __dparm(0)) {
if (!symbolName) return hipErrorInvalidSymbol;
hipDeviceptr_t dst = NULL;
hipGetSymbolAddress(&dst, symbolName);
return hip_impl::hipMemcpyToSymbolAsync(dst, src, sizeBytes, offset, kind,
stream,
(const char*)symbolName);
}
inline
__attribute__((visibility("hidden")))
hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName,
size_t sizeBytes, size_t offset __dparm(0),
hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) {
if (!symbolName) return hipErrorInvalidSymbol;
hipDeviceptr_t src = NULL;
hipGetSymbolAddress(&src, symbolName);
return hip_impl::hipMemcpyFromSymbol(dst, src, sizeBytes, offset, kind,
(const char*)symbolName);
}
inline
__attribute__((visibility("hidden")))
hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName,
size_t sizeBytes, size_t offset,
hipMemcpyKind kind,
hipStream_t stream __dparm(0)) {
if (!symbolName) return hipErrorInvalidSymbol;
hipDeviceptr_t src = NULL;
hipGetSymbolAddress(&src, symbolName);
return hip_impl::hipMemcpyFromSymbolAsync(dst, src, sizeBytes, offset, kind,
stream,
(const char*)symbolName);
}
#endif // End : Not supported in gcc
#endif // __HIP_ROCclr__
/**
* @brief Copy data from src to dst asynchronously.
*
@@ -3234,63 +3016,6 @@ hipError_t hipFuncGetAttributes(struct hipFuncAttributes* attr, const void* func
*/
hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc);
#if !__HIP_ROCclr__
#if defined(__cplusplus)
} // extern "C"
#endif
#ifdef __cplusplus
namespace hip_impl {
class agent_globals_impl;
class agent_globals {
public:
agent_globals();
~agent_globals();
agent_globals(const agent_globals&) = delete;
hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes,
hipModule_t hmod, const char* name);
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
const char* name);
private:
agent_globals_impl* impl;
};
inline
__attribute__((visibility("hidden")))
agent_globals& get_agent_globals() {
static agent_globals ag;
return ag;
}
extern "C"
inline
__attribute__((visibility("hidden")))
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes,
const char* name) {
return get_agent_globals().read_agent_global_from_process(dptr, bytes, name);
}
} // Namespace hip_impl.
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/**
* @brief returns device memory pointer and size of the kernel present in the module with symbol @p
* name
*
* @param [out] dptr
* @param [out] bytes
* @param [in] hmod
* @param [in] name
*
* @returns hipSuccess, hipErrorInvalidValue, hipErrorNotInitialized
*/
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
hipModule_t hmod, const char* name);
#endif // __HIP_ROCclr__
/**
* @brief returns the handle of the texture reference with the name from the module.
*
@@ -3358,8 +3083,6 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne
unsigned int sharedMemBytes, hipStream_t stream,
void** kernelParams, void** extra);
#if __HIP_ROCclr__ && !defined(__HCC__)
/**
* @brief launches kernel f with launch parameters and shared memory on stream with arguments passed
* to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute
@@ -3392,7 +3115,6 @@ hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags);
#endif
/**
* @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched
@@ -3665,7 +3387,6 @@ hipError_t hipLaunchKernel(const void* function_address,
size_t sharedMemBytes __dparm(0),
hipStream_t stream __dparm(0));
#if __HIP_ROCclr__ || !defined(__HCC__)
//TODO: Move this to hip_ext.h
hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks, dim3 dimBlocks,
void** args, size_t sharedMemBytes, hipStream_t stream,
@@ -3877,18 +3598,31 @@ hipError_t hipTexObjectGetResourceViewDesc(
hipError_t hipTexObjectGetTextureDesc(
HIP_TEXTURE_DESC* pTexDesc,
hipTextureObject_t texObject);
#endif
/**
* @}
* Callback/Activity API
*/
hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg);
hipError_t hipRemoveApiCallback(uint32_t id);
hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg);
hipError_t hipRemoveActivityCallback(uint32_t id);
const char* hipApiName(uint32_t id);
const char* hipKernelNameRef(const hipFunction_t f);
const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream);
int hipGetStreamDeviceId(hipStream_t stream);
#ifdef __cplusplus
} /* extern "c" */
#endif
#if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
#if USE_PROF_API
#include <hip/amd_detail/hip_prof_str.h>
#endif
#ifdef __cplusplus
#if defined(__clang__) && defined(__HIP__)
template <typename T>
static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) {
@@ -3900,9 +3634,7 @@ static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(int
T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) {
return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast<const void*>(f),dynSharedMemPerBlk,blockSizeLimit);
}
#endif // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__)
#if defined(__cplusplus) && !defined(__HCC__)
#endif // defined(__clang__) && defined(__HIP__)
template <typename T>
hipError_t hipGetSymbolAddress(void** devPtr, const T &symbol) {
@@ -3940,32 +3672,6 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const T& symbol, size_t sizeBytes
return ::hipMemcpyFromSymbolAsync(dst, (const void*)&symbol, sizeBytes, offset, kind, stream);
}
#endif
#if USE_PROF_API
#include <hip/amd_detail/hip_prof_str.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/**
* Callback/Activity API
*/
hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg);
hipError_t hipRemoveApiCallback(uint32_t id);
hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg);
hipError_t hipRemoveActivityCallback(uint32_t id);
const char* hipApiName(uint32_t id);
const char* hipKernelNameRef(const hipFunction_t f);
const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream);
int hipGetStreamDeviceId(hipStream_t stream);
#ifdef __cplusplus
} /* extern "C" */
#endif
#ifdef __cplusplus
template <class T>
inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk) {
@@ -3980,168 +3686,6 @@ inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
numBlocks, reinterpret_cast<const void*>(f), blockSize, dynSharedMemPerBlk, flags);
}
class TlsData;
#if !__HIP_ROCclr__
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc* desc, size_t size = UINT_MAX);
#endif
#if !__HIP_ROCclr__
hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t size, textureReference* tex);
#endif
/*
* @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
*reference tex.
*
* @p desc describes how the memory is interpreted when fetching values from the texture. The @p
*offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any
*memory previously bound to tex is unbound.
*
* @param[in] offset - Offset in bytes
* @param[out] tex - texture to bind
* @param[in] devPtr - Memory area on device
* @param[in] desc - Channel format
* @param[in] size - Size of the memory area pointed to by devPtr
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
**/
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex);
}
#endif
/*
* @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture
*reference tex.
*
* @p desc describes how the memory is interpreted when fetching values from the texture. The @p
*offset parameter is an optional byte offset as with the low-level hipBindTexture() function. Any
*memory previously bound to tex is unbound.
*
* @param[in] offset - Offset in bytes
* @param[in] tex - texture to bind
* @param[in] devPtr - Memory area on device
* @param[in] size - Size of the memory area pointed to by devPtr
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown
**/
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
size_t size = UINT_MAX) {
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex);
}
#endif
// C API
#if !__HIP_ROCclr__
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc* desc, size_t width, size_t height,
size_t pitch);
#endif
#if !__HIP_ROCclr__
hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t width, size_t height, textureReference* tex, size_t pitch);
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
const void* devPtr, size_t width, size_t height, size_t pitch) {
return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), width, height,
&tex);
}
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& tex,
const void* devPtr, const struct hipChannelFormatDesc& desc,
size_t width, size_t height, size_t pitch) {
return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, &desc, width, height, &tex);
}
#endif
// C API
#if !__HIP_ROCclr__
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
const hipChannelFormatDesc* desc);
#endif
#if !__HIP_ROCclr__
hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode,
hipArray_const_t array,
const struct hipChannelFormatDesc& desc,
textureReference* tex);
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex);
}
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array,
const struct hipChannelFormatDesc& desc) {
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex);
}
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
hipArray_const_t array,
const struct hipChannelFormatDesc* desc) {
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex);
}
#endif
// C API
#if !__HIP_ROCclr__
hipError_t hipBindTextureToMipmappedArray(const textureReference* tex,
hipMipmappedArray_const_t mipmappedArray,
const hipChannelFormatDesc* desc);
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
hipMipmappedArray_const_t mipmappedArray) {
return hipSuccess;
}
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTextureToMipmappedArray(const texture<T, dim, readMode>& tex,
hipMipmappedArray_const_t mipmappedArray,
const hipChannelFormatDesc& desc) {
return hipSuccess;
}
#endif
#if __HIP_ROCclr__ && !defined(__HCC__)
template <typename F>
inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) {
@@ -4161,87 +3705,16 @@ inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchP
return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags);
}
template <class T>
inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
unsigned int numDevices, unsigned int flags = 0) {
return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags);
}
#endif
/*
* @brief Unbinds the textuer bound to @p tex
*
* @param[in] tex - texture to unbind
*
* @return #hipSuccess
**/
#if !__HIP_ROCclr__
DEPRECATED(DEPRECATED_MSG)
hipError_t hipUnbindTexture(const textureReference* tex);
#endif
#if !__HIP_ROCclr__
extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject);
#endif
#if !__HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
hipError_t hipUnbindTexture(struct texture<T, dim, readMode>& tex) {
return ihipUnbindTextureImpl(tex.textureObject);
}
#endif
#if !__HIP_ROCclr__
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array);
DEPRECATED(DEPRECATED_MSG)
hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref);
hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol);
hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc,
const hipTextureDesc* pTexDesc,
const hipResourceViewDesc* pResViewDesc);
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject);
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
hipTextureObject_t textureObject);
hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc,
hipTextureObject_t textureObject);
hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
hipTextureObject_t textureObject);
hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags);
hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex);
hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am);
hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim);
hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm);
hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags);
hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents);
hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr,
size_t size);
hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex);
hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
hipDeviceptr_t devPtr, size_t pitch);
#endif
hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, const hipResourceDesc* pResDesc);
hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject);
#if __HIP_ROCclr__
template <class T, int dim, enum hipTextureReadMode readMode>
DEPRECATED(DEPRECATED_MSG)
static inline hipError_t hipBindTexture(size_t* offset, const struct texture<T, dim, readMode>& tex,
@@ -4336,7 +3809,6 @@ static inline hipError_t hipUnbindTexture(
{
return hipUnbindTexture(&tex);
}
#endif
// doxygen end Texture
/**
@@ -4344,7 +3816,7 @@ static inline hipError_t hipUnbindTexture(
*/
#endif
#endif // __cplusplus
#ifdef __GNUC__
#pragma GCC visibility pop
@@ -4355,4 +3827,4 @@ static inline hipError_t hipUnbindTexture(
* @}
*/
#endif
#endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_RUNTIME_API_H
@@ -28,10 +28,6 @@ THE SOFTWARE.
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_VECTOR_TYPES_H
#if defined(__HCC__) && (__hcc_workweek__ < 16032)
#error("This version of HIP requires a newer version of HCC.");
#endif
#include "hip/amd_detail/host_defines.h"
#if defined(__has_attribute)
+1 -29
Wyświetl plik
@@ -34,35 +34,7 @@ THE SOFTWARE.
#define GENERIC_GRID_LAUNCH 1
#endif
#ifdef __HCC__
/**
* Function and kernel markers
*/
#define __host__ __attribute__((cpu))
#define __device__ __attribute__((hc))
#if GENERIC_GRID_LAUNCH == 0
#define __global__ __attribute__((hc_grid_launch)) __attribute__((used))
#else
#if __hcc_workweek__ >= 17481
#define __global__ __attribute__((annotate("__HIP_global_function__"), cpu, hc, used))
#else
#define __global__ __attribute__((hc, used))
#endif
#endif // GENERIC_GRID_LAUNCH
#define __noinline__ __attribute__((noinline))
#define __forceinline__ inline __attribute__((always_inline))
/*
* Variable Type Qualifiers:
*/
// _restrict is supported by the compiler
#define __shared__ tile_static
#define __constant__ __attribute__((hc, annotate("__HIP_constant__")))
#elif defined(__clang__) && defined(__HIP__)
#if defined(__clang__) && defined(__HIP__)
#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
#define __host__ __attribute__((host))
+1 -58
Wyświetl plik
@@ -41,12 +41,6 @@ THE SOFTWARE.
#include <limits>
#include <stdint.h>
// HCC's own math functions should be included first, otherwise there will
// be conflicts when hip/math_functions.h is included before hip/hip_runtime.h.
#ifdef __HCC__
#include "kalmar_math.h"
#endif
#if _LIBCPP_VERSION && __HIP__
namespace std {
template <>
@@ -63,13 +57,8 @@ struct __numeric_type<_Float16>
#pragma push_macro("__DEVICE__")
#pragma push_macro("__RETURN_TYPE")
#ifdef __HCC__
#define __DEVICE__ __device__
#define __RETURN_TYPE int
#else // to be consistent with __clang_cuda_math_forward_declares
#define __DEVICE__ static __device__
#define __RETURN_TYPE bool
#endif
#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
__DEVICE__
@@ -143,7 +132,7 @@ uint64_t __make_mantissa(const char* tagp)
#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
// DOT FUNCTIONS
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
#if __HIP_CLANG_ONLY__
__DEVICE__
inline
int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
@@ -1430,50 +1419,6 @@ __DEVICE__ inline T max(T arg1, T arg2) {
return (arg1 > arg2) ? arg1 : arg2;
}
#if __HCC__
__DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) {
return min(arg1, (uint32_t) arg2);
}
/*__DEVICE__ inline static uint32_t min(int32_t arg1, uint32_t arg2) {
return min((uint32_t) arg1, arg2);
}
__DEVICE__ inline static uint64_t min(uint64_t arg1, int64_t arg2) {
return min(arg1, (uint64_t) arg2);
}
__DEVICE__ inline static uint64_t min(int64_t arg1, uint64_t arg2) {
return min((uint64_t) arg1, arg2);
}
__DEVICE__ inline static unsigned long long min(unsigned long long arg1, long long arg2) {
return min(arg1, (unsigned long long) arg2);
}
__DEVICE__ inline static unsigned long long min(long long arg1, unsigned long long arg2) {
return min((unsigned long long) arg1, arg2);
}*/
__DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) {
return max(arg1, (uint32_t) arg2);
}
__DEVICE__ inline static uint32_t max(int32_t arg1, uint32_t arg2) {
return max((uint32_t) arg1, arg2);
}
/*__DEVICE__ inline static uint64_t max(uint64_t arg1, int64_t arg2) {
return max(arg1, (uint64_t) arg2);
}
__DEVICE__ inline static uint64_t max(int64_t arg1, uint64_t arg2) {
return max((uint64_t) arg1, arg2);
}
__DEVICE__ inline static unsigned long long max(unsigned long long arg1, long long arg2) {
return max(arg1, (unsigned long long) arg2);
}
__DEVICE__ inline static unsigned long long max(long long arg1, unsigned long long arg2) {
return max((unsigned long long) arg1, arg2);
}*/
#else
__DEVICE__ inline int min(int arg1, int arg2) {
return (arg1 < arg2) ? arg1 : arg2;
}
@@ -1515,8 +1460,6 @@ double min(double x, double y) {
__HIP_OVERLOAD2(double, max)
__HIP_OVERLOAD2(double, min)
#endif
__host__ inline static int min(int arg1, int arg2) {
return std::min(arg1, arg2);
}
+1 -1
Wyświetl plik
@@ -28,7 +28,7 @@ THE SOFTWARE.
#endif
// DOT FUNCTIONS
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
#if __HIP_CLANG_ONLY__
__device__
__attribute__((const))
int __ockl_sdot2(
@@ -49,7 +49,7 @@ union TData {
#define __TEXTURE_FUNCTIONS_DECL__ static inline __device__
#if (__hcc_workweek__ >= 18114) || __clang__
#if __clang__
#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4)))
#else
#define ADDRESS_SPACE_CONSTANT __attribute__((address_space(2)))
+3 -3
Wyświetl plik
@@ -29,7 +29,7 @@
#ifndef _HIP_BFLOAT16_H_
#define _HIP_BFLOAT16_H_
#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
#if __cplusplus < 201103L || !defined(__HIPCC__)
// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only
// include a minimal definition of hip_bfloat16
@@ -41,7 +41,7 @@ typedef struct
uint16_t data;
} hip_bfloat16;
#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
#else // __cplusplus < 201103L || !defined(__HIPCC__)
#include <cmath>
#include <cstddef>
@@ -275,6 +275,6 @@ namespace std
}
}
#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
#endif // __cplusplus < 201103L || !defined(__HIPCC__)
#endif // _HIP_BFLOAT16_H_
+2 -8
Wyświetl plik
@@ -26,20 +26,14 @@ THE SOFTWARE.
// Common code included at start of every hip file.
// Auto enable __HIP_PLATFORM_AMD__ if compiling on AMD platform
// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly
#if defined(__HCC__) || (defined(__clang__) && defined(__HIP__))
#ifndef __HIP_PLATFORM_HCC__
#define __HIP_PLATFORM_HCC__ // To be removed
#endif
#if defined(__clang__) && defined(__HIP__)
#ifndef __HIP_PLATFORM_AMD__
#define __HIP_PLATFORM_AMD__
#endif
#endif //__HCC__
#endif // defined(__clang__) && defined(__HIP__)
// Auto enable __HIP_PLATFORM_NVIDIA__ if compiling with NVIDIA platform
#if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__))
#ifndef __HIP_PLATFORM_NVCC__
#define __HIP_PLATFORM_NVCC__ // To be removed
#endif
#ifndef __HIP_PLATFORM_NVIDIA__
#define __HIP_PLATFORM_NVIDIA__
#endif
+2 -54
Wyświetl plik
@@ -80,7 +80,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
hipEvent_t stopEvent = nullptr)
__attribute__((deprecated("use hipExtModuleLaunchKernel instead")));
#if defined(__HIP_ROCclr__) && defined(__cplusplus)
#if defined(__cplusplus)
extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numBlocks,
dim3 dimBlocks, void** args, size_t sharedMemBytes,
@@ -102,60 +102,8 @@ inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& d
hipExtLaunchKernel(k, numBlocks, dimBlocks, _Args, sharedMemBytes, stream, startEvent,
stopEvent, (int)flags);
}
#elif (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
//kernel_descriptor and hip_impl::make_kernarg are in "grid_launch_GGL.hpp"
namespace hip_impl {
inline
__attribute__((visibility("hidden")))
void hipExtLaunchKernelGGLImpl(
std::uintptr_t function_address,
const dim3& numBlocks,
const dim3& dimBlocks,
std::uint32_t sharedMemBytes,
hipStream_t stream,
hipEvent_t startEvent,
hipEvent_t stopEvent,
std::uint32_t flags,
void** kernarg) {
const auto& kd = hip_impl::get_program_state()
.kernel_descriptor(function_address, target_agent(stream));
hipExtModuleLaunchKernel(kd, numBlocks.x * dimBlocks.x,
numBlocks.y * dimBlocks.y,
numBlocks.z * dimBlocks.z,
dimBlocks.x, dimBlocks.y, dimBlocks.z,
sharedMemBytes, stream, nullptr, kernarg,
startEvent, stopEvent, flags);
}
} // namespace hip_impl
template <typename... Args, typename F = void (*)(Args...)>
inline
void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks,
const dim3& dimBlocks, std::uint32_t sharedMemBytes,
hipStream_t stream, hipEvent_t startEvent,
hipEvent_t stopEvent, std::uint32_t flags,
Args... args) {
hip_impl::hip_init();
auto kernarg =
hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
std::size_t kernarg_size = kernarg.size();
void* config[]{
HIP_LAUNCH_PARAM_BUFFER_POINTER,
kernarg.data(),
HIP_LAUNCH_PARAM_BUFFER_SIZE,
&kernarg_size,
HIP_LAUNCH_PARAM_END};
hip_impl::hipExtLaunchKernelGGLImpl(reinterpret_cast<std::uintptr_t>(kernel),
numBlocks, dimBlocks, sharedMemBytes,
stream, startEvent, stopEvent, flags,
&config[0]);
}
#endif // !__HIP_ROCclr__ && defined(__cplusplus)
#endif // defined(__cplusplus)
// doxygen end AMD-specific features
/**
+41 -1
Wyświetl plik
@@ -23,7 +23,7 @@ THE SOFTWARE.
//! HIP = Heterogeneous-compute Interface for Portability
//!
//! Define a extremely thin runtime layer that allows source code to be compiled unmodified
//! through either AMD HCC or NVCC. Key features tend to be in the spirit
//! through either AMD CLANG or NVCC. Key features tend to be in the spirit
//! and terminology of CUDA, but with a portable path to other accelerators as well:
//
//! Both paths support rich C++ features including classes, templates, lambdas, etc.
@@ -64,6 +64,46 @@ THE SOFTWARE.
#error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
#endif
// The following are deprecation notices.
// They will be removed after upstream updation.
#if defined(__clang__)
//The following work for clang rather than for gnu gcc/g++/c++
#pragma GCC diagnostic push
#pragma GCC diagnostic warning "-Wcpp"
#ifdef __HCC__
#warning("__HCC__ is deprecated, please don't use it")
#endif
#ifdef __HIP_ROCclr__
#warning("__HIP_ROCclr__ is deprecated, please don't use it")
#endif
#ifdef __HIP_PLATFORM_HCC__
#warning("__HIP_PLATFORM_HCC__ is deprecated, please use __HIP_PLATFORM_AMD__ instead")
#endif
#ifdef __HIP_PLATFORM_NVCC_
#warning("__HIP_PLATFORM_NVCC_ is deprecated, please use __HIP_PLATFORM_NVIDIA__ instead")
#endif
#pragma GCC diagnostic pop
#elif defined(__GNUC__)
//The following work for gnu gcc/g++/c++ rather than for clang
#ifdef __HCC__
#pragma message ("__HCC__ is deprecated, please don't use it")
#endif
#ifdef __HIP_ROCclr__
#pragma message ("__HIP_ROCclr__ is deprecated, please don't use it")
#endif
#ifdef __HIP_PLATFORM_HCC__
#pragma message ("__HIP_PLATFORM_HCC__ is deprecated, please use __HIP_PLATFORM_AMD__ instead")
#endif
#ifdef __HIP_PLATFORM_NVCC_
#pragma message ("__HIP_PLATFORM_NVCC_ is deprecated, please use __HIP_PLATFORM_NVIDIA__ instead")
#endif
#endif // defined(__clang__)
#include <hip/hip_runtime_api.h>
#include <hip/hip_vector_types.h>
+1 -1
Wyświetl plik
@@ -15,7 +15,7 @@ set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip)
find_package(PythonInterp REQUIRED)
add_definitions(-D__HIP_ROCclr__ -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DCL_TARGET_OPENCL_VERSION=220 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER -DUSE_PROF_API)
add_definitions( -D__HIP_PLATFORM_AMD__ -DLINUX -D__x86_64__ -D__AMD64__ -DUNIX_OS -DqLittleEndian -DOPENCL_MAJOR=2 -DOPENCL_MINOR=0 -DCL_TARGET_OPENCL_VERSION=220 -DWITH_AQL -DWITH_ONLINE_COMPILER -DATI_OS_LINUX -DATI_ARCH_X86 -DLITTLEENDIAN_CPU -DATI_BITS_64 -DATI_COMP_GCC -DWITH_HSA_DEVICE -DWITH_TARGET_AMDGCN -DOPENCL_EXPORTS -DCL_USE_DEPRECATED_OPENCL_1_0_APIS -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS -DVEGA10_ONLY=false -DWITH_LIGHTNING_COMPILER -DUSE_PROF_API)
if(CMAKE_BUILD_TYPE MATCHES "^Debug$")
add_definitions(-DDEBUG)
@@ -1,5 +0,0 @@
vadd_amp_arrayview
vadd_hc_am
vadd_hc_array
vadd_hc_arrayview
vadd_hip
@@ -1,70 +0,0 @@
HCC_HOME?=/opt/rocm/hcc
HCC = $(HCC_HOME)/bin/hcc
OPT=-O2
HCC_CFLAGS= `$(HCC_HOME)/bin/hcc-config --cxxflags` ${OPT}
HCC_LDFLAGS= `$(HCC_HOME)/bin/hcc-config --ldflags` ${OPT}
CPPAMP_CFLAGS= `$(HCC_HOME)/bin/clamp-config --cxxflags`
CPPAMP_LDFLAGS= `$(HCC_HOME)/bin/clamp-config --ldflags`
HIP_PATH?= $(wildcard /opt/rocm/hip)
ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
ifneq (${HIP_PLATFORM}, hcc)
$(error hcc_dialects requires hcc compiler and only runs on hcc platform)
endif
TARGETS=vadd_hc_arrayview vadd_hc_array vadd_hc_am vadd_amp_arrayview vadd_hip
all: $(TARGETS)
clean:
rm -f $(TARGETS) *.o
run: $(TARGETS)
@for t in $(TARGETS); do\
echo "Running $$t"; \
./$$t; \
done
# HCC version:
vadd_hc_arrayview.o: vadd_hc_arrayview.cpp
$(HCC) $(HCC_CFLAGS) -c $< -o $@
vadd_hc_arrayview: vadd_hc_arrayview.o
$(HCC) $(HCC_LDFLAGS) $< -o $@
# HCC version, using explicit arrays:
vadd_hc_array.o: vadd_hc_array.cpp
$(HCC) $(HCC_CFLAGS) -c $< -o $@
vadd_hc_array: vadd_hc_array.o
$(HCC) $(HCC_LDFLAGS) $< -o $@
# HCC version, using AM (accelerator memory) pointer
vadd_hc_am.o: vadd_hc_am.cpp
$(HCC) $(HCC_CFLAGS) -c $< -o $@
vadd_hc_am: vadd_hc_am.o
$(HCC) $(HCC_LDFLAGS) -lhc_am $< -o $@
# HIP version:
vadd_hip.o: vadd_hip.cpp
$(HIPCC) -c $< -o $@
vadd_hip: vadd_hip.o
$(HIPCC) $< -o $@
# AMP version:
vadd_amp_arrayview.o: vadd_amp_arrayview.cpp
$(HCC) $(CPPAMP_CFLAGS) -c $< -o $@
vadd_amp_arrayview: vadd_amp_arrayview.o
$(HCC) $(CPPAMP_LDFLAGS) $< -o $@
@@ -1,91 +0,0 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Simple test showing how to use HC syntax with AM (accelerator memory).
// AM provides a set of c-style memory management routines for allocating,
// freeing, and copying memory. am_alloc returns a device pointer
// which can only be used on the device. The programmer has full control
// over when data is copied.
#if defined(HC_NEXT)
#include <hc/hc.hpp>
#include <hc/hc_am.hpp>
#else
#include <hc.hpp>
#include <hc_am.hpp>
#endif
int main(int argc, char* argv[]) {
int sizeElements = 1000000;
size_t sizeBytes = sizeElements * sizeof(float);
bool pass = true;
// Allocate host memory
float* A_h = (float*)malloc(sizeBytes);
float* B_h = (float*)malloc(sizeBytes);
float* C_h = (float*)malloc(sizeBytes);
// Allocate device pointers:
// Unlike array_view, these must be explicitly managed by user:
hc::accelerator acc; // grab default accelerator where we want to allocate memory:
hc::accelerator_view av = acc.get_default_view();
float *A_d, *B_d, *C_d;
A_d = hc::am_alloc(sizeBytes, acc, 0);
B_d = hc::am_alloc(sizeBytes, acc, 0);
C_d = hc::am_alloc(sizeBytes, acc, 0);
// Initialize host data
for (int i = 0; i < sizeElements; i++) {
A_h[i] = 1.618f * i;
B_h[i] = 3.142f * i;
C_h[i] = 0;
}
av.copy(A_h, A_d, sizeBytes); // C++ copy H2D
av.copy(B_h, B_d, sizeBytes); // C++ copy H2D
// Launch kernel onto AV.
// Because the kernel PFE and the copies are submitted to same AV, they will execute in order
// and we don't need additional synchronization to ensure the copies complete before the PFE
// begins.
hc::completion_future cf =
hc::parallel_for_each(av, hc::extent<1>(sizeElements), [=](hc::index<1> idx)[[hc]] {
int i = idx[0];
C_d[i] = A_d[i] + B_d[i];
});
// This copy is in same AV as the kernel and thus will wait for the kernel to finish before
// executing.
av.copy(C_d, C_h, sizeBytes); // C++ copy D2H
for (int i = 0; i < sizeElements; i++) {
float ref = 1.618f * i + 3.142f * i;
if (C_h[i] != ref) {
printf("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref);
pass = false;
}
};
if (pass) printf("PASSED!\n");
}
@@ -1,81 +0,0 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Simple test showing how to use HC syntax with array.
// Array provides a type-safe C++ mechanism to allocate accelerator memory.
// Like array_view, hc::array provides multi-dimensional indexing capability,
// and is typed. However, unlike array_view, hc::array does not provide
// automatic data management capabilities - instead the programmer
// takes the reins and controls when copies are executed.
#if defined(HC_NEXT)
#include <hc/hc.hpp>
#else
#include <hc.hpp>
#endif
int main(int argc, char* argv[]) {
int sizeElements = 1000000;
size_t sizeBytes = sizeElements * sizeof(float);
bool pass = true;
// Allocate host memory
float* A_h = (float*)malloc(sizeBytes);
float* B_h = (float*)malloc(sizeBytes);
float* C_h = (float*)malloc(sizeBytes);
// Allocate device arrays<>
// Unlike array_view, these must be explicitly managed by user:
hc::array<float> A_d(sizeElements);
hc::array<float> B_d(sizeElements);
hc::array<float> C_d(sizeElements);
// Initialize host data
for (int i = 0; i < sizeElements; i++) {
A_h[i] = 1.618f * i;
B_h[i] = 3.142f * i;
}
hc::copy(A_h, A_d); // C++ copy H2D
hc::copy(B_h, B_d); // C++ copy H2D
// Launch kernel onto default accelerator:
// array<> types are not implicitly copied, so we performed copies above.
hc::parallel_for_each(hc::extent<1>(sizeElements), [&](hc::index<1> idx)[[hc]] {
int i = idx[0];
C_d[i] = A_d[i] + B_d[i];
});
// HCC runtime knows that C_d depends on previous PFE and will force the copy to wait for the
// PFE to complte.
hc::copy(C_d, C_h); // C++ copy D2H
for (int i = 0; i < sizeElements; i++) {
float ref = 1.618f * i + 3.142f * i;
if (C_h[i] != ref) {
printf("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref);
pass = false;
}
};
if (pass) printf("PASSED!\n");
}
@@ -1,62 +0,0 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#if defined(HC_NEXT)
#include <hc/hc.hpp>
#else
#include <hc.hpp>
#endif
int main(int argc, char *argv[])
{
int size = 1000000;
bool pass = true;
// Allocate auto-managed host/device views of data:
hc::array_view<float> A(size);
hc::array_view<float> B(size);
hc::array_view<float> C(size);
// Initialize host data
for (int i=0; i<size; i++) {
A[i] = 1.618f * i;
B[i] = 3.142f * i;
}
C.discard_data(); // tell runtime not to copy CPU host data.
// Launch kernel onto default accelerator:
hc::parallel_for_each(hc::extent<1> (size),
[=] (hc::index<1> idx) [[hc]] {
int i = idx[0];
C[i] = A[i] + B[i];
});
for (int i=0; i<size; i++) {
float ref= 1.618f * i + 3.142f * i;
if (C[i] != ref) {
printf ("error:%d computed=%6.2f, reference=%6.2f\n", i, C[i], ref);
pass = false;
}
};
if (pass) printf ("PASSED!\n");
}
@@ -1,77 +0,0 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Simple test showing how to use HC syntax with array_view.
// The code uses AMP's array_view class, which provides automatic data synchronization
// of data between the host and the accelerator. As noted below, the HCC runtime
// will automatically copy data to and from the host, without the user needing
// to manually perform such copies. This is an excellent mode for developers
// new to GPU programming and matches the memory models provided by recent systems where
// CPU and GPU share the same memory pool. Advanced programmers may prefer
// more explicit control over the data movement - shown in the other vadd_hc_array and
// vadd_hc_am examples.
// This example shows the similarity between C++AMP and and HC for simple cases where
// implicit data transfer is used - really the only difference is the namespace.
// Other examples show some of the more advanced controls.
#if defined(HC_NEXT)
#include <hc/hc.hpp>
#else
#include <hc.hpp>
#endif
int main(int argc, char* argv[]) {
int sizeElements = 1000000;
bool pass = true;
// Allocate auto-managed host/device views of data:
hc::array_view<float> A(sizeElements);
hc::array_view<float> B(sizeElements);
hc::array_view<float> C(sizeElements);
// Initialize host data
for (int i = 0; i < sizeElements; i++) {
A[i] = 1.618f * i;
B[i] = 3.142f * i;
}
C.discard_data(); // tell runtime not to copy CPU host data.
// Launch kernel onto default accelerator:
// The HCC runtime will ensure that A and B are available on the accelerator before launching
// the kernel.
hc::parallel_for_each(hc::extent<1>(sizeElements), [=](hc::index<1> idx)[[hc]] {
int i = idx[0];
C[i] = A[i] + B[i];
});
for (int i = 0; i < sizeElements; i++) {
float ref = 1.618f * i + 3.142f * i;
// Because C is an array_view, the HCC runtime will copy C back to host at first access
// here:
if (C[i] != ref) {
printf("error:%d computed=%6.2f, reference=%6.2f\n", i, C[i], ref);
pass = false;
}
};
if (pass) printf("PASSED!\n");
}
@@ -1,77 +0,0 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
__global__ void vadd_hip(const float* a, const float* b, float* c, int N) {
int idx = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
if (idx < N) {
c[idx] = a[idx] + b[idx];
}
}
int main(int argc, char* argv[]) {
int sizeElements = 1000000;
size_t sizeBytes = sizeElements * sizeof(float);
bool pass = true;
// Allocate host memory
float* A_h = (float*)malloc(sizeBytes);
float* B_h = (float*)malloc(sizeBytes);
float* C_h = (float*)malloc(sizeBytes);
// Allocate device memory:
float *A_d, *B_d, *C_d;
hipMalloc(&A_d, sizeBytes);
hipMalloc(&B_d, sizeBytes);
hipMalloc(&C_d, sizeBytes);
// Initialize host memory
for (int i = 0; i < sizeElements; i++) {
A_h[i] = 1.618f * i;
B_h[i] = 3.142f * i;
}
// H2D Copy
hipMemcpy(A_d, A_h, sizeBytes, hipMemcpyHostToDevice);
hipMemcpy(B_d, B_h, sizeBytes, hipMemcpyHostToDevice);
// Launch kernel onto default accelerator
int blockSize = 256; // pick arbitrary block size
int blocks = (sizeElements + blockSize - 1) / blockSize; // round up to launch enough blocks
hipLaunchKernelGGL(vadd_hip, dim3(blocks), dim3(blockSize), 0, 0, A_d, B_d, C_d, sizeElements);
// D2H Copy
hipMemcpy(C_h, C_d, sizeBytes, hipMemcpyDeviceToHost);
// Verify
for (int i = 0; i < sizeElements; i++) {
float ref = 1.618f * i + 3.142f * i;
if (C_h[i] != ref) {
printf("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref);
pass = false;
}
};
if (pass) printf("PASSED!\n");
}
@@ -108,16 +108,6 @@ int main() {
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
/***********************************************************************************/
//Timing directly the dispatch
#if defined(__HIP_PLATFORM_AMD__) && GENERIC_GRID_LAUNCH == 1 && defined(__HCC__)
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
hipExtLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, start, stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&results[i], start, stop);
}
print_timing("Timing directly single dispatch latency", results);
#endif
//Timing around the dispatch
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
hipEventRecord(start, 0);
-3
Wyświetl plik
@@ -48,9 +48,6 @@ THE SOFTWARE.
}
void printCompilerInfo() {
#ifdef __HCC__
printf("compiler: hcc version=%s, workweek (YYWWD) = %u\n", __hcc_version__, __hcc_workweek__);
#endif
#ifdef __NVCC__
printf("compiler: nvcc\n");
#endif
@@ -28,7 +28,7 @@ If your project already modifies ```CMAKE_MODULE_PATH```, you will need to appen
## Using the hip_add_executable macro
FindHIP provides the ```hip_add_executable``` macro that is similar to the ```cuda_add_executable``` macro that is provided by FindCUDA.
The syntax is also similar. The ```hip_add_executable``` macro uses the hipcc wrapper as the compiler.
The macro supports specifying HCC-specific, CLANG-specific, NVCC-specific compiler options using the ```HCC_OPTIONS```, ```CLANG_OPTIONS``` and ```NVCC_OPTIONS``` keywords.
The macro supports specifying CLANG-specific, NVCC-specific compiler options using the ```CLANG_OPTIONS``` and ```NVCC_OPTIONS``` keywords.
Common options targeting both compilers can be specificed after the ```HIPCC_OPTIONS``` keyword.
## How to build and run:
+10 -11
Wyświetl plik
@@ -47,17 +47,16 @@ In the above, BUILD commands provide instructions on how to build the test case
The supported syntax for the BUILD command is:
```
BUILD: %t %s HIPCC_OPTIONS <hipcc_specific_options> HCC_OPTIONS <hcc_specific_options> CLANG_OPTIONS <clang_specific_options> NVCC_OPTIONS <nvcc_specific_options> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <hcc|rocclr> EXCLUDE_HIP_COMPILER <hcc|clang> DEPENDS EXCLUDE_HIP_LIB_TYPE <static|shared> <dependencies>
BUILD: %t %s HIPCC_OPTIONS <hipcc_specific_options> CLANG_OPTIONS <clang_specific_options> NVCC_OPTIONS <nvcc_specific_options> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <rocclr> EXCLUDE_HIP_COMPILER <clang> DEPENDS EXCLUDE_HIP_LIB_TYPE <static|shared> <dependencies>
```
%s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path).
%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified.
HIPCC_OPTIONS: All options specified after this delimiter are passed to hipcc on both amd and nvidia platforms.
HCC_OPTIONS: All options specified after this delimiter are passed to hipcc on hcc compiler only.
CLANG_OPTIONS: All options specified after this delimiter are passed to hipcc on HIP-Clang compiler only.
NVCC_OPTIONS: All options specified after this delimiter are passed to hipcc on nvidia platform only.
EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from amd, nvidia or both platforms.
EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from hcc or rocclr runtime.
EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from hcc or clang compiler.
EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from rocclr runtime.
EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from clang compiler.
EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test case from particular runtime and compiler.
EXCLUDE_HIP_LIB_TYPE: This can be used to exclude a test case from static or shared libs.
DEPENDS: This can be used to specify dependencies that need to be built before building the current target.
@@ -67,7 +66,7 @@ DEPENDS: This can be used to specify dependencies that need to be built before b
The supported syntax for the BUILD_CMD command is:
```
BUILD_CMD: <targetname> <build_command> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <hcc|rocclr> EXCLUDE_HIP_COMPILER <hcc|clang> EXCLUDE_HIP_LIB_TYPE <static|shared> DEPENDS <dependencies>
BUILD_CMD: <targetname> <build_command> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <rocclr> EXCLUDE_HIP_COMPILER <clang> EXCLUDE_HIP_LIB_TYPE <static|shared> DEPENDS <dependencies>
```
%s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path).
%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified.
@@ -78,8 +77,8 @@ BUILD_CMD: <targetname> <build_command> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EX
%S: refers to path to current source file.
%T: refers to path to current build target.
EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from amd, nvidia or both platforms.
EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from hcc or rocclr runtime.
EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from hcc or clang compiler.
EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from rocclr runtime.
EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from clang compiler.
EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test from particular runtime and compiler.
EXCLUDE_HIP_LIB_TYPE: This can be used to exclude a test case from static or shared libs.
DEPENDS: This can be used to specify dependencies that need to be built before building the current target.
@@ -89,12 +88,12 @@ DEPENDS: This can be used to specify dependencies that need to be built before b
The supported syntax for the TEST command is:
```
TEST: %t <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <hcc|rocclr> EXCLUDE_HIP_COMPILER <hcc|clang> EXCLUDE_HIP_LIB_TYPE <static|shared>
TEST: %t <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <rocclr> EXCLUDE_HIP_COMPILER <clang> EXCLUDE_HIP_LIB_TYPE <static|shared>
```
%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified.
EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from amd, nvidia or both platforms.
EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from hcc or rocclr runtime.
EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from hcc or clang compiler.
EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from rocclr runtime.
EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from clang compiler.
EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test from particular runtime and compiler.
EXCLUDE_HIP_LIB_TYPE: This can be used to exclude a test case from static or shared libs.
@@ -104,7 +103,7 @@ Note that if the test has been excluded for a specific platform/runtime/compiler
When using the TEST command, HIT will squash and append the arguments specified to the test executable name to generate the CMAKE test name. Sometimes we might want to specify a more descriptive name. The TEST_NAMED command is used for that. The supported syntax for the TEST_NAMED command is:
```
TEST: %t CMAKE_TEST_NAME <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <hcc|rocclr> EXCLUDE_HIP_COMPILER <hcc|clang> EXCLUDE_HIP_LIB_TYPE <static|shared>
TEST: %t CMAKE_TEST_NAME <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <amd|nvidia|all> EXCLUDE_HIP_RUNTIME <rocclr> EXCLUDE_HIP_COMPILER <clang> EXCLUDE_HIP_LIB_TYPE <static|shared>
```
+3 -7
Wyświetl plik
@@ -11,11 +11,10 @@ message(STATUS "HIP runtime lib type - ${HIP_LIB_TYPE}")
message(STATUS "CMAKE_TESTING_TOOL: ${CMAKE_TESTING_TOOL}")
#-------------------------------------------------------------------------------
# Helper macro to parse BUILD instructions
macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends _dir)
macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends _dir)
set(${_target})
set(${_sources})
set(${_hipcc_options})
set(${_hcc_options})
set(${_clang_options})
set(${_nvcc_options})
set(${_link_options})
@@ -33,7 +32,6 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _clang_op
set(_target_found TRUE)
set(${_target} ${arg})
elseif("x${arg}" STREQUAL "xHIPCC_OPTIONS"
OR "x${arg}" STREQUAL "xHCC_OPTIONS"
OR "x${arg}" STREQUAL "xCLANG_OPTIONS"
OR "x${arg}" STREQUAL "xNVCC_OPTIONS"
OR "x${arg}" STREQUAL "xLINK_OPTIONS"
@@ -45,8 +43,6 @@ macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _hcc_options _clang_op
set(_flag ${arg})
elseif("x${_flag}" STREQUAL "xHIPCC_OPTIONS")
list(APPEND ${_hipcc_options} ${arg})
elseif("x${_flag}" STREQUAL "xHCC_OPTIONS")
list(APPEND ${_hcc_options} ${arg})
elseif("x${_flag}" STREQUAL "xCLANG_OPTIONS")
list(APPEND ${_clang_options} ${arg})
elseif("x${_flag}" STREQUAL "xNVCC_OPTIONS")
@@ -282,7 +278,7 @@ macro(HIT_ADD_FILES _config _dir _label _parent)
string(REGEX REPLACE "\n" ";" _contents "${_contents}")
foreach(_cmd ${_contents})
string(REGEX REPLACE " " ";" _cmd "${_cmd}")
parse_build_command(_target _sources _hipcc_options _hcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends ${_dir} ${_cmd})
parse_build_command(_target _sources _hipcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends ${_dir} ${_cmd})
string(REGEX REPLACE "/" "." target ${_label}/${_target})
if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms)
insert_into_map("_exclude" "${target}" TRUE)
@@ -297,7 +293,7 @@ macro(HIT_ADD_FILES _config _dir _label _parent)
else()
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_reset_flags()
hip_add_executable(${target} ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options} EXCLUDE_FROM_ALL)
hip_add_executable(${target} ${_sources} HIPCC_OPTIONS ${_hipcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options} EXCLUDE_FROM_ALL)
target_link_libraries(${target} PRIVATE ${_link_options})
set_target_properties(${target} PROPERTIES OUTPUT_NAME ${_target} RUNTIME_OUTPUT_DIRECTORY ${_label} LINK_DEPENDS "${HIP_LIB_FILES}")
add_dependencies(${_parent} ${target})
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvidia
* BUILD: %t %s ../test_common.cpp CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp HCC_OPTIONS -Xclang -fallow-half-arguments-and-returns CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvidia
* BUILD: %t %s ../test_common.cpp CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t
* HIT_END
*/
@@ -151,11 +151,8 @@ void test() {
}
int main() {
// ToDo: Fix bug in HCC causing linking error at -O0.
#ifndef __HCC__
test<float>();
test<double>();
#endif
passed();
return 0;
}
@@ -32,7 +32,7 @@ THE SOFTWARE.
__global__ void DotFunctions(bool* result) {
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
#if __HIP_CLANG_ONLY__
// Dot Functions
short2 sa{1}, sb{1};
result[0] = amd_mixed_dot(sa, sb, 1, result[0]) && result[0];
@@ -155,7 +155,7 @@ void __half2Test(bool* result, __half2 a) {
result[0] = !(a < a) && result[0];
result[0] = !(a > a) && result[0];
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
#if __HIP_CLANG_ONLY__
// Dot Functions
result[0] = amd_mixed_dot(a, a, 1, 1) && result[0];
#endif
+1 -1
Wyświetl plik
@@ -35,7 +35,7 @@ THE SOFTWARE.
#include "hip/hip_vector_types.h"
#include "test_common.h"
#if (__hcc_workweek__ >= 16164) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_CLANG_ONLY__)
#if defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_CLANG_ONLY__)
#define HIP_ASSERT(x) (assert((x) == hipSuccess))
-10
Wyświetl plik
@@ -48,15 +48,5 @@ int main(int argc, char* argv[]) {
CHECK(hipGetDeviceProperties(&props, deviceId));
printf("info: running on device #%d %s\n", deviceId, props.name);
#ifdef __HCC__
hc::accelerator acc;
CHECK(hipHccGetAccelerator(deviceId, &acc));
std::wcout << "device_path=" << acc.get_device_path() << "\n";
hc::accelerator_view* av;
CHECK(hipHccGetAcceleratorView(0 /*nullStream*/, &av));
#endif
passed();
};
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
+1 -1
Wyświetl plik
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_RUNTIME hcc
* BUILD: %t %s ../test_common.cpp
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
+1 -1
Wyświetl plik
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_RUNTIME hcc
* BUILD: %t %s ../test_common.cpp
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
@@ -31,10 +31,6 @@ THE SOFTWARE.
#include <test_common.h>
#ifdef __HCC__
#include <hc.hpp>
#endif
// cudaA
// Simple tests for variable type qualifiers:
@@ -89,14 +85,6 @@ __global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
float fastZ = __sin(x);
#endif
#ifdef __HCC__
int b = threadIdx.x;
int c;
atomicAdd(&c, b);
#endif
__syncthreads();
+2 -2
Wyświetl plik
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
+2 -2
Wyświetl plik
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
+2 -2
Wyświetl plik
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
+2 -2
Wyświetl plik
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
+2 -2
Wyświetl plik
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
@@ -21,8 +21,8 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME hcc EXCLUDE_HIP_COMPILER hcc
* BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia
* TEST: %t EXCLUDE_HIP_PLATFORM nvidia
* HIT_END
*/
@@ -39,7 +39,7 @@ void runTest(int width,int height,int depth, hipChannelFormatKind formatKind)
}
}
}
printf("test- sizeof(T) =%d\n", sizeof(T));
printf("test- sizeof(T) =%zu\n", sizeof(T));
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8, 0, 0, 0, formatKind);
hipArray *arr,*arr1;
@@ -32,14 +32,6 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include "test_common.h"
#define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */
#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_ROCclr__)
#include <hc_am.hpp>
#else
#define USE_HCC_MEMTRACKER 0
#endif
int elementSizes[] = {1, 16, 1024, 524288, 16 * 1000 * 1000};
int nSizes = sizeof(elementSizes) / sizeof(int);
@@ -166,10 +158,6 @@ void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync) {
HIPCHECK(hipHostMalloc(&dataHost, sizeElements));
memset(dataHost, 13, sizeElements);
#if USE_HCC_MEMTRACKER
hc::am_memtracker_print(0x0);
#endif
printf(" test: init complete\n");
runTestImpl(true, hostSync, gpu0Stream, gpu1Stream, numElements, dataGpu0_0, dataGpu0_1,
dataGpu1, dataHost, expected);