Merge branch 'master' into tex_unbind_issue_fix
Dieser Commit ist enthalten in:
+28
-13
@@ -22,8 +22,10 @@ use Cwd 'abs_path';
|
||||
# Other environment variable controls:
|
||||
# HIP_PATH : Path to HIP directory, default is one dir level above location of this script
|
||||
# CUDA_PATH : Path to CUDA SDK (default /usr/local/cuda). Used on NVIDIA platforms only.
|
||||
# HCC_HOME : Path to HCC SDK (default /opt/rocm/hcc). Used on AMD platforms only.
|
||||
# HSA_PATH : Path to HSA dir (default /opt/rocm/hsa). Used on AMD platforms only.
|
||||
# HCC_HOME : Path to HCC SDK (defaults to ../../hcc relative to this
|
||||
# script's abs_path). Used on AMD platforms only.
|
||||
# HSA_PATH : Path to HSA dir (defaults to ../../hsa relative to abs_path
|
||||
# of this script). Used on AMD platforms only.
|
||||
# HIP_VDI_HOME : Path to HIP/VDI directory. Used on AMD platforms only.
|
||||
|
||||
if(scalar @ARGV == 0){
|
||||
@@ -58,7 +60,23 @@ $isWindows = $^O eq 'MSWin32';
|
||||
$HIPCC_COMPILE_FLAGS_APPEND=$ENV{'HIPCC_COMPILE_FLAGS_APPEND'};
|
||||
$HIPCC_LINK_FLAGS_APPEND=$ENV{'HIPCC_LINK_FLAGS_APPEND'};
|
||||
|
||||
$HIP_PATH=$ENV{'HIP_PATH'} // dirname (dirname $0); # use parent directory of hipcc
|
||||
#
|
||||
# TODO: Fix rpath LDFLAGS settings
|
||||
#
|
||||
# Since this hipcc script gets installed at two uneven hierarchical levels,
|
||||
# linked by symlink, the absolute path of this script should be used to
|
||||
# derive HIP_PATH, as dirname $0 could be /opt/rocm/bin or /opt/rocm/hip/bin
|
||||
# depending on how it gets invoked.
|
||||
# ROCM_PATH which points to <rocm_install_dir> is determined based on whether
|
||||
# we find .info/version in the parent of HIP_PATH or not. If it is found,
|
||||
# ROCM_PATH is defined relative to HIP_PATH else it is hardcoded to /opt/rocm.
|
||||
#
|
||||
$HIP_PATH=$ENV{'HIP_PATH'} // dirname(Cwd::abs_path("$0/../")); # use parent directory of hipcc
|
||||
if (-e "$HIP_PATH/../.info/version") {
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // dirname("$HIP_PATH"); # use parent directory of HIP_PATH
|
||||
} else {
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
|
||||
}
|
||||
$HIP_VDI_HOME=$ENV{'HIP_VDI_HOME'};
|
||||
$HIP_LIB_PATH=$ENV{'HIP_LIB_PATH'};
|
||||
$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'};
|
||||
@@ -68,7 +86,7 @@ $HIP_CLANG_HCC_COMPAT_MODE=$ENV{'HIP_CLANG_HCC_COMPAT_MODE'}; # HCC compatibilit
|
||||
if (defined $HIP_VDI_HOME) {
|
||||
$HIP_INFO_PATH= "$HIP_VDI_HOME/lib/.hipInfo";
|
||||
} else {
|
||||
$HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo";
|
||||
$HIP_INFO_PATH= "$HIP_PATH/lib/.hipInfo"; # use actual file
|
||||
}
|
||||
|
||||
#---
|
||||
@@ -112,7 +130,7 @@ if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) {
|
||||
if (-e "$hipcc_dir/../lib/bitcode") {
|
||||
$HIP_VDI_HOME = abs_path($hipcc_dir . "/..");
|
||||
} else {
|
||||
$HIP_VDI_HOME = "/opt/rocm/hip";
|
||||
$HIP_VDI_HOME = $HIP_PATH; # use HIP_PATH
|
||||
}
|
||||
}
|
||||
|
||||
@@ -136,10 +154,10 @@ if (defined $HIP_VDI_HOME) {
|
||||
if (defined $HIP_COMPILER and $HIP_COMPILER eq "clang") {
|
||||
$HIP_PLATFORM = "clang";
|
||||
if (!defined $HIP_CLANG_PATH) {
|
||||
$HIP_CLANG_PATH = "/opt/rocm/llvm/bin";
|
||||
$HIP_CLANG_PATH = "$ROCM_PATH/llvm/bin";
|
||||
}
|
||||
if (!defined $DEVICE_LIB_PATH) {
|
||||
$DEVICE_LIB_PATH = "/opt/rocm/lib";
|
||||
$DEVICE_LIB_PATH = "$ROCM_PATH/lib";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,7 +181,6 @@ $target_gfx1012 = 0;
|
||||
$default_amdgpu_target = 1;
|
||||
|
||||
if ($HIP_PLATFORM eq "clang") {
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
|
||||
$HIPCC="$HIP_CLANG_PATH/clang++";
|
||||
|
||||
# If $HIPCC clang++ is not compiled, use clang instead
|
||||
@@ -215,7 +232,7 @@ if ($HIP_PLATFORM eq "clang") {
|
||||
}
|
||||
|
||||
if ($HIP_RUNTIME eq "HCC" ) {
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "/opt/rocm/hsa";
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
|
||||
$HIPCXXFLAGS .= " -isystem $HSA_PATH/include";
|
||||
}
|
||||
|
||||
@@ -224,9 +241,9 @@ if ($HIP_PLATFORM eq "clang") {
|
||||
if (! defined $HIP_LIB_PATH) {
|
||||
$HIP_LIB_PATH = "$HIP_PATH/lib";
|
||||
}
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "/opt/rocm/hsa";
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
|
||||
|
||||
$HCC_HOME=$ENV{'HCC_HOME'} // $hipConfig{'HCC_HOME'} // "/opt/rocm/hcc";
|
||||
$HCC_HOME=$ENV{'HCC_HOME'} // $hipConfig{'HCC_HOME'} // "$ROCM_PATH/hcc";
|
||||
|
||||
$HCC_VERSION=`${HCC_HOME}/bin/hcc --version`;
|
||||
$HCC_VERSION=~/.*based on HCC ([^ ]+).*/;
|
||||
@@ -234,8 +251,6 @@ if ($HIP_PLATFORM eq "clang") {
|
||||
$HCC_VERSION_MAJOR=$HCC_VERSION;
|
||||
$HCC_VERSION_MAJOR=~s/\..*//;
|
||||
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
|
||||
|
||||
$HIP_ATP_MARKER=$ENV{'HIP_ATP_MARKER'} // 1;
|
||||
$marker_path = "$ROCM_PATH/profiler/CXLActivityLogger";
|
||||
|
||||
|
||||
@@ -71,9 +71,17 @@ sub can_run {
|
||||
}
|
||||
}
|
||||
|
||||
# Define HIP_PATH based on location of the script. Same as hipcc.
|
||||
# Derive ROCM_PATH same as hipcc does. Others are relative to ROCM_PATH.
|
||||
$HIP_PATH=$ENV{'HIP_PATH'} // dirname(Cwd::abs_path("$0/../")); # use parent directory of hipcc
|
||||
if (-e "$HIP_PATH/../.info/version") {
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // dirname("$HIP_PATH"); # use parent directory of HIP_PATH
|
||||
} else {
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
|
||||
}
|
||||
$CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda';
|
||||
$HCC_HOME=$ENV{'HCC_HOME'} // '/opt/rocm/hcc';
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // '/opt/rocm/hsa';
|
||||
$HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc";
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
|
||||
|
||||
#---
|
||||
#HIP_PLATFORM controls whether to use NVCC or HCC for compilation:
|
||||
@@ -89,8 +97,6 @@ if (not defined $HIP_PLATFORM) {
|
||||
}
|
||||
}
|
||||
|
||||
$HIP_PATH=$ENV{'HIP_PATH'} // Cwd::realpath (dirname (dirname $0)); # use parent directory of this tool
|
||||
|
||||
if ($HIP_PLATFORM eq "hcc") {
|
||||
$CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include";
|
||||
}
|
||||
@@ -181,7 +187,8 @@ if ($p_check) {
|
||||
print "\nCheck system installation:\n";
|
||||
|
||||
printf ("%-70s", "check hipconfig in PATH...");
|
||||
if (system ("hipconfig > /dev/null 2>&1") != 0) {
|
||||
# Safer to use which hipconfig instead of invoking hipconfig
|
||||
if (system ("which hipconfig > /dev/null 2>&1") != 0) {
|
||||
print "FAIL\n";
|
||||
} else {
|
||||
printf "good\n";
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
# THE SOFTWARE.
|
||||
##
|
||||
|
||||
# IMPORTANT: Do not change this file manually: it is generated by hipify-clang --perl
|
||||
|
||||
#usage hipify-perl [OPTIONS] INPUT_FILE
|
||||
|
||||
use Getopt::Long;
|
||||
@@ -767,6 +769,7 @@ sub simpleSubstitutions {
|
||||
$ft{'include'} += s/\btexture_fetch_functions.h\b//g;
|
||||
$ft{'include'} += s/\bvector_types.h\b/hip\/hip_vector_types.h/g;
|
||||
$ft{'include_cuda_main_header'} += s/\bcuComplex.h\b/hip\/hip_complex.h/g;
|
||||
$ft{'include_cuda_main_header'} += s/\bcub\/cub.cuh\b/hipcub\/hipcub.hpp/g;
|
||||
$ft{'include_cuda_main_header'} += s/\bcublas.h\b/hipblas.h/g;
|
||||
$ft{'include_cuda_main_header'} += s/\bcublas_v2.h\b/hipblas.h/g;
|
||||
$ft{'include_cuda_main_header'} += s/\bcuda.h\b/hip\/hip_runtime.h/g;
|
||||
|
||||
@@ -329,7 +329,6 @@ def generate_prof_header(f, api_map, opts_map):
|
||||
f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n')
|
||||
cb_id += 1
|
||||
f.write(' HIP_API_ID_NUMBER = ' + str(cb_id) + ',\n')
|
||||
f.write(' HIP_API_ID_ANY = ' + str(cb_id + 1) + ',\n')
|
||||
f.write('\n')
|
||||
f.write(' HIP_API_ID_NONE = HIP_API_ID_NUMBER,\n')
|
||||
for name in priv_lst:
|
||||
|
||||
@@ -119,7 +119,8 @@ To run it:
|
||||
- **Windows**:
|
||||
```shell
|
||||
cmake \
|
||||
-G "Visual Studio 16 2019 Win64" \
|
||||
-G "Visual Studio 16 2019" \
|
||||
-A x64 \
|
||||
-DCMAKE_INSTALL_PREFIX=../dist \
|
||||
-DLLVM_SOURCE_DIR=../llvm \
|
||||
-DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \
|
||||
@@ -149,6 +150,14 @@ To run it:
|
||||
|
||||
- Windows: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.4.38`
|
||||
|
||||
5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed.
|
||||
|
||||
* Path to CUB should be specified by the `CUDA_CUB_ROOT_DIR` option:
|
||||
|
||||
- Linux: `-DCUDA_CUB_ROOT_DIR=/srv/CUB`
|
||||
|
||||
- Windows: `-DCUDA_CUB_ROOT_DIR=f:/GIT/cub`
|
||||
|
||||
5. Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed.
|
||||
|
||||
6. Ensure `lit` and `FileCheck` are installed - these are distributed with LLVM.
|
||||
@@ -199,6 +208,7 @@ cmake
|
||||
-DCMAKE_PREFIX_PATH=/srv/git/LLVM/9.0.0/dist \
|
||||
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1 \
|
||||
-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.4.38 \
|
||||
-DCUDA_CUB_ROOT_DIR=/srv/CUB \
|
||||
-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.0/build/bin/llvm-lit \
|
||||
..
|
||||
```
|
||||
@@ -254,72 +264,73 @@ Linux 5.2.0 - Platform OS
|
||||
64 - hipify-clang binary bitness
|
||||
64 - python 2.7.12 binary bitness
|
||||
========================================
|
||||
-- Testing: 63 tests, 12 threads --
|
||||
PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_01.cu (2 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_03.cu (3 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_02.cu (4 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_05.cu (5 of 63)
|
||||
PASS: hipify :: unit_tests/device/math_functions.cu (6 of 63)
|
||||
PASS: hipify :: unit_tests/device/atomics.cu (7 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_07.cu (8 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_06.cu (9 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_04.cu (10 of 63)
|
||||
PASS: hipify :: unit_tests/device/device_symbols.cu (11 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_10.cu (12 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_11.cu (13 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (15 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (16 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (17 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (18 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (19 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (20 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (21 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (22 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (23 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (24 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (25 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (26 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (27 of 63)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_09.cu (28 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (29 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (30 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (31 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (32 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (33 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (34 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (35 of 63)
|
||||
PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (36 of 63)
|
||||
PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (37 of 63)
|
||||
PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (38 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (39 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (40 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (41 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (42 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (44 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (45 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (46 of 63)
|
||||
PASS: hipify :: unit_tests/samples/allocators.cu (47 of 63)
|
||||
PASS: hipify :: unit_tests/samples/MallocManaged.cpp (48 of 63)
|
||||
PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (49 of 63)
|
||||
PASS: hipify :: unit_tests/samples/coalescing.cu (50 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (51 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (52 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (53 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (54 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (55 of 63)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (56 of 63)
|
||||
PASS: hipify :: unit_tests/samples/intro.cu (57 of 63)
|
||||
PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (58 of 63)
|
||||
PASS: hipify :: unit_tests/samples/axpy.cu (59 of 63)
|
||||
PASS: hipify :: unit_tests/samples/square.cu (60 of 63)
|
||||
PASS: hipify :: unit_tests/samples/vec_add.cu (61 of 63)
|
||||
PASS: hipify :: unit_tests/samples/static_shared_memory.cu (62 of 63)
|
||||
PASS: hipify :: unit_tests/samples/cudaRegister.cu (63 of 63)
|
||||
Testing Time: 2.91s
|
||||
Expected Passes : 63
|
||||
-- Testing: 64 tests, 12 threads --
|
||||
PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 64)
|
||||
PASS: hipify :: unit_tests/device/math_functions.cu (2 of 64)
|
||||
PASS: hipify :: unit_tests/device/atomics.cu (3 of 64)
|
||||
PASS: hipify :: unit_tests/device/device_symbols.cu (4 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_02.cu (5 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_03.cu (6 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_01.cu (7 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_04.cu (8 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_05.cu (9 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_07.cu (10 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_06.cu (11 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_11.cu (12 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_08.cu (13 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_10.cu (14 of 64)
|
||||
PASS: hipify :: unit_tests/headers/headers_test_09.cu (15 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (16 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (17 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (18 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (19 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (20 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (21 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (22 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (23 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (24 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (25 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (26 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (27 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (28 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (29 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (30 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (31 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (32 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (33 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (34 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (35 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (36 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (37 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (38 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (39 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (40 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (41 of 64)
|
||||
PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (42 of 64)
|
||||
PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (43 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (44 of 64)
|
||||
PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (45 of 64)
|
||||
PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (46 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (47 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (48 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (49 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (50 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (51 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (52 of 64)
|
||||
PASS: hipify :: unit_tests/samples/MallocManaged.cpp (53 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (54 of 64)
|
||||
PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (55 of 64)
|
||||
PASS: hipify :: unit_tests/samples/allocators.cu (56 of 64)
|
||||
PASS: hipify :: unit_tests/samples/coalescing.cu (57 of 64)
|
||||
PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (58 of 64)
|
||||
PASS: hipify :: unit_tests/samples/axpy.cu (59 of 64)
|
||||
PASS: hipify :: unit_tests/samples/cudaRegister.cu (60 of 64)
|
||||
PASS: hipify :: unit_tests/samples/intro.cu (61 of 64)
|
||||
PASS: hipify :: unit_tests/samples/square.cu (62 of 64)
|
||||
PASS: hipify :: unit_tests/samples/static_shared_memory.cu (63 of 64)
|
||||
PASS: hipify :: unit_tests/samples/vec_add.cu (64 of 64)
|
||||
Testing Time: 2.98s
|
||||
Expected Passes : 64
|
||||
[100%] Built target test-hipify
|
||||
```
|
||||
### <a name="windows"></a >Windows
|
||||
@@ -334,13 +345,14 @@ LLVM 7.0.0 - 9.0.0, CUDA 7.5 - 10.1, cudnn-7.0.5.15 - cudnn-7.6.4.38
|
||||
|
||||
Build system for the above configurations:
|
||||
|
||||
Python 3.6 - 3.7.4, cmake 3.12.3 - 3.15.4, Visual Studio 2017 (15.5.2) - 2019 (16.3.2).
|
||||
Python 3.6 - 3.7.4, cmake 3.12.3 - 3.15.5, Visual Studio 2017 (15.5.2) - 2019 (16.3.4).
|
||||
|
||||
Here is an example of building `hipify-clang` with testing support on `Windows 10` by `Visual Studio 16 2019`:
|
||||
|
||||
```shell
|
||||
cmake
|
||||
-G "Visual Studio 16 2019 Win64" \
|
||||
-G "Visual Studio 16 2019" \
|
||||
-A x64 \
|
||||
-DHIPIFY_CLANG_TESTS=1 \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=../dist \
|
||||
@@ -348,6 +360,7 @@ cmake
|
||||
-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1" \
|
||||
-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1" \
|
||||
-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.4.38 \
|
||||
-DCUDA_CUB_ROOT_DIR=f:/GIT/cub \
|
||||
-DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.0/build/Release/bin/llvm-lit.py \
|
||||
-Thost=x64
|
||||
..
|
||||
|
||||
@@ -67,6 +67,8 @@ const std::map <llvm::StringRef, hipCounter> CUDA_INCLUDE_MAP{
|
||||
// cuSPARSE includes
|
||||
{"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}},
|
||||
{"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}},
|
||||
// CUB includes
|
||||
{"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB}},
|
||||
// CAFFE2 includes
|
||||
{"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}},
|
||||
{"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}},
|
||||
|
||||
@@ -67,6 +67,8 @@ extern const std::map<llvm::StringRef, hipCounter> CUDA_CAFFE2_TYPE_NAME_MAP;
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_CAFFE2_FUNCTION_MAP;
|
||||
// Maps the names of CUDA Device functions to the corresponding HIP functions
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNC_MAP;
|
||||
// Maps the names of CUDA CUB API types to the corresponding HIP types
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_CUB_TYPE_NAME_MAP;
|
||||
|
||||
/**
|
||||
* The union of all the above maps, except includes.
|
||||
|
||||
@@ -60,6 +60,7 @@ namespace perl {
|
||||
"# THE SOFTWARE.\n"
|
||||
"##\n";
|
||||
|
||||
const string sImportant = "# IMPORTANT: Do not change this file manually: it is generated by hipify-clang --perl";
|
||||
const string tab = " ";
|
||||
const string tab_2 = tab + tab;
|
||||
const string tab_3 = tab_2 + tab;
|
||||
@@ -119,6 +120,7 @@ namespace perl {
|
||||
void generateHeader(unique_ptr<ostream>& streamPtr) {
|
||||
*streamPtr.get() << "#!/usr/bin/perl -w" << endl_2;
|
||||
*streamPtr.get() << sCopyright << endl;
|
||||
*streamPtr.get() << sImportant << endl_2;
|
||||
*streamPtr.get() << "#usage " << hipify_perl << " [OPTIONS] INPUT_FILE" << endl_2;
|
||||
*streamPtr.get() << "use Getopt::Long;" << endl;
|
||||
*streamPtr.get() << my << "$whitelist = \"\";" << endl;
|
||||
|
||||
@@ -443,6 +443,47 @@ bool HipifyAction::cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::Ma
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HipifyAction::cubNamespacePrefix(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (const clang::TypedefNameDecl *decl = Result.Nodes.getNodeAs<clang::TypedefNameDecl>("cubNamespacePrefix")) {
|
||||
if (!decl) {
|
||||
return false;
|
||||
}
|
||||
clang::QualType QT = decl->getUnderlyingType();
|
||||
const clang::Type* t = QT.getTypePtr();
|
||||
if (!t) {
|
||||
return false;
|
||||
}
|
||||
const clang::ElaboratedType* et = t->getAs<clang::ElaboratedType>();
|
||||
if (!et) {
|
||||
return false;
|
||||
}
|
||||
const clang::NestedNameSpecifier *nns = et->getQualifier();
|
||||
if (!nns) {
|
||||
return false;
|
||||
}
|
||||
const clang::NamespaceDecl *nsd = nns->getAsNamespace();
|
||||
if (!nsd) {
|
||||
return false;
|
||||
}
|
||||
const clang::TypeSourceInfo *si = decl->getTypeSourceInfo();
|
||||
const clang::TypeLoc tloc = si->getTypeLoc();
|
||||
const clang::SourceRange sr = tloc.getSourceRange();
|
||||
clang::SourceLocation sl(sr.getBegin());
|
||||
clang::SourceLocation end(sr.getEnd());
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
size_t length = SM.getCharacterData(end) - SM.getCharacterData(sl);
|
||||
StringRef sfull = StringRef(SM.getCharacterData(sl), length);
|
||||
std::string name = nsd->getDeclName().getAsString();
|
||||
size_t offset = sfull.find(name);
|
||||
if (offset > 0) {
|
||||
sl = sl.getLocWithOffset(offset);
|
||||
}
|
||||
FindAndReplace(name, sl, CUDA_CUB_TYPE_NAME_MAP);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HipifyAction::cudaHostFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (const clang::CallExpr * call = Result.Nodes.getNodeAs<clang::CallExpr>("cudaHostFuncCall")) {
|
||||
if (!call->getNumArgs()) {
|
||||
@@ -538,6 +579,21 @@ std::unique_ptr<clang::ASTConsumer> HipifyAction::CreateASTConsumer(clang::Compi
|
||||
).bind("cudaDeviceFuncCall"),
|
||||
this
|
||||
);
|
||||
Finder->addMatcher(
|
||||
mat::typedefDecl(
|
||||
mat::isExpansionInMainFile(),
|
||||
mat::hasType(
|
||||
mat::elaboratedType(
|
||||
mat::hasQualifier(
|
||||
mat::specifiesNamespace(
|
||||
mat::hasName("cub")
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
).bind("cubNamespacePrefix"),
|
||||
this
|
||||
);
|
||||
// Ownership is transferred to the caller.
|
||||
return Finder->newASTConsumer();
|
||||
}
|
||||
@@ -658,4 +714,5 @@ void HipifyAction::run(const clang::ast_matchers::MatchFinder::MatchResult& Resu
|
||||
if (cudaSharedIncompleteArrayVar(Result)) return;
|
||||
if (cudaHostFuncCall(Result)) return;
|
||||
if (cudaDeviceFuncCall(Result)) return;
|
||||
if (cubNamespacePrefix(Result)) return;
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ public:
|
||||
bool cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaHostFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cubNamespacePrefix(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
// Called by the preprocessor for each include directive during the non-raw lexing pass.
|
||||
void InclusionDirective(clang::SourceLocation hash_loc,
|
||||
const clang::Token &include_token,
|
||||
|
||||
@@ -129,6 +129,7 @@ const char *apiTypes[NUM_API_TYPES] = {
|
||||
"API_RAND",
|
||||
"API_DNN",
|
||||
"API_FFT",
|
||||
"API_CUB",
|
||||
"API_SPARSE",
|
||||
"API_CAFFE2"
|
||||
};
|
||||
|
||||
@@ -134,6 +134,7 @@ enum ApiTypes {
|
||||
API_DNN,
|
||||
API_FFT,
|
||||
API_SPARSE,
|
||||
API_CUB,
|
||||
API_CAFFE2,
|
||||
API_LAST
|
||||
};
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_LIBRARY_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_LIBRARY_TYPES_H
|
||||
|
||||
typedef enum hipDataType {
|
||||
HIP_R_16F = 2,
|
||||
HIP_R_32F = 0,
|
||||
HIP_R_64F = 1,
|
||||
HIP_C_16F = 6,
|
||||
HIP_C_32F = 4,
|
||||
HIP_C_64F = 5
|
||||
} hipDataType;
|
||||
|
||||
typedef enum hipLibraryPropertyType {
|
||||
HIP_LIBRARY_MAJOR_VERSION,
|
||||
HIP_LIBRARY_MINOR_VERSION,
|
||||
HIP_LIBRARY_PATCH_LEVEL
|
||||
} hipLibraryPropertyType;
|
||||
|
||||
#endif
|
||||
@@ -63,5 +63,6 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <hip/library_types.h>
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H
|
||||
#define HIP_INCLUDE_HIP_LIBRARY_TYPES_H
|
||||
|
||||
#include <hip/hip_common.h>
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
|
||||
#include <hip/hcc_detail/library_types.h>
|
||||
#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
|
||||
#include "library_types.h"
|
||||
#else
|
||||
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -50,6 +50,21 @@ typedef enum hipMemcpyKind {
|
||||
hipMemcpyDefault
|
||||
} hipMemcpyKind;
|
||||
|
||||
// hipDataType
|
||||
#define hipDataType cudaDataType
|
||||
#define HIP_R_16F CUDA_R_16F
|
||||
#define HIP_R_32F CUDA_R_32F
|
||||
#define HIP_R_64F CUDA_R_64F
|
||||
#define HIP_C_16F CUDA_C_16F
|
||||
#define HIP_C_32F CUDA_C_32F
|
||||
#define HIP_C_64F CUDA_C_64F
|
||||
|
||||
// hipLibraryPropertyType
|
||||
#define hipLibraryPropertyType libraryPropertyType
|
||||
#define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION
|
||||
#define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION
|
||||
#define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL
|
||||
|
||||
// hipTextureAddressMode
|
||||
#define hipTextureAddressMode cudaTextureAddressMode
|
||||
#define hipAddressModeWrap cudaAddressModeWrap
|
||||
|
||||
@@ -17,9 +17,9 @@ mkdir -p $ROCMBINDIR
|
||||
pushd $ROCMBINDIR
|
||||
for f in $HIPBINFILES
|
||||
do
|
||||
ln -s -f $f $(basename $f)
|
||||
ln -r -s -f $f $(basename $f)
|
||||
done
|
||||
ln -s -f $HIPDIR/bin/.hipVersion .hipVersion
|
||||
ln -r -s -f $HIPDIR/bin/.hipVersion .hipVersion
|
||||
popd
|
||||
|
||||
# Soft-link to headers
|
||||
@@ -27,5 +27,5 @@ HIPINCDIR=$HIPDIR/include/hip
|
||||
ROCMINCDIR=$ROCMDIR/include
|
||||
mkdir -p $ROCMINCDIR
|
||||
pushd $ROCMINCDIR
|
||||
ln -s -f $HIPINCDIR hip
|
||||
ln -r -s -f $HIPINCDIR hip
|
||||
popd
|
||||
|
||||
@@ -13,17 +13,17 @@ HIPDIR=$ROCMDIR/hip
|
||||
HIPLIBDIR=$ROCMDIR/hip/lib
|
||||
|
||||
# Soft-link to library files
|
||||
HIPLIBFILES=$(ls -aF $HIPLIBDIR | grep -v [-/$])
|
||||
HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$])
|
||||
mkdir -p $ROCMLIBDIR
|
||||
mkdir -p $ROCMLIBDIR/cmake
|
||||
pushd $ROCMLIBDIR
|
||||
for f in $HIPLIBFILES
|
||||
do
|
||||
ln -s -f $HIPLIBDIR/$f $(basename $f)
|
||||
ln -s -r -f $HIPLIBDIR/$f $(basename $f)
|
||||
done
|
||||
# Make the hip cmake directory link.
|
||||
pushd cmake
|
||||
ln -s -f $HIPLIBDIR/cmake/hip hip
|
||||
ln -s -r -f $HIPLIBDIR/cmake/hip hip
|
||||
popd
|
||||
popd
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ HIPLIBDIR=$ROCMDIR/hip/lib
|
||||
([ ! -d $ROCMLIBDIR ] || [ ! -d $HIPLIBDIR ]) && exit 0
|
||||
|
||||
# Remove soft-links to libraries
|
||||
HIPLIBFILES=$(ls -aF $HIPLIBDIR | grep -v [-/$])
|
||||
HIPLIBFILES=$(ls -A $HIPLIBDIR | grep -v [-/$])
|
||||
pushd $ROCMLIBDIR
|
||||
for f in $HIPLIBFILES; do
|
||||
[ -e $f ] || continue
|
||||
|
||||
@@ -38,9 +38,7 @@ class api_callbacks_table_templ {
|
||||
bool set_activity(uint32_t id, act_t fun, void* arg) {
|
||||
std::lock_guard<mutex_t> lock(mutex_);
|
||||
bool ret = true;
|
||||
if (id == HIP_API_ID_ANY) {
|
||||
for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_activity(i, fun, arg);
|
||||
} else if (id < HIP_API_ID_NUMBER) {
|
||||
if (id < HIP_API_ID_NUMBER) {
|
||||
cb_sync(id);
|
||||
callbacks_table_.arr[id].act = fun;
|
||||
callbacks_table_.arr[id].a_arg = arg;
|
||||
@@ -54,9 +52,7 @@ class api_callbacks_table_templ {
|
||||
bool set_callback(uint32_t id, fun_t fun, void* arg) {
|
||||
std::lock_guard<mutex_t> lock(mutex_);
|
||||
bool ret = true;
|
||||
if (id == HIP_API_ID_ANY) {
|
||||
for (unsigned i = 0; i < HIP_API_ID_NUMBER; ++i) set_callback(i, fun, arg);
|
||||
} else if (id < HIP_API_ID_NUMBER) {
|
||||
if (id < HIP_API_ID_NUMBER) {
|
||||
cb_sync(id);
|
||||
callbacks_table_.arr[id].fun = fun;
|
||||
callbacks_table_.arr[id].arg = arg;
|
||||
|
||||
@@ -84,17 +84,23 @@ clang_arguments = "-v"
|
||||
if sys.platform in ['win32']:
|
||||
run_test_ext = ".bat"
|
||||
hipify_path += "/" + config.build_type
|
||||
# CUDA SDK ROOT
|
||||
clang_arguments += " -isystem'%s'/common/inc"
|
||||
else:
|
||||
run_test_ext = ".sh"
|
||||
# CUDA SDK ROOT
|
||||
clang_arguments += " -isystem'%s'/samples/common/inc"
|
||||
# cuDNN ROOT
|
||||
clang_arguments += " -I'%s'/include"
|
||||
if config.pointer_size == 8:
|
||||
clang_arguments += " -D__LP64__"
|
||||
|
||||
# CUB ROOT
|
||||
clang_arguments += " -I'%s'"
|
||||
|
||||
hipify_arguments = "--cuda-path='%s'"
|
||||
|
||||
config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root)))
|
||||
config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root, config.cuda_cub_root)))
|
||||
config.substitutions.append(("%hipify_args", hipify_arguments % (config.cuda_root)))
|
||||
config.substitutions.append(("hipify", '"' + hipify_path + "/hipify-clang" + '"'))
|
||||
config.substitutions.append(("%run_test", '"' + config.test_source_root + "/run_test" + run_test_ext + '"'))
|
||||
|
||||
@@ -8,6 +8,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@"
|
||||
config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
|
||||
config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@"
|
||||
config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@"
|
||||
config.cuda_cub_root = "@CUDA_CUB_ROOT_DIR@"
|
||||
config.cuda_version_major = int("@CUDA_VERSION_MAJOR@")
|
||||
config.cuda_version_minor = int("@CUDA_VERSION_MINOR@")
|
||||
config.cuda_version = "@CUDA_VERSION@"
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <iostream>
|
||||
// CHECK: #include <hiprand.h>
|
||||
#include <curand.h>
|
||||
// CHECK: #include <hipcub/hipcub.hpp>
|
||||
#include <cub/cub.cuh>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// TODO:
|
||||
// using namespace cub;
|
||||
|
||||
template <typename T>
|
||||
__global__ void sort(const T* data_in, T* data_out){
|
||||
// CHECK: typedef ::hipcub::BlockRadixSort<T, 1024, 4> BlockRadixSortT;
|
||||
typedef ::cub::BlockRadixSort<T, 1024, 4> BlockRadixSortT;
|
||||
__shared__ typename BlockRadixSortT::TempStorage tmp_sort;
|
||||
double items[4];
|
||||
int i0 = 4 * (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
for (int i = 0; i < 4; ++i){
|
||||
items[i] = data_in[i0 + i];
|
||||
}
|
||||
BlockRadixSortT(tmp_sort).Sort(items);
|
||||
for (int i = 0; i < 4; ++i){
|
||||
data_out[i0 + i] = items[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
double* d_gpu = NULL;
|
||||
double* result_gpu = NULL;
|
||||
double* data_sorted = new double[4096];
|
||||
// Allocate memory on the GPU
|
||||
// CHECK: hipMalloc(&d_gpu, 4096 * sizeof(double));
|
||||
cudaMalloc(&d_gpu, 4096 * sizeof(double));
|
||||
// CHECK: hipMalloc(&result_gpu, 4096 * sizeof(double));
|
||||
cudaMalloc(&result_gpu, 4096 * sizeof(double));
|
||||
// CHECK: hiprandGenerator_t gen;
|
||||
curandGenerator_t gen;
|
||||
// Create generator
|
||||
// CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT);
|
||||
curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
|
||||
// Fill array with random numbers
|
||||
// CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0);
|
||||
curandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0);
|
||||
// Destroy generator
|
||||
// CHECK: hiprandDestroyGenerator(gen);
|
||||
curandDestroyGenerator(gen);
|
||||
// Sort data
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort), dim3(1), dim3(1024), 0, 0, d_gpu, result_gpu);
|
||||
sort<<<1, 1024>>>(d_gpu, result_gpu);
|
||||
// CHECK: hipMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), hipMemcpyDeviceToHost);
|
||||
cudaMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), cudaMemcpyDeviceToHost);
|
||||
// Write the sorted data to standard out
|
||||
for (int i = 0; i < 4096; ++i){
|
||||
std::cout << data_sorted[i] << ", ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
|
||||
|
||||
#pragma once
|
||||
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <math.h>
|
||||
|
||||
/**
|
||||
* Allocate GPU memory for `count` elements of type `T`.
|
||||
@@ -16,3 +16,38 @@ static T* gpuMalloc(size_t count) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__global__ void add(int n, T* x, T* y) {
|
||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (int i = index; i < n; i += stride)
|
||||
y[i] = x[i] + y[i];
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
size_t numElements = 50;
|
||||
float *A = gpuMalloc<float>(numElements);
|
||||
float* B = gpuMalloc<float>(numElements);
|
||||
for (int i = 0; i < numElements; ++i) {
|
||||
A[i] = 1.0f;
|
||||
B[i] = 2.0f;
|
||||
}
|
||||
int blockSize = 512;
|
||||
int numBlocks = (numElements + blockSize - 1) / blockSize;
|
||||
dim3 dimGrid(numBlocks, 1, 1);
|
||||
dim3 dimBlock(blockSize, 1, 1);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(add<float>), dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B);
|
||||
add<float><<<dimGrid, dimBlock>>>(numElements, A, B);
|
||||
// CHECK: hipDeviceSynchronize();
|
||||
cudaDeviceSynchronize();
|
||||
float maxError = 0.0f;
|
||||
for (int i = 0; i < numElements; ++i)
|
||||
maxError = fmax(maxError, fabs(B[i] - 3.0f));
|
||||
// CHECK: hipFree(A);
|
||||
cudaFree(A);
|
||||
// CHECK: hipFree(B);
|
||||
cudaFree(B);
|
||||
if (maxError == 0.0f)
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -80,7 +80,16 @@ int main()
|
||||
|
||||
for (auto&& x : variable_name_vec) hiprtcAddNameExpression(prog, x.c_str());
|
||||
|
||||
hiprtcResult compileResult = hiprtcCompileProgram(prog, 0, nullptr);
|
||||
hipDeviceProp_t props;
|
||||
int device = 0;
|
||||
hipGetDeviceProperties(&props, device);
|
||||
std::string gfxName = "gfx" + std::to_string(props.gcnArch);
|
||||
std::string sarg = "--gpu-architecture=" + gfxName;
|
||||
const char* options[] = {
|
||||
sarg.c_str()
|
||||
};
|
||||
|
||||
hiprtcResult compileResult = hiprtcCompileProgram(prog, 1, options);
|
||||
|
||||
// Obtain compilation log from the program.
|
||||
size_t logSize;
|
||||
|
||||
@@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -76,7 +76,16 @@ int main()
|
||||
|
||||
for (auto&& x : name_vec) hiprtcAddNameExpression(prog, x.c_str());
|
||||
|
||||
hiprtcResult compileResult = hiprtcCompileProgram(prog, 0, nullptr);
|
||||
hipDeviceProp_t props;
|
||||
int device = 0;
|
||||
hipGetDeviceProperties(&props, device);
|
||||
std::string gfxName = "gfx" + std::to_string(props.gcnArch);
|
||||
std::string sarg = "--gpu-architecture=" + gfxName;
|
||||
const char* options[] = {
|
||||
sarg.c_str()
|
||||
};
|
||||
|
||||
hiprtcResult compileResult = hiprtcCompileProgram(prog, 1, options);
|
||||
|
||||
size_t logSize;
|
||||
hiprtcGetProgramLogSize(prog, &logSize);
|
||||
|
||||
@@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM all
|
||||
* BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -66,7 +66,16 @@ int main()
|
||||
nullptr, // headers
|
||||
nullptr); // includeNames
|
||||
|
||||
hiprtcResult compileResult{hiprtcCompileProgram(prog, 0, nullptr)};
|
||||
hipDeviceProp_t props;
|
||||
int device = 0;
|
||||
hipGetDeviceProperties(&props, device);
|
||||
std::string gfxName = "gfx" + std::to_string(props.gcnArch);
|
||||
std::string sarg = "--gpu-architecture=" + gfxName;
|
||||
const char* options[] = {
|
||||
sarg.c_str()
|
||||
};
|
||||
|
||||
hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, options)};
|
||||
|
||||
size_t logSize;
|
||||
hiprtcGetProgramLogSize(prog, &logSize);
|
||||
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
// Simple test for hipLaunchCooperativeKernel API.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren