diff --git a/projects/hip/bin/hipify-perl b/projects/hip/bin/hipify-perl
index 25e02352ad..2e391ab8d2 100755
--- a/projects/hip/bin/hipify-perl
+++ b/projects/hip/bin/hipify-perl
@@ -769,6 +769,7 @@ sub simpleSubstitutions {
$ft{'include'} += s/\btexture_fetch_functions.h\b//g;
$ft{'include'} += s/\bvector_types.h\b/hip\/hip_vector_types.h/g;
$ft{'include_cuda_main_header'} += s/\bcuComplex.h\b/hip\/hip_complex.h/g;
+ $ft{'include_cuda_main_header'} += s/\bcub\/cub.cuh\b/hipcub\/hipcub.hpp/g;
$ft{'include_cuda_main_header'} += s/\bcublas.h\b/hipblas.h/g;
$ft{'include_cuda_main_header'} += s/\bcublas_v2.h\b/hipblas.h/g;
$ft{'include_cuda_main_header'} += s/\bcuda.h\b/hip\/hip_runtime.h/g;
diff --git a/projects/hip/hipify-clang/README.md b/projects/hip/hipify-clang/README.md
index ab52c288c8..07466dbe62 100644
--- a/projects/hip/hipify-clang/README.md
+++ b/projects/hip/hipify-clang/README.md
@@ -119,7 +119,8 @@ To run it:
- **Windows**:
```shell
cmake \
- -G "Visual Studio 16 2019 Win64" \
+ -G "Visual Studio 16 2019" \
+ -A x64 \
-DCMAKE_INSTALL_PREFIX=../dist \
-DLLVM_SOURCE_DIR=../llvm \
-DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \
@@ -149,6 +150,14 @@ To run it:
- Windows: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.4.38`
+5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed.
+
+ * Path to CUB should be specified by the `CUDA_CUB_ROOT_DIR` option:
+
+ - Linux: `-DCUDA_CUB_ROOT_DIR=/srv/CUB`
+
+ - Windows: `-DCUDA_CUB_ROOT_DIR=f:/GIT/cub`
+
5. Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed.
6. Ensure `lit` and `FileCheck` are installed - these are distributed with LLVM.
@@ -199,6 +208,7 @@ cmake
-DCMAKE_PREFIX_PATH=/srv/git/LLVM/9.0.0/dist \
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1 \
-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.4.38 \
+ -DCUDA_CUB_ROOT_DIR=/srv/CUB \
-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.0/build/bin/llvm-lit \
..
```
@@ -254,72 +264,73 @@ Linux 5.2.0 - Platform OS
64 - hipify-clang binary bitness
64 - python 2.7.12 binary bitness
========================================
--- Testing: 63 tests, 12 threads --
-PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_01.cu (2 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_03.cu (3 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_02.cu (4 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_05.cu (5 of 63)
-PASS: hipify :: unit_tests/device/math_functions.cu (6 of 63)
-PASS: hipify :: unit_tests/device/atomics.cu (7 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_07.cu (8 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_06.cu (9 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_04.cu (10 of 63)
-PASS: hipify :: unit_tests/device/device_symbols.cu (11 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_10.cu (12 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_11.cu (13 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 63)
-PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (15 of 63)
-PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (16 of 63)
-PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (17 of 63)
-PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (18 of 63)
-PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (19 of 63)
-PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (20 of 63)
-PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (21 of 63)
-PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (22 of 63)
-PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (23 of 63)
-PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (24 of 63)
-PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (25 of 63)
-PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (26 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (27 of 63)
-PASS: hipify :: unit_tests/headers/headers_test_09.cu (28 of 63)
-PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (29 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (30 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (31 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (32 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (33 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (34 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (35 of 63)
-PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (36 of 63)
-PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (37 of 63)
-PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (38 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (39 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (40 of 63)
-PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (41 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (42 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (44 of 63)
-PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (45 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (46 of 63)
-PASS: hipify :: unit_tests/samples/allocators.cu (47 of 63)
-PASS: hipify :: unit_tests/samples/MallocManaged.cpp (48 of 63)
-PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (49 of 63)
-PASS: hipify :: unit_tests/samples/coalescing.cu (50 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (51 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (52 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (53 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (54 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (55 of 63)
-PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (56 of 63)
-PASS: hipify :: unit_tests/samples/intro.cu (57 of 63)
-PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (58 of 63)
-PASS: hipify :: unit_tests/samples/axpy.cu (59 of 63)
-PASS: hipify :: unit_tests/samples/square.cu (60 of 63)
-PASS: hipify :: unit_tests/samples/vec_add.cu (61 of 63)
-PASS: hipify :: unit_tests/samples/static_shared_memory.cu (62 of 63)
-PASS: hipify :: unit_tests/samples/cudaRegister.cu (63 of 63)
-Testing Time: 2.91s
- Expected Passes : 63
+-- Testing: 64 tests, 12 threads --
+PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 64)
+PASS: hipify :: unit_tests/device/math_functions.cu (2 of 64)
+PASS: hipify :: unit_tests/device/atomics.cu (3 of 64)
+PASS: hipify :: unit_tests/device/device_symbols.cu (4 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_02.cu (5 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_03.cu (6 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_01.cu (7 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_04.cu (8 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_05.cu (9 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_07.cu (10 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_06.cu (11 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_11.cu (12 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_08.cu (13 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_10.cu (14 of 64)
+PASS: hipify :: unit_tests/headers/headers_test_09.cu (15 of 64)
+PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (16 of 64)
+PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (17 of 64)
+PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (18 of 64)
+PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (19 of 64)
+PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (20 of 64)
+PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (21 of 64)
+PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (22 of 64)
+PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (23 of 64)
+PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (24 of 64)
+PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (25 of 64)
+PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (26 of 64)
+PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (27 of 64)
+PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (28 of 64)
+PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (29 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (30 of 64)
+PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (31 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (32 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (33 of 64)
+PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (34 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (35 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (36 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (37 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (38 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (39 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (40 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (41 of 64)
+PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (42 of 64)
+PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (43 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (44 of 64)
+PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (45 of 64)
+PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (46 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (47 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (48 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (49 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (50 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (51 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (52 of 64)
+PASS: hipify :: unit_tests/samples/MallocManaged.cpp (53 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (54 of 64)
+PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (55 of 64)
+PASS: hipify :: unit_tests/samples/allocators.cu (56 of 64)
+PASS: hipify :: unit_tests/samples/coalescing.cu (57 of 64)
+PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (58 of 64)
+PASS: hipify :: unit_tests/samples/axpy.cu (59 of 64)
+PASS: hipify :: unit_tests/samples/cudaRegister.cu (60 of 64)
+PASS: hipify :: unit_tests/samples/intro.cu (61 of 64)
+PASS: hipify :: unit_tests/samples/square.cu (62 of 64)
+PASS: hipify :: unit_tests/samples/static_shared_memory.cu (63 of 64)
+PASS: hipify :: unit_tests/samples/vec_add.cu (64 of 64)
+Testing Time: 2.98s
+ Expected Passes : 64
[100%] Built target test-hipify
```
### Windows
@@ -334,13 +345,14 @@ LLVM 7.0.0 - 9.0.0, CUDA 7.5 - 10.1, cudnn-7.0.5.15 - cudnn-7.6.4.38
Build system for the above configurations:
-Python 3.6 - 3.7.4, cmake 3.12.3 - 3.15.4, Visual Studio 2017 (15.5.2) - 2019 (16.3.2).
+Python 3.6 - 3.7.4, cmake 3.12.3 - 3.15.5, Visual Studio 2017 (15.5.2) - 2019 (16.3.4).
Here is an example of building `hipify-clang` with testing support on `Windows 10` by `Visual Studio 16 2019`:
```shell
cmake
- -G "Visual Studio 16 2019 Win64" \
+ -G "Visual Studio 16 2019" \
+ -A x64 \
-DHIPIFY_CLANG_TESTS=1 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=../dist \
@@ -348,6 +360,7 @@ cmake
-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1" \
-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1" \
-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.4.38 \
+ -DCUDA_CUB_ROOT_DIR=f:/GIT/cub \
-DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.0/build/Release/bin/llvm-lit.py \
-Thost=x64
..
diff --git a/projects/hip/hipify-clang/src/CUDA2HIP.cpp b/projects/hip/hipify-clang/src/CUDA2HIP.cpp
index d2f11df458..50f4682af2 100644
--- a/projects/hip/hipify-clang/src/CUDA2HIP.cpp
+++ b/projects/hip/hipify-clang/src/CUDA2HIP.cpp
@@ -67,6 +67,8 @@ const std::map CUDA_INCLUDE_MAP{
// cuSPARSE includes
{"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}},
{"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}},
+ // CUB includes
+ {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB}},
// CAFFE2 includes
{"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}},
{"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}},
diff --git a/projects/hip/hipify-clang/src/CUDA2HIP.h b/projects/hip/hipify-clang/src/CUDA2HIP.h
index acddd23a0d..b02e7f1f3e 100644
--- a/projects/hip/hipify-clang/src/CUDA2HIP.h
+++ b/projects/hip/hipify-clang/src/CUDA2HIP.h
@@ -67,6 +67,8 @@ extern const std::map CUDA_CAFFE2_TYPE_NAME_MAP;
extern const std::map CUDA_CAFFE2_FUNCTION_MAP;
// Maps the names of CUDA Device functions to the corresponding HIP functions
extern const std::map CUDA_DEVICE_FUNC_MAP;
+// Maps the names of CUDA CUB API types to the corresponding HIP types
+extern const std::map CUDA_CUB_TYPE_NAME_MAP;
/**
* The union of all the above maps, except includes.
diff --git a/projects/hip/hipify-clang/src/HipifyAction.cpp b/projects/hip/hipify-clang/src/HipifyAction.cpp
index 5c5663985a..c6d10f0cdc 100644
--- a/projects/hip/hipify-clang/src/HipifyAction.cpp
+++ b/projects/hip/hipify-clang/src/HipifyAction.cpp
@@ -443,6 +443,47 @@ bool HipifyAction::cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::Ma
return false;
}
+bool HipifyAction::cubNamespacePrefix(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
+ if (const clang::TypedefNameDecl *decl = Result.Nodes.getNodeAs("cubNamespacePrefix")) {
+ if (!decl) {
+ return false;
+ }
+ clang::QualType QT = decl->getUnderlyingType();
+ const clang::Type* t = QT.getTypePtr();
+ if (!t) {
+ return false;
+ }
+ const clang::ElaboratedType* et = t->getAs();
+ if (!et) {
+ return false;
+ }
+ const clang::NestedNameSpecifier *nns = et->getQualifier();
+ if (!nns) {
+ return false;
+ }
+ const clang::NamespaceDecl *nsd = nns->getAsNamespace();
+ if (!nsd) {
+ return false;
+ }
+ const clang::TypeSourceInfo *si = decl->getTypeSourceInfo();
+ const clang::TypeLoc tloc = si->getTypeLoc();
+ const clang::SourceRange sr = tloc.getSourceRange();
+ clang::SourceLocation sl(sr.getBegin());
+ clang::SourceLocation end(sr.getEnd());
+ clang::SourceManager& SM = getCompilerInstance().getSourceManager();
+ size_t length = SM.getCharacterData(end) - SM.getCharacterData(sl);
+ StringRef sfull = StringRef(SM.getCharacterData(sl), length);
+ std::string name = nsd->getDeclName().getAsString();
+ size_t offset = sfull.find(name);
+ if (offset > 0) {
+ sl = sl.getLocWithOffset(offset);
+ }
+ FindAndReplace(name, sl, CUDA_CUB_TYPE_NAME_MAP);
+ return true;
+ }
+ return false;
+}
+
bool HipifyAction::cudaHostFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
if (const clang::CallExpr * call = Result.Nodes.getNodeAs("cudaHostFuncCall")) {
if (!call->getNumArgs()) {
@@ -538,6 +579,21 @@ std::unique_ptr HipifyAction::CreateASTConsumer(clang::Compi
).bind("cudaDeviceFuncCall"),
this
);
+ Finder->addMatcher(
+ mat::typedefDecl(
+ mat::isExpansionInMainFile(),
+ mat::hasType(
+ mat::elaboratedType(
+ mat::hasQualifier(
+ mat::specifiesNamespace(
+ mat::hasName("cub")
+ )
+ )
+ )
+ )
+ ).bind("cubNamespacePrefix"),
+ this
+ );
// Ownership is transferred to the caller.
return Finder->newASTConsumer();
}
@@ -658,4 +714,5 @@ void HipifyAction::run(const clang::ast_matchers::MatchFinder::MatchResult& Resu
if (cudaSharedIncompleteArrayVar(Result)) return;
if (cudaHostFuncCall(Result)) return;
if (cudaDeviceFuncCall(Result)) return;
+ if (cubNamespacePrefix(Result)) return;
}
diff --git a/projects/hip/hipify-clang/src/HipifyAction.h b/projects/hip/hipify-clang/src/HipifyAction.h
index 3c85604ced..a24404deee 100644
--- a/projects/hip/hipify-clang/src/HipifyAction.h
+++ b/projects/hip/hipify-clang/src/HipifyAction.h
@@ -72,6 +72,7 @@ public:
bool cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result);
bool cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result);
bool cudaHostFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result);
+ bool cubNamespacePrefix(const clang::ast_matchers::MatchFinder::MatchResult& Result);
// Called by the preprocessor for each include directive during the non-raw lexing pass.
void InclusionDirective(clang::SourceLocation hash_loc,
const clang::Token &include_token,
diff --git a/projects/hip/hipify-clang/src/Statistics.cpp b/projects/hip/hipify-clang/src/Statistics.cpp
index 9751763be3..70a75ac0cd 100644
--- a/projects/hip/hipify-clang/src/Statistics.cpp
+++ b/projects/hip/hipify-clang/src/Statistics.cpp
@@ -129,6 +129,7 @@ const char *apiTypes[NUM_API_TYPES] = {
"API_RAND",
"API_DNN",
"API_FFT",
+ "API_CUB",
"API_SPARSE",
"API_CAFFE2"
};
diff --git a/projects/hip/hipify-clang/src/Statistics.h b/projects/hip/hipify-clang/src/Statistics.h
index 051f680fb1..9b9889d0e5 100644
--- a/projects/hip/hipify-clang/src/Statistics.h
+++ b/projects/hip/hipify-clang/src/Statistics.h
@@ -134,6 +134,7 @@ enum ApiTypes {
API_DNN,
API_FFT,
API_SPARSE,
+ API_CUB,
API_CAFFE2,
API_LAST
};
diff --git a/projects/hip/tests/hipify-clang/lit.cfg b/projects/hip/tests/hipify-clang/lit.cfg
index 64f82e57fa..1d092a4327 100644
--- a/projects/hip/tests/hipify-clang/lit.cfg
+++ b/projects/hip/tests/hipify-clang/lit.cfg
@@ -84,17 +84,23 @@ clang_arguments = "-v"
if sys.platform in ['win32']:
run_test_ext = ".bat"
hipify_path += "/" + config.build_type
+ # CUDA SDK ROOT
clang_arguments += " -isystem'%s'/common/inc"
else:
run_test_ext = ".sh"
+ # CUDA SDK ROOT
clang_arguments += " -isystem'%s'/samples/common/inc"
+# cuDNN ROOT
clang_arguments += " -I'%s'/include"
if config.pointer_size == 8:
clang_arguments += " -D__LP64__"
+# CUB ROOT
+clang_arguments += " -I'%s'"
+
hipify_arguments = "--cuda-path='%s'"
-config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root)))
+config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root, config.cuda_cub_root)))
config.substitutions.append(("%hipify_args", hipify_arguments % (config.cuda_root)))
config.substitutions.append(("hipify", '"' + hipify_path + "/hipify-clang" + '"'))
config.substitutions.append(("%run_test", '"' + config.test_source_root + "/run_test" + run_test_ext + '"'))
diff --git a/projects/hip/tests/hipify-clang/lit.site.cfg.in b/projects/hip/tests/hipify-clang/lit.site.cfg.in
index 6ef4dc007a..3c17567903 100644
--- a/projects/hip/tests/hipify-clang/lit.site.cfg.in
+++ b/projects/hip/tests/hipify-clang/lit.site.cfg.in
@@ -8,6 +8,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@"
config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@"
config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@"
+config.cuda_cub_root = "@CUDA_CUB_ROOT_DIR@"
config.cuda_version_major = int("@CUDA_VERSION_MAJOR@")
config.cuda_version_minor = int("@CUDA_VERSION_MINOR@")
config.cuda_version = "@CUDA_VERSION@"
diff --git a/projects/hip/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu b/projects/hip/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu
new file mode 100644
index 0000000000..4646015e74
--- /dev/null
+++ b/projects/hip/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu
@@ -0,0 +1,60 @@
+// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
+// CHECK: #include
+#include
+// CHECK: #include
+#include
+// CHECK: #include
+#include
+
+#include
+
+// TODO:
+// using namespace cub;
+
+template
+__global__ void sort(const T* data_in, T* data_out){
+ // CHECK: typedef ::hipcub::BlockRadixSort BlockRadixSortT;
+ typedef ::cub::BlockRadixSort BlockRadixSortT;
+ __shared__ typename BlockRadixSortT::TempStorage tmp_sort;
+ double items[4];
+ int i0 = 4 * (blockIdx.x * blockDim.x + threadIdx.x);
+ for (int i = 0; i < 4; ++i){
+ items[i] = data_in[i0 + i];
+ }
+ BlockRadixSortT(tmp_sort).Sort(items);
+ for (int i = 0; i < 4; ++i){
+ data_out[i0 + i] = items[i];
+ }
+}
+
+int main(){
+ double* d_gpu = NULL;
+ double* result_gpu = NULL;
+ double* data_sorted = new double[4096];
+ // Allocate memory on the GPU
+ // CHECK: hipMalloc(&d_gpu, 4096 * sizeof(double));
+ cudaMalloc(&d_gpu, 4096 * sizeof(double));
+ // CHECK: hipMalloc(&result_gpu, 4096 * sizeof(double));
+ cudaMalloc(&result_gpu, 4096 * sizeof(double));
+ // CHECK: hiprandGenerator_t gen;
+ curandGenerator_t gen;
+ // Create generator
+ // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT);
+ curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
+ // Fill array with random numbers
+ // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0);
+ curandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0);
+ // Destroy generator
+ // CHECK: hiprandDestroyGenerator(gen);
+ curandDestroyGenerator(gen);
+ // Sort data
+ // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort), dim3(1), dim3(1024), 0, 0, d_gpu, result_gpu);
+ sort<<<1, 1024>>>(d_gpu, result_gpu);
+ // CHECK: hipMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), hipMemcpyDeviceToHost);
+ cudaMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), cudaMemcpyDeviceToHost);
+ // Write the sorted data to standard out
+ for (int i = 0; i < 4096; ++i){
+ std::cout << data_sorted[i] << ", ";
+ }
+ std::cout << std::endl;
+}