From 85281b1d8657cf105ef456167e8a364640c93c1e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 18 Oct 2019 18:51:40 +0300 Subject: [PATCH] [HIPIFY][CUB][#1460] Add "using namespace cub" translation support + Add cub_03.cu [ROCm/clr commit: 86f6756b0281a9136186c0eb8828a904ebd5f284] --- .../hipamd/hipify-clang/src/HipifyAction.cpp | 19 ++++++++++ .../hipamd/hipify-clang/src/HipifyAction.h | 1 + .../unit_tests/libraries/CUB/cub_03.cu | 37 +++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 projects/clr/hipamd/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu diff --git a/projects/clr/hipamd/hipify-clang/src/HipifyAction.cpp b/projects/clr/hipamd/hipify-clang/src/HipifyAction.cpp index f63da1b2de..510d91978a 100644 --- a/projects/clr/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/projects/clr/hipamd/hipify-clang/src/HipifyAction.cpp @@ -63,6 +63,7 @@ const StringRef sCudaHostFuncCall = "cudaHostFuncCall"; const StringRef sCudaDeviceFuncCall = "cudaDeviceFuncCall"; const StringRef sCubNamespacePrefix = "cubNamespacePrefix"; const StringRef sCubFunctionTemplateDecl = "cubFunctionTemplateDecl"; +const StringRef sCubUsingNamespaceDecl = "cubUsingNamespaceDecl"; std::set DeviceSymbolFunctions0 { {sCudaMemcpyToSymbol}, @@ -472,6 +473,16 @@ bool HipifyAction::cubNamespacePrefix(const mat::MatchFinder::MatchResult &Resul return false; } +bool HipifyAction::cubUsingNamespaceDecl(const mat::MatchFinder::MatchResult &Result) { + if (auto *decl = Result.Nodes.getNodeAs(sCubUsingNamespaceDecl)) { + if (auto nsd = decl->getNominatedNamespace()) { + FindAndReplace(nsd->getDeclName().getAsString(), decl->getIdentLocation(), CUDA_CUB_TYPE_NAME_MAP); + return true; + } + } + return false; +} + bool HipifyAction::cubFunctionTemplateDecl(const mat::MatchFinder::MatchResult &Result) { if (auto *decl = Result.Nodes.getNodeAs(sCubFunctionTemplateDecl)) { auto *Tparams = decl->getTemplateParameters(); @@ -611,6 +622,13 @@ std::unique_ptr HipifyAction::CreateASTConsumer(clang::Compi ).bind(sCubFunctionTemplateDecl), this ); + // TODO: Maybe worth to make it more concrete + Finder->addMatcher( + mat::usingDirectiveDecl( + mat::isExpansionInMainFile() + ).bind(sCubUsingNamespaceDecl), + this + ); // Ownership is transferred to the caller. return Finder->newASTConsumer(); } @@ -725,4 +743,5 @@ void HipifyAction::run(const mat::MatchFinder::MatchResult &Result) { if (cudaDeviceFuncCall(Result)) return; if (cubNamespacePrefix(Result)) return; if (cubFunctionTemplateDecl(Result)) return; + if (cubUsingNamespaceDecl(Result)) return; } diff --git a/projects/clr/hipamd/hipify-clang/src/HipifyAction.h b/projects/clr/hipamd/hipify-clang/src/HipifyAction.h index 73879bfd14..f70d17dd0b 100644 --- a/projects/clr/hipamd/hipify-clang/src/HipifyAction.h +++ b/projects/clr/hipamd/hipify-clang/src/HipifyAction.h @@ -76,6 +76,7 @@ public: bool cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result); bool cubNamespacePrefix(const mat::MatchFinder::MatchResult &Result); bool cubFunctionTemplateDecl(const mat::MatchFinder::MatchResult &Result); + bool cubUsingNamespaceDecl(const mat::MatchFinder::MatchResult &Result); // Called by the preprocessor for each include directive during the non-raw lexing pass. void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token, diff --git a/projects/clr/hipamd/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu b/projects/clr/hipamd/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu new file mode 100644 index 0000000000..8f68bb40c6 --- /dev/null +++ b/projects/clr/hipamd/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu @@ -0,0 +1,37 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args +// CHECK: #include +#include +// CHECK: #include +#include +// CHECK: #include +#include + +#include + +// using namespace hipcub; +using namespace cub; + +/** +* Simple CUDA kernel for computing tiled partial sums +*/ +template +__global__ void ScanTilesKernel(int *d_in, int *d_out) { + // Specialize collective types for problem context + // TODO: typedef cub::BlockLoad BlockLoadT; + typedef BlockLoad BlockLoadT; + typedef BlockScan BlockScanT; + // Allocate on-chip temporary storage + __shared__ union { + typename BlockLoadT::TempStorage load; + typename BlockScanT::TempStorage reduce; + } temp_storage; + // Load data per thread + int thread_data[ITEMS_PER_THREAD]; + int offset = blockIdx.x * (BLOCK_THREADS * ITEMS_PER_THREAD); + BlockLoadT(temp_storage.load).Load(d_in + offset, offset); + __syncthreads(); + // Compute the block-wide prefix sum + BlockScanT(temp_storage).Sum(thread_data); +}