[HIPIFY][CUB][#1460] Replace the cub:: namespace with hipcub:: in template-instantiated kernel names when converting cudaLaunchKernel

Also update the cub_02.cu test accordingly.
2019-10-16 19:02:13 +03:00
@@ -36,6 +36,7 @@ THE SOFTWARE.
 const std::string sHIP = "HIP";
 const std::string sROC = "ROC";
 const std::string sCub = "cub";
+const std::string sHipcub = "hipcub";
 const std::string sHIP_DYNAMIC_SHARED = "HIP_DYNAMIC_SHARED";
 const std::string sHIP_KERNEL_NAME = "HIP_KERNEL_NAME";
 std::string sHIP_SYMBOL = "HIP_SYMBOL";
@@ -345,9 +346,26 @@ bool HipifyAction::cudaLaunchKernel(const mat::MatchFinder::MatchResult &Result)
  llvm::raw_svector_ostream OS(XStr);
  clang::LangOptions DefaultLangOptions;
  auto *SM = Result.SourceManager;
+  clang::SourceRange sr = calleeExpr->getSourceRange();
+  std::string kern = readSourceText(*SM, sr).str();
  OS << sHipLaunchKernelGGL << "(";
-  if (caleeDecl->isTemplateInstantiation()) OS << sHIP_KERNEL_NAME << "(";
-  OS << readSourceText(*SM, calleeExpr->getSourceRange());
+  if (caleeDecl->isTemplateInstantiation()) {
+    OS << sHIP_KERNEL_NAME << "(";
+    std::string cub = sCub + "::";
+    std::string hipcub;
+    const auto found = CUDA_CUB_TYPE_NAME_MAP.find(sCub);
+    if (found != CUDA_CUB_TYPE_NAME_MAP.end()) {
+      hipcub = found->second.hipName.str() + "::";
+    } else {
+      hipcub = sHipcub + "::";
+    }
+    size_t pos = kern.find(cub);
+    while (pos != std::string::npos) {
+      kern.replace(pos, cub.size(), hipcub);
+      pos = kern.find(cub, pos + hipcub.size());
+    }
+  }
+  OS << kern;
  if (caleeDecl->isTemplateInstantiation()) OS << ")";
  OS << ", ";
  // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero.
@@ -55,10 +55,9 @@ int main() {
    // CHECK: hiprandDestroyGenerator(gen);
    curandDestroyGenerator(gen);
    // Sort data
-    // TODO: Substitution of cub namespace in CUDAKernelCallExpr
-    // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu);
+    // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, hipcub::BLOCK_LOAD_TRANSPOSE, hipcub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu);
    sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE><<<1000, 512>>>(d_gpu, result_gpu);
-    // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu);
+    // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, hipcub::BLOCK_LOAD_DIRECT, hipcub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu);
    sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT><<<1000, 256>>>(d_gpu, result_gpu);
    // CHECK: hipMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), hipMemcpyDeviceToHost);
    cudaMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), cudaMemcpyDeviceToHost);