[HIPIFY][CUB][#1460] Add cub:: namespace support in TemplateInstantiation of cudaLaunchKernel

+ Update cub_02.cu test accordingly
Этот коммит содержится в:
Evgeny Mankov
2019-10-16 19:02:13 +03:00
родитель e805e7d8cb
Коммит edfd05a86d
2 изменённых файлов: 22 добавлений и 5 удалений
+20 -2
Просмотреть файл
@@ -36,6 +36,7 @@ THE SOFTWARE.
// Name fragments and macro names kept as std::string constants.
const std::string sHIP = "HIP";
const std::string sROC = "ROC";
// CUB namespace name; HipifyAction::cudaLaunchKernel appends "::" to it to
// find namespace qualifiers inside the kernel callee text.
const std::string sCub = "cub";
// Fallback replacement namespace: cudaLaunchKernel uses it (plus "::") when
// CUDA_CUB_TYPE_NAME_MAP has no entry for sCub.
const std::string sHipcub = "hipcub";
const std::string sHIP_DYNAMIC_SHARED = "HIP_DYNAMIC_SHARED";
// Name written as "HIP_KERNEL_NAME(" ... ")" around the kernel text by
// cudaLaunchKernel when the callee is a template instantiation.
const std::string sHIP_KERNEL_NAME = "HIP_KERNEL_NAME";
// NOTE(review): unlike the constants visible above, this one is not const —
// presumably it is reassigned elsewhere; confirm before adding const.
std::string sHIP_SYMBOL = "HIP_SYMBOL";
@@ -345,9 +346,26 @@ bool HipifyAction::cudaLaunchKernel(const mat::MatchFinder::MatchResult &Result)
llvm::raw_svector_ostream OS(XStr);
clang::LangOptions DefaultLangOptions;
auto *SM = Result.SourceManager;
clang::SourceRange sr = calleeExpr->getSourceRange();
std::string kern = readSourceText(*SM, sr).str();
OS << sHipLaunchKernelGGL << "(";
if (caleeDecl->isTemplateInstantiation()) OS << sHIP_KERNEL_NAME << "(";
OS << readSourceText(*SM, calleeExpr->getSourceRange());
if (caleeDecl->isTemplateInstantiation()) {
OS << sHIP_KERNEL_NAME << "(";
std::string cub = sCub + "::";
std::string hipcub;
const auto found = CUDA_CUB_TYPE_NAME_MAP.find(sCub);
if (found != CUDA_CUB_TYPE_NAME_MAP.end()) {
hipcub = found->second.hipName.str() + "::";
} else {
hipcub = sHipcub + "::";
}
size_t pos = kern.find(cub);
while (pos != std::string::npos) {
kern.replace(pos, cub.size(), hipcub);
pos = kern.find(cub, pos + hipcub.size());
}
}
OS << kern;
if (caleeDecl->isTemplateInstantiation()) OS << ")";
OS << ", ";
// Next up are the four kernel configuration parameters, the last two of which are optional and default to zero.
+2 -3
Просмотреть файл
@@ -55,10 +55,9 @@ int main() {
// CHECK: hiprandDestroyGenerator(gen);
curandDestroyGenerator(gen);
// Sort data
// TODO: Substitution of cub namespace in CUDAKernelCallExpr
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, hipcub::BLOCK_LOAD_TRANSPOSE, hipcub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu);
sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE><<<1000, 512>>>(d_gpu, result_gpu);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, hipcub::BLOCK_LOAD_DIRECT, hipcub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu);
sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT><<<1000, 256>>>(d_gpu, result_gpu);
// CHECK: hipMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), hipMemcpyDeviceToHost);
cudaMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), cudaMemcpyDeviceToHost);