[HIPIFY] Return to wrapping with HIP_KERNEL_NAME(...) macro of a template instantiation kernel launch

[REASON]

1. hip-clang is fine with the templated kernel launch, brackets are unneeded: HIP_KERNEL_NAME(...) __VA_ARGS__
2. HCC is not, thus: HIP_KERNEL_NAME(...) (__VA_ARGS__)

[TODO] Clean-up entirely kernel name wrapping when HCC is finally obsolete.

+ Update perl generation, hipify-perl, and affected tests accordingly.
Este commit está contenido en:
Evgeny Mankov
2019-10-02 16:01:07 +03:00
padre 108992428d
commit 6bb9913e8a
Se han modificado 7 ficheros con 28 adiciones y 27 borrados
+6 -6
Ver fichero
@@ -1610,32 +1610,32 @@ sub transformKernelLaunch {
my $k = 0;
# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6)/g;
# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;
# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6, /g;
# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;
# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0)/g;
# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;
# Handle the kern<...><<Dg, Db, Ns>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0, /g;
# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;
# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0)/g;
# Handle the kern<<<Dg, Db>>>() syntax with empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;
# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0, /g;
# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;
+6 -6
Ver fichero
@@ -197,32 +197,32 @@ namespace perl {
*streamPtr.get() << tab << sMy_k << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>() syntax with empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;" << std::endl << std::endl;
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << std::endl;
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;" << std::endl << std::endl;
+2 -1
Ver fichero
@@ -38,6 +38,7 @@ namespace mat = clang::ast_matchers;
const std::string sHIP_DYNAMIC_SHARED = "HIP_DYNAMIC_SHARED";
std::string sHIP_SYMBOL = "HIP_SYMBOL";
std::string sHIP_KERNEL_NAME = "HIP_KERNEL_NAME";
std::string s_reinterpret_cast = "reinterpret_cast<const void*>";
const std::string sHipLaunchKernelGGL = "hipLaunchKernelGGL(";
const std::string sDim3 = "dim3(";
@@ -348,7 +349,7 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc
clang::LangOptions DefaultLangOptions;
clang::SourceManager* SM = Result.SourceManager;
OS << sHipLaunchKernelGGL;
if (caleeDecl->isTemplateInstantiation()) OS << "(";
if (caleeDecl->isTemplateInstantiation()) OS << sHIP_KERNEL_NAME << "(";
OS << readSourceText(*SM, calleeExpr->getSourceRange());
if (caleeDecl->isTemplateInstantiation()) OS << ")";
OS << ", ";
+1 -1
Ver fichero
@@ -257,7 +257,7 @@ void runTest() {
// CHECK: hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice);
cudaMemcpy(dOData, hOData, memSize, cudaMemcpyHostToDevice);
// Execute the kernel
// CHECK: hipLaunchKernelGGL((testKernel), dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(testKernel), dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
testKernel<<<numBlocks, numThreads>>>(dOData);
// Copy result from device to host
// CHECK: hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost);
@@ -121,7 +121,7 @@ struct runner
const size_t states_size = blocks * threads;
// CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(GeneratorState)));
CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(GeneratorState)));
// CHECK: hipLaunchKernelGGL((init_kernel), dim3(blocks), dim3(threads), 0, 0, states, seed, offset);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks), dim3(threads), 0, 0, states, seed, offset);
init_kernel<<<blocks, threads>>>(states, seed, offset);
// CHECK: CUDA_CALL(hipPeekAtLastError());
// CHECK: CUDA_CALL(hipDeviceSynchronize());
@@ -142,7 +142,7 @@ struct runner
const GenerateFunc& generate_func,
const Extra extra)
{
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra);
generate_kernel<<<blocks, threads>>>(states, data, size, generate_func, extra);
}
};
@@ -223,7 +223,7 @@ struct runner<curandStateMtgp32_t>
const GenerateFunc& generate_func,
const Extra extra)
{
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra);
generate_kernel<<<std::min((size_t)200, blocks), 256>>>(states, data, size, generate_func, extra);
}
};
@@ -304,7 +304,7 @@ struct runner<curandStateSobol32_t>
CUDA_CALL(cudaMemcpy(directions, h_directions, size, cudaMemcpyHostToDevice));
const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions);
// CHECK: hipLaunchKernelGGL((init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset);
init_kernel<<<dim3(blocks_x, dimensions), threads>>>(states, directions, offset);
// CHECK: CUDA_CALL(hipPeekAtLastError());
// CHECK: CUDA_CALL(hipDeviceSynchronize());
@@ -329,7 +329,7 @@ struct runner<curandStateSobol32_t>
const Extra extra)
{
const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions);
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra);
generate_kernel<<<dim3(blocks_x, dimensions), threads>>>(states, data, size / dimensions, generate_func, extra);
}
};
+7 -7
Ver fichero
@@ -10,7 +10,7 @@
#define KERNEL_CALL_AS_MACRO axpy<float><<<1, kDataLen>>>
#define KERNEL_NAME_MACRO axpy<float>
// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y)
// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y)
#define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y)
@@ -48,22 +48,22 @@ int main(int argc, char* argv[]) {
cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice);
// Launch the kernel in numerous different strange ways to exercise the prerocessor.
// CHECK: hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
axpy<<<1, kDataLen>>>(a, device_x, device_y);
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
axpy<float><<<1, kDataLen>>>(a, device_x, device_y);
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y);
axpy<float><<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y);
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
axpy<float><<<1, kDataLen>>>(ARG_LIST_AS_MACRO);
// CHECK: hipLaunchKernelGGL((KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO);
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO);
// CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0);
+1 -1
Ver fichero
@@ -94,7 +94,7 @@ int main(int argc, char *argv[])
const unsigned threadsPerBlock = 256;
printf ("info: launch 'vector_square' kernel\n");
// CHECK: hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
printf ("info: copy Device2Host\n");