diff --git a/bin/hipify-perl b/bin/hipify-perl index 00ff620983..50808563ff 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1662,18 +1662,37 @@ while (@ARGV) { # match uses ? for <.*> which will be unitialized if this is not present in launch syntax. no warnings qw/uninitialized/; my $k = 0; - # Handle the <>> syntax with empty args: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1$2, dim3($3), dim3($4), $5, $6)/g; - # Handle the <>> syntax with non-empty args: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1$2, dim3($3), dim3($4), $5, $6, /g; - # Handle the <>> syntax with empty args: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1$2, dim3($3), dim3($4), $5, 0)/g; - # Handle the <>> syntax with non-empty args: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1$2, dim3($3), dim3($4), $5, 0, /g; - # Handle the <>> syntax with empty args: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1$2, dim3($3), dim3($4), 0, 0)/g; - # Handle the <>> syntax with non-empty args: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1$2, dim3($3), dim3($4), 0, 0, /g; + + # Handle the kern<...><<>> syntax with empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g; + # Handle the kern<<>> syntax with empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g; + + # Handle the kern<...><<>> syntax with non-empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g; + # Handle the kern<<>> syntax with non-empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g; + + # Handle the kern<...><<>> syntax with empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g; + # Handle the kern<<>> syntax with empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g; + + # Handle the kern<...><<>> syntax with non-empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g; + # Handle the kern<<>> syntax with non-empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g; + + # Handle the kern<...><<>> syntax with empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g; + # Handle the kern<<>> syntax with empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g; + + # Handle the kern<...><<>> syntax with non-empty args: + $k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g; + # Handle the kern<<>> syntax with non-empty args: + $k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g; + if ($k) { $ft{'kern_launch'} += $k; $Tkernels{$1} ++; diff --git a/hipify-clang/src/HipifyAction.cpp b/hipify-clang/src/HipifyAction.cpp index 6b12aaa8c3..80d80b5f2c 100644 --- a/hipify-clang/src/HipifyAction.cpp +++ b/hipify-clang/src/HipifyAction.cpp @@ -289,24 +289,35 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc if (!launchKernel) { return false; } + const clang::Expr* calleeExpr = launchKernel->getCallee(); + if (!calleeExpr) { + return false; + } + const clang::FunctionDecl *caleeDecl = launchKernel->getDirectCallee(); + if (!caleeDecl) { + return false; + } + const clang::CallExpr* config = launchKernel->getConfig(); + if (!config) { + return false; + } clang::SmallString<40> XStr; llvm::raw_svector_ostream OS(XStr); clang::LangOptions DefaultLangOptions; clang::SourceManager* SM = Result.SourceManager; - - const clang::Expr& calleeExpr = *(launchKernel->getCallee()); - OS << "hipLaunchKernelGGL(" << readSourceText(*SM, calleeExpr.getSourceRange()) << ", "; + OS << "hipLaunchKernelGGL("; + if (caleeDecl->isTemplateInstantiation()) OS << "("; + OS << readSourceText(*SM, calleeExpr->getSourceRange()); + if (caleeDecl->isTemplateInstantiation()) OS << ")"; + OS << ", "; // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero. - const clang::CallExpr& config = *(launchKernel->getConfig()); - // Copy the two dimensional arguments verbatim. - OS << "dim3(" << readSourceText(*SM, config.getArg(0)->getSourceRange()) << "), "; - OS << "dim3(" << readSourceText(*SM, config.getArg(1)->getSourceRange()) << "), "; - + OS << "dim3(" << readSourceText(*SM, config->getArg(0)->getSourceRange()) << "), "; + OS << "dim3(" << readSourceText(*SM, config->getArg(1)->getSourceRange()) << "), "; // The stream/memory arguments default to zero if omitted. - OS << stringifyZeroDefaultedArg(*SM, config.getArg(2)) << ", "; - OS << stringifyZeroDefaultedArg(*SM, config.getArg(3)); + OS << stringifyZeroDefaultedArg(*SM, config->getArg(2)) << ", "; + OS << stringifyZeroDefaultedArg(*SM, config->getArg(3)); // If there are ordinary arguments to the kernel, just copy them verbatim into our new call. int numArgs = launchKernel->getNumArgs(); diff --git a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp b/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp index b1cdec702a..74bf4b5a0d 100644 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp +++ b/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp @@ -121,7 +121,7 @@ struct runner const size_t states_size = blocks * threads; // CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(GeneratorState))); CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(GeneratorState))); - // CHECK: hipLaunchKernelGGL(init_kernel, dim3(blocks), dim3(threads), 0, 0, states, seed, offset); + // CHECK: hipLaunchKernelGGL((init_kernel), dim3(blocks), dim3(threads), 0, 0, states, seed, offset); init_kernel<<>>(states, seed, offset); // CHECK: CUDA_CALL(hipPeekAtLastError()); // CHECK: CUDA_CALL(hipDeviceSynchronize()); @@ -142,7 +142,7 @@ struct runner const GenerateFunc& generate_func, const Extra extra) { - // CHECK: hipLaunchKernelGGL(generate_kernel, dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra); + // CHECK: hipLaunchKernelGGL((generate_kernel), dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra); generate_kernel<<>>(states, data, size, generate_func, extra); } }; @@ -223,7 +223,7 @@ struct runner const GenerateFunc& generate_func, const Extra extra) { - // CHECK: hipLaunchKernelGGL(generate_kernel, dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra); + // CHECK: hipLaunchKernelGGL((generate_kernel), dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra); generate_kernel<<>>(states, data, size, generate_func, extra); } }; @@ -304,7 +304,7 @@ struct runner CUDA_CALL(cudaMemcpy(directions, h_directions, size, cudaMemcpyHostToDevice)); const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: hipLaunchKernelGGL(init_kernel, dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, directions, offset); + // CHECK: hipLaunchKernelGGL((init_kernel), dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, directions, offset); init_kernel<<>>(states, directions, offset); // CHECK: CUDA_CALL(hipPeekAtLastError()); // CHECK: CUDA_CALL(hipDeviceSynchronize()); @@ -329,7 +329,7 @@ struct runner const Extra extra) { const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: hipLaunchKernelGGL(generate_kernel, dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra); + // CHECK: hipLaunchKernelGGL((generate_kernel), dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra); generate_kernel<<>>(states, data, size / dimensions, generate_func, extra); } }; diff --git a/tests/hipify-clang/unit_tests/samples/axpy.cu b/tests/hipify-clang/unit_tests/samples/axpy.cu index b315715ff9..549dc442aa 100644 --- a/tests/hipify-clang/unit_tests/samples/axpy.cu +++ b/tests/hipify-clang/unit_tests/samples/axpy.cu @@ -10,7 +10,7 @@ #define KERNEL_CALL_AS_MACRO axpy<<<1, kDataLen>>> #define KERNEL_NAME_MACRO axpy -// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y) +// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y) #define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y) @@ -48,22 +48,22 @@ int main(int argc, char* argv[]) { cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); // Launch the kernel in numerous different strange ways to exercise the prerocessor. - // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); + // CHECK: hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); axpy<<<1, kDataLen>>>(a, device_x, device_y); - // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); + // CHECK: hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); axpy<<<1, kDataLen>>>(a, device_x, device_y); - // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y); + // CHECK: hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y); axpy<<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y); - // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); + // CHECK: hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); axpy<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); - // CHECK: hipLaunchKernelGGL(KERNEL_NAME_MACRO, dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); + // CHECK: hipLaunchKernelGGL((KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); - // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); + // CHECK: hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO); // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); diff --git a/tests/hipify-clang/unit_tests/samples/square.cu b/tests/hipify-clang/unit_tests/samples/square.cu index 4609fd37e4..690d99f848 100644 --- a/tests/hipify-clang/unit_tests/samples/square.cu +++ b/tests/hipify-clang/unit_tests/samples/square.cu @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) const unsigned threadsPerBlock = 256; printf ("info: launch 'vector_square' kernel\n"); - // CHECK: hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N); + // CHECK: hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N); vector_square <<>> (C_d, A_d, N); printf ("info: copy Device2Host\n");