[HIPIFY] Return to wrapping template-instantiation kernel launches with the HIP_KERNEL_NAME(...) macro
[REASON] 1. hip-clang is fine with a templated kernel launch, so the extra brackets are unneeded there: HIP_KERNEL_NAME(...) expands to __VA_ARGS__. 2. HCC is not, so for it HIP_KERNEL_NAME(...) expands to (__VA_ARGS__). [TODO] Remove kernel name wrapping entirely once HCC is finally obsolete. + Update the perl generation, hipify-perl, and the affected tests accordingly.
Este commit está contenido en:
+6
-6
@@ -1610,32 +1610,32 @@ sub transformKernelLaunch {
|
||||
my $k = 0;
|
||||
|
||||
# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6)/g;
|
||||
# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;
|
||||
|
||||
# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6, /g;
|
||||
# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;
|
||||
|
||||
# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0)/g;
|
||||
# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;
|
||||
|
||||
# Handle the kern<...><<<Dg, Db, Ns>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0, /g;
|
||||
# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;
|
||||
|
||||
# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0)/g;
|
||||
# Handle the kern<<<Dg, Db>>>() syntax with empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\((\s*)\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;
|
||||
|
||||
# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;
|
||||
$k += s/(\w+)\s*<(.+)>\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0, /g;
|
||||
# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:
|
||||
$k += s/(\w+)\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>(\s*)\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;
|
||||
|
||||
|
||||
@@ -197,32 +197,32 @@ namespace perl {
|
||||
*streamPtr.get() << tab << sMy_k << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns, S>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db, Ns>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>() syntax with empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;" << std::endl << std::endl;
|
||||
|
||||
*streamPtr.get() << tab << "# Handle the kern<...><<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << std::endl;
|
||||
*streamPtr.get() << tab << "# Handle the kern<<<Dg, Db>>>(...) syntax with non-empty args:" << std::endl;
|
||||
*streamPtr.get() << tab << "$k += s/(\\w+)\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;" << std::endl << std::endl;
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ namespace mat = clang::ast_matchers;
|
||||
|
||||
const std::string sHIP_DYNAMIC_SHARED = "HIP_DYNAMIC_SHARED";
|
||||
std::string sHIP_SYMBOL = "HIP_SYMBOL";
|
||||
std::string sHIP_KERNEL_NAME = "HIP_KERNEL_NAME";
|
||||
std::string s_reinterpret_cast = "reinterpret_cast<const void*>";
|
||||
const std::string sHipLaunchKernelGGL = "hipLaunchKernelGGL(";
|
||||
const std::string sDim3 = "dim3(";
|
||||
@@ -348,7 +349,7 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc
|
||||
clang::LangOptions DefaultLangOptions;
|
||||
clang::SourceManager* SM = Result.SourceManager;
|
||||
OS << sHipLaunchKernelGGL;
|
||||
if (caleeDecl->isTemplateInstantiation()) OS << "(";
|
||||
if (caleeDecl->isTemplateInstantiation()) OS << sHIP_KERNEL_NAME << "(";
|
||||
OS << readSourceText(*SM, calleeExpr->getSourceRange());
|
||||
if (caleeDecl->isTemplateInstantiation()) OS << ")";
|
||||
OS << ", ";
|
||||
|
||||
@@ -257,7 +257,7 @@ void runTest() {
|
||||
// CHECK: hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice);
|
||||
cudaMemcpy(dOData, hOData, memSize, cudaMemcpyHostToDevice);
|
||||
// Execute the kernel
|
||||
// CHECK: hipLaunchKernelGGL((testKernel), dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(testKernel), dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
|
||||
testKernel<<<numBlocks, numThreads>>>(dOData);
|
||||
// Copy result from device to host
|
||||
// CHECK: hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost);
|
||||
|
||||
@@ -121,7 +121,7 @@ struct runner
|
||||
const size_t states_size = blocks * threads;
|
||||
// CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(GeneratorState)));
|
||||
CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(GeneratorState)));
|
||||
// CHECK: hipLaunchKernelGGL((init_kernel), dim3(blocks), dim3(threads), 0, 0, states, seed, offset);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks), dim3(threads), 0, 0, states, seed, offset);
|
||||
init_kernel<<<blocks, threads>>>(states, seed, offset);
|
||||
// CHECK: CUDA_CALL(hipPeekAtLastError());
|
||||
// CHECK: CUDA_CALL(hipDeviceSynchronize());
|
||||
@@ -142,7 +142,7 @@ struct runner
|
||||
const GenerateFunc& generate_func,
|
||||
const Extra extra)
|
||||
{
|
||||
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra);
|
||||
generate_kernel<<<blocks, threads>>>(states, data, size, generate_func, extra);
|
||||
}
|
||||
};
|
||||
@@ -223,7 +223,7 @@ struct runner<curandStateMtgp32_t>
|
||||
const GenerateFunc& generate_func,
|
||||
const Extra extra)
|
||||
{
|
||||
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra);
|
||||
generate_kernel<<<std::min((size_t)200, blocks), 256>>>(states, data, size, generate_func, extra);
|
||||
}
|
||||
};
|
||||
@@ -304,7 +304,7 @@ struct runner<curandStateSobol32_t>
|
||||
CUDA_CALL(cudaMemcpy(directions, h_directions, size, cudaMemcpyHostToDevice));
|
||||
|
||||
const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions);
|
||||
// CHECK: hipLaunchKernelGGL((init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset);
|
||||
init_kernel<<<dim3(blocks_x, dimensions), threads>>>(states, directions, offset);
|
||||
// CHECK: CUDA_CALL(hipPeekAtLastError());
|
||||
// CHECK: CUDA_CALL(hipDeviceSynchronize());
|
||||
@@ -329,7 +329,7 @@ struct runner<curandStateSobol32_t>
|
||||
const Extra extra)
|
||||
{
|
||||
const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions);
|
||||
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra);
|
||||
generate_kernel<<<dim3(blocks_x, dimensions), threads>>>(states, data, size / dimensions, generate_func, extra);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#define KERNEL_CALL_AS_MACRO axpy<float><<<1, kDataLen>>>
|
||||
#define KERNEL_NAME_MACRO axpy<float>
|
||||
|
||||
// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y)
|
||||
// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y)
|
||||
#define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y)
|
||||
|
||||
|
||||
@@ -48,22 +48,22 @@ int main(int argc, char* argv[]) {
|
||||
cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch the kernel in numerous different strange ways to exercise the preprocessor.
|
||||
// CHECK: hipLaunchKernelGGL((axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
|
||||
axpy<<<1, kDataLen>>>(a, device_x, device_y);
|
||||
|
||||
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);
|
||||
axpy<float><<<1, kDataLen>>>(a, device_x, device_y);
|
||||
|
||||
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y);
|
||||
axpy<float><<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y);
|
||||
|
||||
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
|
||||
axpy<float><<<1, kDataLen>>>(ARG_LIST_AS_MACRO);
|
||||
|
||||
// CHECK: hipLaunchKernelGGL((KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
|
||||
KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO);
|
||||
|
||||
// CHECK: hipLaunchKernelGGL((axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy<float>), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO);
|
||||
KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO);
|
||||
|
||||
// CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0);
|
||||
|
||||
@@ -94,7 +94,7 @@ int main(int argc, char *argv[])
|
||||
const unsigned threadsPerBlock = 256;
|
||||
|
||||
printf ("info: launch 'vector_square' kernel\n");
|
||||
// CHECK: hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
// CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
vector_square <<<blocks, threadsPerBlock>>> (C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
|
||||
Referencia en una nueva incidencia
Block a user