From 14e235378faaee8ab5eee7ff832f7c8a9f02b439 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Tue, 4 Feb 2020 09:07:16 -0500 Subject: [PATCH] Fix C-style hipLaunchKernel (#1835) * Fix bug in LaunchKernel test Instead of passing the address of the gpu buffer, pass the address of the pointer that holds the address of the gpu buffer * Fix hipLaunchKernel's kernarg buffer construction. The hipLaunchKernel implementation should rely on ihipModuleLaunchKernel to construct the kernarg buffer correctly based on kernel metadata. * Fix a bug in get_functions where the Kernel_descriptor wasn't constructed with the correct kernarg layout information. * Fix a bug in kernarg layout parsing dealing with kernel without any arg * teach ihipModuleLaunchKernel to handle kernel without any arg * Add a more interesting test --- hipamd/src/hip_module.cpp | 23 +++---------- hipamd/src/program_state.inl | 8 +++-- hipamd/tests/src/gcc/LaunchKernel.c | 53 ++++++++++++++++++++++------- hipamd/tests/src/gcc/LaunchKernel.h | 16 +++++++++ hipamd/tests/src/gcc/gpu.cpp | 6 ++++ 5 files changed, 73 insertions(+), 33 deletions(-) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index 415b93e457..7d4c97a7fd 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -188,7 +188,8 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t gridSi return hipErrorNotInitialized; } - } else { + } + else if (f->_kernarg_layout.size() != 0) { return hipErrorInvalidValue; } @@ -1470,21 +1471,7 @@ hipError_t hipLaunchKernel( hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)func_addr, hip_impl::target_agent(stream)); - if(kd == nullptr || kd->_header == nullptr) - return ihipLogStatus(hipErrorInvalidValue); - - size_t szKernArg = kd->_header->kernarg_segment_byte_size; - - if(args == NULL && szKernArg != 0) - return ihipLogStatus(hipErrorInvalidValue); - - void* config[]{ - HIP_LAUNCH_PARAM_BUFFER_POINTER, - args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &szKernArg, - HIP_LAUNCH_PARAM_END}; - - return ihipLogStatus(ihipModuleLaunchKernel(tls, kd, numBlocks.x, numBlocks.y, numBlocks.z, - dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, stream, nullptr, (void**)&config, nullptr, nullptr, 0)); + return hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z, + dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, + stream, args, nullptr); } diff --git a/hipamd/src/program_state.inl b/hipamd/src/program_state.inl index fcc1eb762a..272addd053 100644 --- a/hipamd/src/program_state.inl +++ b/hipamd/src/program_state.inl @@ -613,7 +613,8 @@ public: for (auto&& kernel_symbol : it->second) { functions[aa].second.emplace( function.first, - Kernel_descriptor{kernel_object(kernel_symbol), it->first}); + Kernel_descriptor{kernel_object(kernel_symbol), it->first, + kernargs_size_align(function.first)}); } } }, agent); @@ -672,11 +673,12 @@ public: auto dx1 = kernels_md.find("CodeProps", dx); dx = kernels_md.find("Args:", dx); - if (dx1 < dx) { + if (dx1 < dx || dx == std::string::npos) { dx = dx1; + // create an empty kernarg laybout vector for kernels without any arg + kernargs[fn]; continue; } - if (dx == std::string::npos) break; static constexpr decltype(kernels_md.size()) args_sz{5}; dx = parse_args_v2(kernels_md, dx + args_sz, dx1, kernargs[fn]); diff --git a/hipamd/tests/src/gcc/LaunchKernel.c b/hipamd/tests/src/gcc/LaunchKernel.c index 189d3ce614..fc59fa9d30 100644 --- a/hipamd/tests/src/gcc/LaunchKernel.c +++ b/hipamd/tests/src/gcc/LaunchKernel.c @@ -36,7 +36,7 @@ bool LaunchKernelArg() dim3 blocks = {1,1,1}; dim3 threads = {1,1,1}; - HIPCHECK(hipLaunchKernel(kernel, blocks, threads,NULL, 0, 0)); + HIPCHECK(hipLaunchKernel(kernel, blocks, threads, NULL, 0, 0)); return true; } @@ -50,9 +50,9 @@ bool LaunchKernelArg1() // Allocate Device memory HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); - - void* Args[]={A_d}; - HIPCHECK(hipLaunchKernel(kernel1, blocks, threads, Args,0,0)); + + void* Args[]={&A_d}; + HIPCHECK(hipLaunchKernel(kernel1, blocks, threads, Args, 0, 0)); // Get the result back to host memory HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); @@ -60,7 +60,7 @@ bool LaunchKernelArg1() HIPCHECK(hipFree(A_d)); if(A != 333) - return false; + return false; return true; } @@ -81,9 +81,9 @@ bool LaunchKernelArg2() HIPCHECK(hipMalloc((void**)&B_d, sizeof(int))); // Copy data from host memory to device memory - HIPCHECK(hipMemcpy(B_d,&B, sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); - void* Args[]={A_d,B_d}; + void* Args[]={&A_d, &B_d}; HIPCHECK(hipLaunchKernel(kernel2, blocks, threads, Args,0,0)); // Get the result back to host memory @@ -118,11 +118,11 @@ bool LaunchKernelArg3() HIPCHECK(hipMalloc((void**)&C_d, sizeof(int))); // Copy data from host memory to device memory - HIPCHECK(hipMemcpy(A_d,&A, sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(A_d, &A, sizeof(int), hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d,&B, sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); - void* Args[]={A_d,B_d,C_d}; + void* Args[]={&A_d, &B_d, &C_d}; HIPCHECK(hipLaunchKernel(kernel3, blocks, threads, Args,0,0)); // Get the result back to host memory @@ -138,14 +138,43 @@ bool LaunchKernelArg3() return true; } +bool LaunchKernelArg4() +{ + int A = 0; + int *A_d = NULL; + dim3 blocks = {1,1,1}; + dim3 threads = {1,1,1}; + + // Allocate Device memory + HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); + + char c = 1; + short s = 10; + int i = 100; + struct things t = {2,20,200}; + + void* Args[]={&A_d, &c, &s, &i, &t}; + HIPCHECK(hipLaunchKernel(kernel4, blocks, threads, Args, 0, 0)); + + // Get the result back to host memory + HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); + + HIPCHECK(hipFree(A_d)); + + if (A != (c + s + i + t.c + t.s + t.i)) + return false; + + return true; +} + int main() { - if( LaunchKernelArg() && LaunchKernelArg1() && LaunchKernelArg2() && - LaunchKernelArg3()) + LaunchKernelArg3() && + LaunchKernelArg4()) { printf("PASSED!\n"); } diff --git a/hipamd/tests/src/gcc/LaunchKernel.h b/hipamd/tests/src/gcc/LaunchKernel.h index b7424d8b46..e326e46635 100644 --- a/hipamd/tests/src/gcc/LaunchKernel.h +++ b/hipamd/tests/src/gcc/LaunchKernel.h @@ -17,7 +17,23 @@ * THE SOFTWARE. */ +#ifdef __cplusplus +extern "C" +{ +#endif + extern __global__ void kernel(); extern __global__ void kernel1(int*); extern __global__ void kernel2(int*,int*); extern __global__ void kernel3(int*,int*,int*); + +struct things { + char c; + short s; + int i; +}; +extern __global__ void kernel4(int*, char, short, int, struct things); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/hipamd/tests/src/gcc/gpu.cpp b/hipamd/tests/src/gcc/gpu.cpp index 00223478fa..6baeab80bf 100644 --- a/hipamd/tests/src/gcc/gpu.cpp +++ b/hipamd/tests/src/gcc/gpu.cpp @@ -19,6 +19,7 @@ #include +#include "LaunchKernel.h" extern "C" { @@ -43,4 +44,9 @@ __global__ void kernel3(int *a, int*b, int* c) *c = *a+*b; } +__global__ void kernel4(int *a, char c, short s, int i, struct things t) +{ + *a = c + s + i + t.c + t.s + t.i; +} + }//extern "C"