Fix C-style hipLaunchKernel (#1835)

* Fix bug in LaunchKernel test
Instead of passing the address of the gpu buffer, pass the address
of the pointer that holds the address of the gpu buffer

* Fix hipLaunchKernel's kernarg buffer construction.
The hipLaunchKernel implementation should rely on ihipModuleLaunchKernel
to construct the kernarg buffer correctly based on kernel metadata.

* Fix a bug in get_functions where the Kernel_descriptor wasn't constructed with the correct kernarg layout information.

* Fix a bug in kernarg layout parsing dealing with kernel without any arg

* teach ihipModuleLaunchKernel to handle kernel without any arg

* Add a more interesting test
This commit is contained in:
Siu Chi Chan
2020-02-04 09:07:16 -05:00
کامیت شده توسط GitHub
والد 6e62ea5ee3
کامیت 14e235378f
5فایلهای تغییر یافته به همراه73 افزوده شده و 33 حذف شده
+5 -18
مشاهده پرونده
@@ -188,7 +188,8 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t gridSi
return hipErrorNotInitialized;
}
} else {
}
else if (f->_kernarg_layout.size() != 0) {
return hipErrorInvalidValue;
}
@@ -1470,21 +1471,7 @@ hipError_t hipLaunchKernel(
hipFunction_t kd = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)func_addr,
hip_impl::target_agent(stream));
if(kd == nullptr || kd->_header == nullptr)
return ihipLogStatus(hipErrorInvalidValue);
size_t szKernArg = kd->_header->kernarg_segment_byte_size;
if(args == NULL && szKernArg != 0)
return ihipLogStatus(hipErrorInvalidValue);
void* config[]{
HIP_LAUNCH_PARAM_BUFFER_POINTER,
args,
HIP_LAUNCH_PARAM_BUFFER_SIZE,
&szKernArg,
HIP_LAUNCH_PARAM_END};
return ihipLogStatus(ihipModuleLaunchKernel(tls, kd, numBlocks.x, numBlocks.y, numBlocks.z,
dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes, stream, nullptr, (void**)&config, nullptr, nullptr, 0));
return hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z,
dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
stream, args, nullptr);
}
+5 -3
مشاهده پرونده
@@ -613,7 +613,8 @@ public:
for (auto&& kernel_symbol : it->second) {
functions[aa].second.emplace(
function.first,
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
Kernel_descriptor{kernel_object(kernel_symbol), it->first,
kernargs_size_align(function.first)});
}
}
}, agent);
@@ -672,11 +673,12 @@ public:
auto dx1 = kernels_md.find("CodeProps", dx);
dx = kernels_md.find("Args:", dx);
if (dx1 < dx) {
if (dx1 < dx || dx == std::string::npos) {
dx = dx1;
// create an empty kernarg laybout vector for kernels without any arg
kernargs[fn];
continue;
}
if (dx == std::string::npos) break;
static constexpr decltype(kernels_md.size()) args_sz{5};
dx = parse_args_v2(kernels_md, dx + args_sz, dx1, kernargs[fn]);
@@ -36,7 +36,7 @@ bool LaunchKernelArg()
dim3 blocks = {1,1,1};
dim3 threads = {1,1,1};
HIPCHECK(hipLaunchKernel(kernel, blocks, threads,NULL, 0, 0));
HIPCHECK(hipLaunchKernel(kernel, blocks, threads, NULL, 0, 0));
return true;
}
@@ -50,9 +50,9 @@ bool LaunchKernelArg1()
// Allocate Device memory
HIPCHECK(hipMalloc((void**)&A_d, sizeof(int)));
void* Args[]={A_d};
HIPCHECK(hipLaunchKernel(kernel1, blocks, threads, Args,0,0));
void* Args[]={&A_d};
HIPCHECK(hipLaunchKernel(kernel1, blocks, threads, Args, 0, 0));
// Get the result back to host memory
HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost));
@@ -60,7 +60,7 @@ bool LaunchKernelArg1()
HIPCHECK(hipFree(A_d));
if(A != 333)
return false;
return false;
return true;
}
@@ -81,9 +81,9 @@ bool LaunchKernelArg2()
HIPCHECK(hipMalloc((void**)&B_d, sizeof(int)));
// Copy data from host memory to device memory
HIPCHECK(hipMemcpy(B_d,&B, sizeof(int), hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice));
void* Args[]={A_d,B_d};
void* Args[]={&A_d, &B_d};
HIPCHECK(hipLaunchKernel(kernel2, blocks, threads, Args,0,0));
// Get the result back to host memory
@@ -118,11 +118,11 @@ bool LaunchKernelArg3()
HIPCHECK(hipMalloc((void**)&C_d, sizeof(int)));
// Copy data from host memory to device memory
HIPCHECK(hipMemcpy(A_d,&A, sizeof(int), hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(A_d, &A, sizeof(int), hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(B_d,&B, sizeof(int), hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice));
void* Args[]={A_d,B_d,C_d};
void* Args[]={&A_d, &B_d, &C_d};
HIPCHECK(hipLaunchKernel(kernel3, blocks, threads, Args,0,0));
// Get the result back to host memory
@@ -138,14 +138,43 @@ bool LaunchKernelArg3()
return true;
}
bool LaunchKernelArg4()
{
int A = 0;
int *A_d = NULL;
dim3 blocks = {1,1,1};
dim3 threads = {1,1,1};
// Allocate Device memory
HIPCHECK(hipMalloc((void**)&A_d, sizeof(int)));
char c = 1;
short s = 10;
int i = 100;
struct things t = {2,20,200};
void* Args[]={&A_d, &c, &s, &i, &t};
HIPCHECK(hipLaunchKernel(kernel4, blocks, threads, Args, 0, 0));
// Get the result back to host memory
HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost));
HIPCHECK(hipFree(A_d));
if (A != (c + s + i + t.c + t.s + t.i))
return false;
return true;
}
int main()
{
if( LaunchKernelArg() &&
LaunchKernelArg1() &&
LaunchKernelArg2() &&
LaunchKernelArg3())
LaunchKernelArg3() &&
LaunchKernelArg4())
{
printf("PASSED!\n");
}
@@ -17,7 +17,23 @@
* THE SOFTWARE.
*/
#ifdef __cplusplus
extern "C"
{
#endif
extern __global__ void kernel();
extern __global__ void kernel1(int*);
extern __global__ void kernel2(int*,int*);
extern __global__ void kernel3(int*,int*,int*);
struct things {
char c;
short s;
int i;
};
extern __global__ void kernel4(int*, char, short, int, struct things);
#ifdef __cplusplus
}
#endif
@@ -19,6 +19,7 @@
#include<hip/hip_runtime.h>
#include "LaunchKernel.h"
extern "C"
{
@@ -43,4 +44,9 @@ __global__ void kernel3(int *a, int*b, int* c)
*c = *a+*b;
}
__global__ void kernel4(int *a, char c, short s, int i, struct things t)
{
*a = c + s + i + t.c + t.s + t.i;
}
}//extern "C"