From 214ec53da3f670caaeea06fbefda4a6594918958 Mon Sep 17 00:00:00 2001 From: cdevadas Date: Thu, 2 May 2019 16:10:07 +0530 Subject: [PATCH] Runtime changes to append implicit kernel arguments. Appended 48 zero-filled bytes (after padding to 8-byte alignment) to the kernarg area at runtime. The implicit arguments are enabled primarily for the hostcall services and are completely abstracted from the user code. They are enabled for both hip-clang and hip-hcc. --- hipamd/src/hip_module.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index e8a8801e98..91544b82a8 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -55,6 +55,10 @@ THE SOFTWARE. using namespace ELFIO; using namespace std; +// For HIP implicit kernargs. +static const size_t HIP_IMPLICIT_KERNARG_SIZE = 48; +static const size_t HIP_IMPLICIT_KERNARG_ALIGNMENT = 8; + // calculate MD5 checksum inline std::string checksum(size_t size, const char *source) { // FNV-1a hashing, 64-bit version @@ -146,33 +150,28 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, ihipDevice_t* currentDevice = ihipGetDevice(deviceId); hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; - void* config[5] = {0}; - size_t kernArgSize; - - std::vector tmp{}; + std::vector kernargs{}; if (kernelParams) { if (extra) return hipErrorInvalidValue; for (auto&& x : f->_kernarg_layout) { const auto p{static_cast(*kernelParams)}; - tmp.insert( - tmp.cend(), + kernargs.insert( + kernargs.cend(), round_up_to_next_multiple_nonnegative( - tmp.size(), x.second) - tmp.size(), + kernargs.size(), x.second) - kernargs.size(), '\0'); - tmp.insert(tmp.cend(), p, p + x.first); + kernargs.insert(kernargs.cend(), p, p + x.first); ++kernelParams; } - config[1] = static_cast(tmp.data()); - - kernArgSize = tmp.size(); } else if (extra) { - memcpy(config, extra, sizeof(size_t) * 5); - if (config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && - 
config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END) { - kernArgSize = *(size_t*)(config[3]); + if (extra[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && + extra[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && extra[4] == HIP_LAUNCH_PARAM_END) { + auto args = (char*)extra[1]; + size_t argSize = *(size_t*)(extra[3]); + kernargs.insert(kernargs.end(), args, args+argSize); } else { return hipErrorNotInitialized; } @@ -181,6 +180,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, return hipErrorInvalidValue; } + // Insert 48-bytes at the end for implicit kernel arguments and fill with value zero. + size_t padSize = (~kernargs.size() + 1) & (HIP_IMPLICIT_KERNARG_ALIGNMENT - 1); + kernargs.insert(kernargs.end(), padSize + HIP_IMPLICIT_KERNARG_SIZE, 0); /* Kernel argument preparation. @@ -230,7 +232,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hc::completion_future cf; - lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize, + lp.av->dispatch_hsa_kernel(&aql, kernargs.data(), kernargs.size(), (startEvent || stopEvent) ? &cf : nullptr #if (__hcc_workweek__ > 17312) ,