From 68cc78778192581c9f99a142d4a017aca0a97caa Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Wed, 4 Dec 2019 01:20:51 -0500 Subject: [PATCH] [hip] refactoring hipExtLaunchMultiKernelMultiDevice API (#1698) [Background] it was found that if lazy linking used for a library that calls hipExtLaunchMultiKernelMultiDevice API then this API can get the wrong program_state object for looking up device kernels leading to a "No device code available" error in this API. To fix this issue, the API was refactored to be inline and get and pass the correct program_state to an internal hip API to request a multi-device kernel launch. --- .../hip/hcc_detail/functional_grid_launch.hpp | 14 +++++++++++++ include/hip/hcc_detail/hip_runtime_api.h | 4 ++++ src/hip_module.cpp | 21 ++++++++++--------- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index c9953c27d4..e7d8cc623f 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -34,6 +34,9 @@ THE SOFTWARE. #include #include +hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, + unsigned int flags, hip_impl::program_state& ps); + #pragma GCC visibility push(hidden) namespace hip_impl { @@ -176,4 +179,15 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, numBlocks, dimBlocks, sharedMemBytes, stream, &config[0]); } + +inline +__attribute__((visibility("hidden"))) +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + hip_impl::hip_init(); + auto& ps = hip_impl::get_program_state(); + return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps); + +} + #pragma GCC visibility pop diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 48cec81da4..d25d0f3425 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -2960,6 +2960,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +#if defined(__clang__) && defined(__HIP__) /** * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched * on respective streams before enqueuing any other work on the specified streams from any other threads @@ -2974,6 +2975,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags); +#endif // doxygen end Version Management /** @@ -3395,12 +3397,14 @@ inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchP return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); } +#if defined(__clang__) && defined(__HIP__) template inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, unsigned int numDevices, unsigned int flags = 0) { return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags); } +#endif /* * @brief Unbinds the textuer bound to @p tex diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 498ee5bb03..552ccfccac 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -311,18 +311,18 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0)); } -hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags) { - HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); +__attribute__((visibility("default"))) +hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags, hip_impl::program_state& ps) { hipError_t result; if ((numDevices > g_deviceCnt) || (launchParamsList == nullptr)) { - return ihipLogStatus(hipErrorInvalidValue); + return hipErrorInvalidValue; } hipFunction_t* kds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); if (kds == nullptr) { - return ihipLogStatus(hipErrorNotInitialized); + return hipErrorNotInitialized; } // prepare all kernel descriptors for each device as all streams will be locked in the next loop @@ -330,15 +330,15 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, const hipLaunchParams& lp = launchParamsList[i]; if (lp.stream == nullptr) { free(kds); - return ihipLogStatus(hipErrorNotInitialized); + return hipErrorNotInitialized; } - kds[i] = hip_impl::get_program_state().kernel_descriptor(reinterpret_cast(lp.func), + kds[i] = ps.kernel_descriptor(reinterpret_cast(lp.func), hip_impl::target_agent(lp.stream)); if (kds[i] == nullptr) { free(kds); - return ihipLogStatus(hipErrorInvalidValue); + return hipErrorInvalidValue; } - hip_impl::kernargs_size_align kargs = hip_impl::get_program_state().get_kernargs_size_align( + hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align( reinterpret_cast(lp.func)); kds[i]->_kernarg_layout = *reinterpret_cast>*>( kargs.getHandle()); @@ -352,6 +352,7 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, #endif } + GET_TLS(); // launch kernels for each device for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; @@ -377,7 +378,7 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, free(kds); - return ihipLogStatus(result); + return result; } namespace {