From 24d08beef8bb91c572c54d6bccf6d7144d21ee8a Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Wed, 20 Mar 2019 01:11:15 -0400 Subject: [PATCH] reimplement HIP_INIT as hip_impl::hip_init(), add hip_init() to some of the inlined API (#966) * reimplement HIP_INIT as a function, expose it as hip_impl::hip_init() so that it could be called from hipLaunchKernelGGL and other inlined HIP functions * Don't call hip_init from ihipPreLaunchKernel --- .../hip/hcc_detail/functional_grid_launch.hpp | 3 ++- include/hip/hcc_detail/hip_runtime_api.h | 8 ++++++-- src/hip_clang.cpp | 2 +- src/hip_fatbin.cpp | 2 +- src/hip_hcc.cpp | 18 ++++++++++-------- src/hip_hcc_internal.h | 11 ++--------- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index 29a389f642..0e541001bf 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -189,6 +189,7 @@ inline void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, std::uint32_t sharedMemBytes, hipStream_t stream, Args... args) { + hip_impl::hip_init(); auto kernarg = hip_impl::make_kernarg( kernel, std::tuple{std::move(args)...}); std::size_t kernarg_size = kernarg.size(); @@ -212,4 +213,4 @@ inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBloc std::uint32_t groupMemBytes, hipStream_t stream, Args... args) { hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{}, std::move(args)...); -} \ No newline at end of file +} diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index ea93d28bfe..3b4f8fac5c 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -78,6 +78,10 @@ THE SOFTWARE. #define __dparm(x) #endif +namespace hip_impl { +hipError_t hip_init(); +} // namespace hip_impl + // Structure definitions: #ifdef __cplusplus extern "C" { @@ -1396,7 +1400,7 @@ inline __attribute__((visibility("hidden"))) hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { //HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName); - + hip_impl::hip_init(); size_t size = 0; return hipModuleGetGlobal(devPtr, &size, 0, (const char*)symbolName); } @@ -1416,7 +1420,7 @@ inline __attribute__((visibility("hidden"))) hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) { // HIP_INIT_API(hipGetSymbolSize, size, symbolName); - + hip_impl::hip_init(); void* devPtr = nullptr; return hipModuleGetGlobal(&devPtr, size, 0, (const char*)symbolName); } diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index 0d210e2b8c..9af4f5a7a2 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. extern "C" std::vector* __hipRegisterFatBinary(const void* data) { - HIP_INIT(); + hip_impl::hip_init(); tprintf(DB_FB, "Enter __hipRegisterFatBinary(%p)\n", data); const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); diff --git a/src/hip_fatbin.cpp b/src/hip_fatbin.cpp index 8fe7740ed7..5aab0b7101 100644 --- a/src/hip_fatbin.cpp +++ b/src/hip_fatbin.cpp @@ -43,7 +43,7 @@ void __hipDumpCodeObject(const std::string& image) { const void* __hipExtractCodeObjectFromFatBinary(const void* data, const char* agent_name) { - HIP_INIT(); + hip_impl::hip_init(); tprintf(DB_FB, "Enter __hipExtractCodeObjectFromFatBinary(%p, \"%s\")\n", data, agent_name); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 3f0c4ba6b8..8a291f3ea4 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -119,9 +119,6 @@ int HCC_OPT_FLUSH = 1; int HCC_OPT_FLUSH = 0; #endif - -std::once_flag hip_initialized; - // Array of pointers to devices. ihipDevice_t** g_deviceArray; @@ -1442,6 +1439,15 @@ void ihipInit() { g_numLogicalThreads); } +namespace hip_impl { +hipError_t hip_init() { + static std::once_flag hip_initialized; + std::call_once(hip_initialized, ihipInit); + ihipCtxStackUpdate(); + return hipSuccess; +} +} + hipError_t ihipStreamSynchronize(hipStream_t stream) { hipError_t e = hipSuccess; @@ -1561,7 +1567,6 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp, // Allows runtime to track some information about the stream. hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm* lp, const char* kernelNameStr) { - HIP_INIT(); stream = ihipSyncAndResolveStream(stream); lp->grid_dim.x = grid.x; lp->grid_dim.y = grid.y; @@ -1583,7 +1588,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm* lp, const char* kernelNameStr) { - HIP_INIT(); stream = ihipSyncAndResolveStream(stream); lp->grid_dim.x = grid; lp->grid_dim.y = 1; @@ -1604,7 +1608,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm* lp, const char* kernelNameStr) { - HIP_INIT(); stream = ihipSyncAndResolveStream(stream); lp->grid_dim.x = grid.x; lp->grid_dim.y = grid.y; @@ -1625,7 +1628,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm* lp, const char* kernelNameStr) { - HIP_INIT(); stream = ihipSyncAndResolveStream(stream); lp->grid_dim.x = grid; lp->grid_dim.y = 1; @@ -2485,4 +2487,4 @@ namespace hip_impl { std::terminate(); #endif } -} // Namespace hip_impl. \ No newline at end of file +} // Namespace hip_impl. diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index be794956f6..95a65b55c9 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -288,19 +288,13 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr); #define API_TRACE(IS_CMD, ...) tls_tidInfo.incApiSeqNum(); #endif - -// Just initialize the HIP runtime, but don't log any trace information. -#define HIP_INIT() \ - std::call_once(hip_initialized, ihipInit); \ - ihipCtxStackUpdate(); #define HIP_SET_DEVICE() ihipDeviceSetState(); - // This macro should be called at the beginning of every HIP API. // It initializes the hip runtime (exactly once), and // generates a trace string that can be output to stderr or to ATP file. #define HIP_INIT_API(cid, ...) \ - HIP_INIT() \ + hip_impl::hip_init(); \ API_TRACE(0, __VA_ARGS__); \ HIP_CB_SPAWNER_OBJECT(cid); @@ -309,7 +303,7 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr); // Replace HIP_INIT_API with this call inside HIP APIs that launch work on the GPU: // kernel launches, copy commands, memory sets, etc. #define HIP_INIT_SPECIAL_API(cid, tbit, ...) \ - HIP_INIT() \ + hip_impl::hip_init(); \ API_TRACE((HIP_TRACE_API & (1 << tbit)), __VA_ARGS__); \ HIP_CB_SPAWNER_OBJECT(cid); @@ -933,7 +927,6 @@ class ihipCtx_t { //================================================================================================= // Global variable definition: -extern std::once_flag hip_initialized; extern unsigned g_deviceCnt; extern hsa_agent_t g_cpu_agent; // the CPU agent. extern hsa_agent_t* g_allAgents; // CPU agents + all the visible GPU agents.