reimplement HIP_INIT as hip_impl::hip_init(), add hip_init() to some of the inlined API (#966)
* reimplement HIP_INIT as a function, expose it as hip_impl::hip_init() so that it could be called from hipLaunchKernelGGL and other inlined HIP functions * Don't call hip_init from ihipPreLaunchKernel
이 커밋은 다음에 포함됨:
@@ -189,6 +189,7 @@ inline
|
||||
void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream,
|
||||
Args... args) {
|
||||
hip_impl::hip_init();
|
||||
auto kernarg = hip_impl::make_kernarg(
|
||||
kernel, std::tuple<Args...>{std::move(args)...});
|
||||
std::size_t kernarg_size = kernarg.size();
|
||||
@@ -212,4 +213,4 @@ inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBloc
|
||||
std::uint32_t groupMemBytes, hipStream_t stream, Args... args) {
|
||||
hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{},
|
||||
std::move(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,6 +78,10 @@ THE SOFTWARE.
|
||||
#define __dparm(x)
|
||||
#endif
|
||||
|
||||
namespace hip_impl {
|
||||
hipError_t hip_init();
|
||||
} // namespace hip_impl
|
||||
|
||||
// Structure definitions:
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -1396,7 +1400,7 @@ inline
|
||||
__attribute__((visibility("hidden")))
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
|
||||
//HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName);
|
||||
|
||||
hip_impl::hip_init();
|
||||
size_t size = 0;
|
||||
return hipModuleGetGlobal(devPtr, &size, 0, (const char*)symbolName);
|
||||
}
|
||||
@@ -1416,7 +1420,7 @@ inline
|
||||
__attribute__((visibility("hidden")))
|
||||
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
|
||||
// HIP_INIT_API(hipGetSymbolSize, size, symbolName);
|
||||
|
||||
hip_impl::hip_init();
|
||||
void* devPtr = nullptr;
|
||||
return hipModuleGetGlobal(&devPtr, size, 0, (const char*)symbolName);
|
||||
}
|
||||
|
||||
+1
-1
@@ -32,7 +32,7 @@ THE SOFTWARE.
|
||||
extern "C" std::vector<hipModule_t>*
|
||||
__hipRegisterFatBinary(const void* data)
|
||||
{
|
||||
HIP_INIT();
|
||||
hip_impl::hip_init();
|
||||
|
||||
tprintf(DB_FB, "Enter __hipRegisterFatBinary(%p)\n", data);
|
||||
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
|
||||
|
||||
+1
-1
@@ -43,7 +43,7 @@ void __hipDumpCodeObject(const std::string& image) {
|
||||
const void* __hipExtractCodeObjectFromFatBinary(const void* data,
|
||||
const char* agent_name)
|
||||
{
|
||||
HIP_INIT();
|
||||
hip_impl::hip_init();
|
||||
|
||||
tprintf(DB_FB, "Enter __hipExtractCodeObjectFromFatBinary(%p, \"%s\")\n",
|
||||
data, agent_name);
|
||||
|
||||
+10
-8
@@ -119,9 +119,6 @@ int HCC_OPT_FLUSH = 1;
|
||||
int HCC_OPT_FLUSH = 0;
|
||||
#endif
|
||||
|
||||
|
||||
std::once_flag hip_initialized;
|
||||
|
||||
// Array of pointers to devices.
|
||||
ihipDevice_t** g_deviceArray;
|
||||
|
||||
@@ -1442,6 +1439,15 @@ void ihipInit() {
|
||||
g_numLogicalThreads);
|
||||
}
|
||||
|
||||
namespace hip_impl {
|
||||
hipError_t hip_init() {
|
||||
static std::once_flag hip_initialized;
|
||||
std::call_once(hip_initialized, ihipInit);
|
||||
ihipCtxStackUpdate();
|
||||
return hipSuccess;
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t ihipStreamSynchronize(hipStream_t stream) {
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
@@ -1561,7 +1567,6 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp,
|
||||
// Allows runtime to track some information about the stream.
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm* lp,
|
||||
const char* kernelNameStr) {
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
lp->grid_dim.x = grid.x;
|
||||
lp->grid_dim.y = grid.y;
|
||||
@@ -1583,7 +1588,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_
|
||||
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm* lp,
|
||||
const char* kernelNameStr) {
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
lp->grid_dim.x = grid;
|
||||
lp->grid_dim.y = 1;
|
||||
@@ -1604,7 +1608,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri
|
||||
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm* lp,
|
||||
const char* kernelNameStr) {
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
lp->grid_dim.x = grid.x;
|
||||
lp->grid_dim.y = grid.y;
|
||||
@@ -1625,7 +1628,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri
|
||||
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm* lp,
|
||||
const char* kernelNameStr) {
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
lp->grid_dim.x = grid;
|
||||
lp->grid_dim.y = 1;
|
||||
@@ -2485,4 +2487,4 @@ namespace hip_impl {
|
||||
std::terminate();
|
||||
#endif
|
||||
}
|
||||
} // Namespace hip_impl.
|
||||
} // Namespace hip_impl.
|
||||
|
||||
+2
-9
@@ -288,19 +288,13 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
|
||||
#define API_TRACE(IS_CMD, ...) tls_tidInfo.incApiSeqNum();
|
||||
#endif
|
||||
|
||||
|
||||
// Just initialize the HIP runtime, but don't log any trace information.
|
||||
#define HIP_INIT() \
|
||||
std::call_once(hip_initialized, ihipInit); \
|
||||
ihipCtxStackUpdate();
|
||||
#define HIP_SET_DEVICE() ihipDeviceSetState();
|
||||
|
||||
|
||||
// This macro should be called at the beginning of every HIP API.
|
||||
// It initializes the hip runtime (exactly once), and
|
||||
// generates a trace string that can be output to stderr or to ATP file.
|
||||
#define HIP_INIT_API(cid, ...) \
|
||||
HIP_INIT() \
|
||||
hip_impl::hip_init(); \
|
||||
API_TRACE(0, __VA_ARGS__); \
|
||||
HIP_CB_SPAWNER_OBJECT(cid);
|
||||
|
||||
@@ -309,7 +303,7 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
|
||||
// Replace HIP_INIT_API with this call inside HIP APIs that launch work on the GPU:
|
||||
// kernel launches, copy commands, memory sets, etc.
|
||||
#define HIP_INIT_SPECIAL_API(cid, tbit, ...) \
|
||||
HIP_INIT() \
|
||||
hip_impl::hip_init(); \
|
||||
API_TRACE((HIP_TRACE_API & (1 << tbit)), __VA_ARGS__); \
|
||||
HIP_CB_SPAWNER_OBJECT(cid);
|
||||
|
||||
@@ -933,7 +927,6 @@ class ihipCtx_t {
|
||||
|
||||
//=================================================================================================
|
||||
// Global variable definition:
|
||||
extern std::once_flag hip_initialized;
|
||||
extern unsigned g_deviceCnt;
|
||||
extern hsa_agent_t g_cpu_agent; // the CPU agent.
|
||||
extern hsa_agent_t* g_allAgents; // CPU agents + all the visible GPU agents.
|
||||
|
||||
새 이슈에서 참조
사용자 차단