reimplement HIP_INIT as hip_impl::hip_init(), add hip_init() to some of the inlined API (#966)

* reimplement HIP_INIT as a function, expose it as hip_impl::hip_init()
so that it could be called from hipLaunchKernelGGL and other inlined
HIP functions

* Don't call hip_init from ihipPreLaunchKernel
이 커밋은 다음에 포함됨:
Siu Chi Chan
2019-03-20 01:11:15 -04:00
커밋한 사람 Maneesh Gupta
부모 b1752fc9a6
커밋 24d08beef8
6개의 변경된 파일22개의 추가작업 그리고 22개의 파일을 삭제
+2 -1
파일 보기
@@ -189,6 +189,7 @@ inline
void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream,
Args... args) {
hip_impl::hip_init();
auto kernarg = hip_impl::make_kernarg(
kernel, std::tuple<Args...>{std::move(args)...});
std::size_t kernarg_size = kernarg.size();
@@ -212,4 +213,4 @@ inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBloc
std::uint32_t groupMemBytes, hipStream_t stream, Args... args) {
hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{},
std::move(args)...);
}
}
+6 -2
파일 보기
@@ -78,6 +78,10 @@ THE SOFTWARE.
#define __dparm(x)
#endif
namespace hip_impl {
hipError_t hip_init();
} // namespace hip_impl
// Structure definitions:
#ifdef __cplusplus
extern "C" {
@@ -1396,7 +1400,7 @@ inline
__attribute__((visibility("hidden")))
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
//HIP_INIT_API(hipGetSymbolAddress, devPtr, symbolName);
hip_impl::hip_init();
size_t size = 0;
return hipModuleGetGlobal(devPtr, &size, 0, (const char*)symbolName);
}
@@ -1416,7 +1420,7 @@ inline
__attribute__((visibility("hidden")))
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
// HIP_INIT_API(hipGetSymbolSize, size, symbolName);
hip_impl::hip_init();
void* devPtr = nullptr;
return hipModuleGetGlobal(&devPtr, size, 0, (const char*)symbolName);
}
+1 -1
파일 보기
@@ -32,7 +32,7 @@ THE SOFTWARE.
extern "C" std::vector<hipModule_t>*
__hipRegisterFatBinary(const void* data)
{
HIP_INIT();
hip_impl::hip_init();
tprintf(DB_FB, "Enter __hipRegisterFatBinary(%p)\n", data);
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
+1 -1
파일 보기
@@ -43,7 +43,7 @@ void __hipDumpCodeObject(const std::string& image) {
const void* __hipExtractCodeObjectFromFatBinary(const void* data,
const char* agent_name)
{
HIP_INIT();
hip_impl::hip_init();
tprintf(DB_FB, "Enter __hipExtractCodeObjectFromFatBinary(%p, \"%s\")\n",
data, agent_name);
+10 -8
파일 보기
@@ -119,9 +119,6 @@ int HCC_OPT_FLUSH = 1;
int HCC_OPT_FLUSH = 0;
#endif
std::once_flag hip_initialized;
// Array of pointers to devices.
ihipDevice_t** g_deviceArray;
@@ -1442,6 +1439,15 @@ void ihipInit() {
g_numLogicalThreads);
}
namespace hip_impl {
hipError_t hip_init() {
static std::once_flag hip_initialized;
std::call_once(hip_initialized, ihipInit);
ihipCtxStackUpdate();
return hipSuccess;
}
}
hipError_t ihipStreamSynchronize(hipStream_t stream) {
hipError_t e = hipSuccess;
@@ -1561,7 +1567,6 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp,
// Allows runtime to track some information about the stream.
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm* lp,
const char* kernelNameStr) {
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
lp->grid_dim.x = grid.x;
lp->grid_dim.y = grid.y;
@@ -1583,7 +1588,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm* lp,
const char* kernelNameStr) {
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
lp->grid_dim.x = grid;
lp->grid_dim.y = 1;
@@ -1604,7 +1608,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm* lp,
const char* kernelNameStr) {
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
lp->grid_dim.x = grid.x;
lp->grid_dim.y = grid.y;
@@ -1625,7 +1628,6 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm* lp,
const char* kernelNameStr) {
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
lp->grid_dim.x = grid;
lp->grid_dim.y = 1;
@@ -2485,4 +2487,4 @@ namespace hip_impl {
std::terminate();
#endif
}
} // Namespace hip_impl.
} // Namespace hip_impl.
+2 -9
파일 보기
@@ -288,19 +288,13 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
#define API_TRACE(IS_CMD, ...) tls_tidInfo.incApiSeqNum();
#endif
// Just initialize the HIP runtime, but don't log any trace information.
#define HIP_INIT() \
std::call_once(hip_initialized, ihipInit); \
ihipCtxStackUpdate();
#define HIP_SET_DEVICE() ihipDeviceSetState();
// This macro should be called at the beginning of every HIP API.
// It initializes the hip runtime (exactly once), and
// generates a trace string that can be output to stderr or to ATP file.
#define HIP_INIT_API(cid, ...) \
HIP_INIT() \
hip_impl::hip_init(); \
API_TRACE(0, __VA_ARGS__); \
HIP_CB_SPAWNER_OBJECT(cid);
@@ -309,7 +303,7 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
// Replace HIP_INIT_API with this call inside HIP APIs that launch work on the GPU:
// kernel launches, copy commands, memory sets, etc.
#define HIP_INIT_SPECIAL_API(cid, tbit, ...) \
HIP_INIT() \
hip_impl::hip_init(); \
API_TRACE((HIP_TRACE_API & (1 << tbit)), __VA_ARGS__); \
HIP_CB_SPAWNER_OBJECT(cid);
@@ -933,7 +927,6 @@ class ihipCtx_t {
//=================================================================================================
// Global variable definition:
extern std::once_flag hip_initialized;
extern unsigned g_deviceCnt;
extern hsa_agent_t g_cpu_agent; // the CPU agent.
extern hsa_agent_t* g_allAgents; // CPU agents + all the visible GPU agents.