diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 7e887d1172..755d15d8aa 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -3007,10 +3007,12 @@ const char* hipKernelNameRef(const hipFunction_t f); #ifdef __cplusplus +class TlsData; + hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, const hipChannelFormatDesc* desc, size_t size = UINT_MAX); -hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, +hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t size, textureReference* tex); @@ -3032,7 +3034,7 @@ hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { - return ihipBindTextureImpl(dim, readMode, offset, devPtr, &desc, size, &tex); + return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex); } /* @@ -3052,7 +3054,7 @@ hipError_t hipBindTexture(size_t* offset, struct texture& tex, template hipError_t hipBindTexture(size_t* offset, struct texture& tex, const void* devPtr, size_t size = UINT_MAX) { - return ihipBindTextureImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex); + return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex); } // C API @@ -3082,27 +3084,27 @@ hipError_t hipBindTexture2D(size_t* offset, struct texture& te hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, const hipChannelFormatDesc* desc); -hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode, +hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, hipArray_const_t array, const struct hipChannelFormatDesc& desc, textureReference* tex); template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array) { - return ihipBindTextureToArrayImpl(dim, readMode, array, tex.channelDesc, &tex); + return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex); } template hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array, const struct hipChannelFormatDesc& desc) { - return ihipBindTextureToArrayImpl(dim, readMode, array, desc, &tex); + return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex); } template inline static hipError_t hipBindTextureToArray(struct texture *tex, hipArray_const_t array, const struct hipChannelFormatDesc* desc) { - return ihipBindTextureToArrayImpl(dim, readMode, array, *desc, tex); + return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex); } // C API diff --git a/hipamd/src/functional_grid_launch.inl b/hipamd/src/functional_grid_launch.inl index 13679dcab3..3c611ed468 100644 --- a/hipamd/src/functional_grid_launch.inl +++ b/hipamd/src/functional_grid_launch.inl @@ -47,8 +47,8 @@ namespace hip_impl return *static_cast( stream->locked_getAv()->get_hsa_agent()); } - else if ( - ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) { + GET_TLS(); + if (ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) { return ihipGetDevice( ihipGetTlsDefaultCtx()->getDevice()->_deviceId)->_hsaAgent; } diff --git a/hipamd/src/hip_clang.cpp b/hipamd/src/hip_clang.cpp index b8beb05400..85dfb09f62 100644 --- a/hipamd/src/hip_clang.cpp +++ b/hipamd/src/hip_clang.cpp @@ -177,6 +177,7 @@ hipError_t hipConfigureCall( size_t sharedMem, hipStream_t stream) { + GET_TLS(); auto ctx = ihipGetTlsDefaultCtx(); LockedAccessor_CtxCrit_t crit(ctx->criticalData()); diff --git a/hipamd/src/hip_context.cpp b/hipamd/src/hip_context.cpp index e85c406cfc..300877f8b5 100644 --- a/hipamd/src/hip_context.cpp +++ b/hipamd/src/hip_context.cpp @@ -29,13 +29,10 @@ THE SOFTWARE. #include "hip_hcc_internal.h" #include "trace_helper.h" -// Stack of contexts -thread_local std::stack tls_ctxStack; -thread_local bool tls_getPrimaryCtx = true; - void ihipCtxStackUpdate() { - if (tls_ctxStack.empty()) { - tls_ctxStack.push(ihipGetTlsDefaultCtx()); + GET_TLS(); + if (tls->ctxStack.empty()) { + tls->ctxStack.push(ihipGetTlsDefaultCtx()); } } @@ -62,8 +59,8 @@ hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) { auto ictx = new ihipCtx_t(deviceHandle, g_deviceCnt, flags); *ctx = ictx; ihipSetTlsDefaultCtx(*ctx); - tls_ctxStack.push(*ctx); - tls_getPrimaryCtx = false; + tls->ctxStack.push(*ctx); + tls->getPrimaryCtx = false; deviceCrit->addContext(ictx); } @@ -119,7 +116,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) { } else { if (currentCtx == ctx) { // need to destroy the ctx associated with calling thread - tls_ctxStack.pop(); + tls->ctxStack.pop(); } { auto deviceHandle = ctx->getWriteableDevice(); @@ -140,12 +137,12 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { auto deviceHandle = currentCtx->getDevice(); *ctx = currentCtx; - if (!tls_ctxStack.empty()) { - tls_ctxStack.pop(); + if (!tls->ctxStack.empty()) { + tls->ctxStack.pop(); } - if (!tls_ctxStack.empty()) { - currentCtx = tls_ctxStack.top(); + if (!tls->ctxStack.empty()) { + currentCtx = tls->ctxStack.top(); } else { currentCtx = deviceHandle->_primaryCtx; } @@ -159,8 +156,8 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { hipError_t e = hipSuccess; if (ctx != NULL) { // TODO- is this check needed? ihipSetTlsDefaultCtx(ctx); - tls_ctxStack.push(ctx); - tls_getPrimaryCtx = false; + tls->ctxStack.push(ctx); + tls->getPrimaryCtx = false; } else { e = hipErrorInvalidContext; } @@ -170,10 +167,10 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) { hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { HIP_INIT_API(hipCtxGetCurrent, ctx); hipError_t e = hipSuccess; - if ((tls_getPrimaryCtx) || tls_ctxStack.empty()) { + if ((tls->getPrimaryCtx) || tls->ctxStack.empty()) { *ctx = ihipGetTlsDefaultCtx(); } else { - *ctx = tls_ctxStack.top(); + *ctx = tls->ctxStack.top(); } return ihipLogStatus(e); } @@ -182,11 +179,11 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) { HIP_INIT_API(hipCtxSetCurrent, ctx); hipError_t e = hipSuccess; if (ctx == NULL) { - tls_ctxStack.pop(); + tls->ctxStack.pop(); } else { ihipSetTlsDefaultCtx(ctx); - tls_ctxStack.push(ctx); - tls_getPrimaryCtx = false; + tls->ctxStack.push(ctx); + tls->getPrimaryCtx = false; } return ihipLogStatus(e); } @@ -251,7 +248,7 @@ hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { hipError_t hipCtxSynchronize(void) { HIP_INIT_API(hipCtxSynchronize, 1); - return ihipLogStatus(ihipSynchronize()); // TODP Shall check validity of ctx? + return ihipLogStatus(ihipSynchronize(tls)); // TODO Shall check validity of ctx? } hipError_t hipCtxGetFlags(unsigned int* flags) { diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp index 31b3997def..2d74985b64 100644 --- a/hipamd/src/hip_device.cpp +++ b/hipamd/src/hip_device.cpp @@ -136,14 +136,14 @@ hipError_t hipSetDevice(int deviceId) { return ihipLogStatus(hipErrorInvalidDevice); } else { ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(deviceId)); - tls_getPrimaryCtx = true; + tls->getPrimaryCtx = true; return ihipLogStatus(hipSuccess); } } hipError_t hipDeviceSynchronize(void) { HIP_INIT_SPECIAL_API(hipDeviceSynchronize, TRACE_SYNC); - return ihipLogStatus(ihipSynchronize()); + return ihipLogStatus(ihipSynchronize(tls)); } hipError_t hipDeviceReset(void) { @@ -171,7 +171,7 @@ hipError_t hipDeviceReset(void) { } -hipError_t ihipDeviceSetState(void) { +hipError_t ihipDeviceSetState(TlsData *tls) { hipError_t e = hipErrorInvalidContext; auto* ctx = ihipGetTlsDefaultCtx(); diff --git a/hipamd/src/hip_error.cpp b/hipamd/src/hip_error.cpp index 6f1184d92f..0f876b4f26 100644 --- a/hipamd/src/hip_error.cpp +++ b/hipamd/src/hip_error.cpp @@ -33,8 +33,8 @@ hipError_t hipGetLastError() { HIP_INIT_API(hipGetLastError); // Return last error, but then reset the state: - hipError_t e = ihipLogStatus(tls_lastHipError); - tls_lastHipError = hipSuccess; + hipError_t e = ihipLogStatus(tls->lastHipError); + tls->lastHipError = hipSuccess; return e; } @@ -42,7 +42,7 @@ hipError_t hipPeekAtLastError() { HIP_INIT_API(hipPeekAtLastError); // peek at last error, but don't reset it. - return ihipLogStatus(tls_lastHipError); + return ihipLogStatus(tls->lastHipError); } const char* hipGetErrorName(hipError_t hip_error) { diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index ff19227b57..8c47f54cf6 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -139,24 +139,12 @@ std::atomic g_lastShortTid(1); std::vector g_dbStartTriggers; std::vector g_dbStopTriggers; -//================================================================================================= -// Thread-local storage: -//================================================================================================= - -// This is the implicit context used by all HIP commands. -// It can be set by hipSetDevice or by the CTX manipulation commands: -thread_local hipError_t tls_lastHipError = hipSuccess; - - -thread_local TidInfo tls_tidInfo; - - //================================================================================================= // Top-level "free" functions: //================================================================================================= -uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr) { - auto apiSeqNum = tls_tidInfo.apiSeqNum(); - auto tid = tls_tidInfo.tid(); +uint64_t recordApiTrace(TlsData *tls, std::string* fullStr, const std::string& apiStr) { + auto apiSeqNum = tls->tidInfo.apiSeqNum(); + auto tid = tls->tidInfo.tid(); if ((tid < g_dbStartTriggers.size()) && (apiSeqNum >= g_dbStartTriggers[tid].nextTrigger())) { printf("info: resume profiling at %lu\n", apiSeqNum); @@ -179,7 +167,7 @@ uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr) { if (COMPILE_HIP_DB && HIP_TRACE_API) { - fprintf(stderr, "%s<c_str(), apiStartTick, + fprintf(stderr, "%s<tidInfo.pid(), fullStr->c_str(), apiStartTick, API_COLOR_END); } @@ -206,32 +194,18 @@ ihipCtx_t* ihipGetPrimaryCtx(unsigned deviceIndex) { return device ? device->getPrimaryCtx() : NULL; }; - -static thread_local ihipCtx_t* tls_defaultCtx = nullptr; -void ihipSetTlsDefaultCtx(ihipCtx_t* ctx) { tls_defaultCtx = ctx; } - - -//--- -// TODO - review the context creation strategy here. Really should be: -// - first "non-device" runtime call creates the context for this thread. Allowed to call -// setDevice first. -// - hipDeviceReset destroys the primary context for device? -// - Then context is created again for next usage. -ihipCtx_t* ihipGetTlsDefaultCtx() { - // Per-thread initialization of the TLS: - if ((tls_defaultCtx == nullptr) && (g_deviceCnt > 0)) { - ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(0)); - } - return tls_defaultCtx; -} - -hipError_t ihipSynchronize(void) { +hipError_t ihipSynchronize(TlsData *tls) { ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits // for all activity to finish. return (hipSuccess); } +TlsData* tls_get_ptr() { + static thread_local TlsData data; + return &data; +} + //================================================================================================= // ihipStream_t: //================================================================================================= @@ -1482,7 +1456,7 @@ hipError_t hip_init() { } } -hipError_t ihipStreamSynchronize(hipStream_t stream) { +hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream) { hipError_t e = hipSuccess; if (stream == hipStreamNull) { @@ -1503,7 +1477,8 @@ void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) { // Synchronize stream tprintf(DB_SYNC, "ihipStreamCallbackHandler wait on stream %s\n", ToString(cb->_stream).c_str()); - e = ihipStreamSynchronize(cb->_stream); + GET_TLS(); + e = ihipStreamSynchronize(tls, cb->_stream); // Call registered callback function cb->_callback(cb->_stream, e, cb->_userData); @@ -1518,6 +1493,7 @@ void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) { hipStream_t ihipSyncAndResolveStream(hipStream_t stream, bool lockAcquired) { if (stream == hipStreamNull) { // Submitting to NULL stream, call locked_syncDefaultStream to wait for all other streams: + GET_TLS(); ihipCtx_t* ctx = ihipGetTlsDefaultCtx(); tprintf(DB_SYNC, "ihipSyncAndResolveStream %s wait on default stream\n", ToString(stream).c_str()); @@ -1581,15 +1557,16 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp, const hipStream_t stream) { if ((HIP_TRACE_API & (1 << TRACE_KCMD)) || HIP_PROFILE_API || (COMPILE_HIP_DB & HIP_TRACE_API)) { + GET_TLS(); std::stringstream os; - os << tls_tidInfo.pid() << " " << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum() << " hipLaunchKernel '" + os << tls->tidInfo.pid() << " " << tls->tidInfo.tid() << "." << tls->tidInfo.apiSeqNum() << " hipLaunchKernel '" << kernelName << "'" << " gridDim:" << lp->grid_dim << " groupDim:" << lp->group_dim << " sharedMem:+" << lp->dynamic_group_mem_bytes << " " << *stream; if (COMPILE_HIP_DB && HIP_TRACE_API) { std::string fullStr; - recordApiTrace(&fullStr, os.str()); + recordApiTrace(tls, &fullStr, os.str()); } if (HIP_PROFILE_API == 0x1) { diff --git a/hipamd/src/hip_hcc_internal.h b/hipamd/src/hip_hcc_internal.h index 7ab3fa6de1..d276aa9caa 100644 --- a/hipamd/src/hip_hcc_internal.h +++ b/hipamd/src/hip_hcc_internal.h @@ -130,10 +130,26 @@ struct ProfTrigger { //--- -// Extern tls -extern thread_local hipError_t tls_lastHipError; -extern thread_local TidInfo tls_tidInfo; -extern thread_local bool tls_getPrimaryCtx; +// Extern TLS +// Use a single struct to hold all TLS data. Attempt to reduce TLS accesses. +struct TlsData { + explicit TlsData() { + lastHipError = hipSuccess; + getPrimaryCtx = true; + defaultCtx = nullptr; + } + + hipError_t lastHipError; + TidInfo tidInfo; + // This is the implicit context used by all HIP commands. + // It can be set by hipSetDevice or by the CTX manipulation commands: + ihipCtx_t* defaultCtx; + // Stack of contexts + std::stack ctxStack; + bool getPrimaryCtx; +}; +TlsData* tls_get_ptr(); +#define GET_TLS() TlsData *tls = tls_get_ptr() extern std::vector g_dbStartTriggers; extern std::vector g_dbStopTriggers; @@ -194,7 +210,7 @@ extern const char* API_COLOR_END; // Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function // boundary. // TODO - currently we print the trace message at the beginning. if we waited, we could also -// tls_tidInfo return codes, and any values returned through ptr-to-args (ie the pointers allocated +// tls->tidInfo return codes, and any values returned through ptr-to-args (ie the pointers allocated // by hipMalloc). #if COMPILE_HIP_ATP_MARKER #include "CXLActivityLogger.h" @@ -251,10 +267,11 @@ static const DbName dbName[] = { #define tprintf(trace_level, ...) \ { \ if (HIP_DB & (1 << (trace_level))) { \ + GET_TLS(); \ char msgStr[1000]; \ snprintf(msgStr, sizeof(msgStr), __VA_ARGS__); \ fprintf(stderr, " %ship-%s pid:%d tid:%d:%s%s", dbName[trace_level]._color, \ - dbName[trace_level]._shortName, tls_tidInfo.pid(), tls_tidInfo.tid(), msgStr, KNRM); \ + dbName[trace_level]._shortName, tls->tidInfo.pid(), tls->tidInfo.tid(), msgStr, KNRM); \ } \ } #else @@ -266,18 +283,19 @@ static const DbName dbName[] = { static inline uint64_t getTicks() { return hc::get_system_ticks(); } //--- -extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr); +extern uint64_t recordApiTrace(TlsData *tls, std::string* fullStr, const std::string& apiStr); #if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1) #define API_TRACE(forceTrace, ...) \ + GET_TLS(); \ uint64_t hipApiStartTick = 0; \ { \ - tls_tidInfo.incApiSeqNum(); \ + tls->tidInfo.incApiSeqNum(); \ if (forceTrace || \ (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1 << TRACE_ALL))))) { \ std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')'; \ std::string fullStr; \ - hipApiStartTick = recordApiTrace(&fullStr, apiStr); \ + hipApiStartTick = recordApiTrace(tls, &fullStr, apiStr); \ if (HIP_PROFILE_API == 0x1) { \ MARKER_BEGIN(__func__, "HIP") \ } else if (HIP_PROFILE_API == 0x2) { \ @@ -288,10 +306,13 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr); #else // Swallow API_TRACE -#define API_TRACE(IS_CMD, ...) tls_tidInfo.incApiSeqNum(); +#define API_TRACE(IS_CMD, ...) GET_TLS(); tls->tidInfo.incApiSeqNum(); #endif -#define HIP_SET_DEVICE() ihipDeviceSetState(); +#define ihipGetTlsDefaultCtx() iihipGetTlsDefaultCtx(tls) +#define ihipSetTlsDefaultCtx(ctx) tls->defaultCtx = ctx + +#define HIP_SET_DEVICE() ihipDeviceSetState(tls); // This macro should be called at the beginning of every HIP API. // It initializes the hip runtime (exactly once), and @@ -314,22 +335,22 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr); // This macro should be called at the end of every HIP API, and only at the end of top-level hip // APIS (not internal hip) It has dual function: logs the last error returned for use by // hipGetLastError, and also prints the closing message when the debug trace is enabled. -#define ihipLogStatus(hipStatus) \ - ({ \ - hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \ - tls_lastHipError = localHipStatus; \ - \ - if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1 << TRACE_ALL)) { \ - auto ticks = getTicks() - hipApiStartTick; \ - fprintf(stderr, " %ship-api pid:%d tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \ - (localHipStatus == 0) ? API_COLOR : KRED, tls_tidInfo.pid(), tls_tidInfo.tid(), \ - tls_tidInfo.apiSeqNum(), __func__, localHipStatus, \ - ihipErrorString(localHipStatus), ticks, API_COLOR_END); \ - } \ - if (HIP_PROFILE_API) { \ - MARKER_END(); \ - } \ - localHipStatus; \ +#define ihipLogStatus(hipStatus) \ + ({ \ + hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \ + tls->lastHipError = localHipStatus; \ + \ + if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1 << TRACE_ALL)) { \ + auto ticks = getTicks() - hipApiStartTick; \ + fprintf(stderr, " %ship-api pid:%d tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \ + (localHipStatus == 0) ? API_COLOR : KRED, tls->tidInfo.pid(), tls->tidInfo.tid(), \ + tls->tidInfo.apiSeqNum(), __func__, localHipStatus, \ + ihipErrorString(localHipStatus), ticks, API_COLOR_END); \ + } \ + if (HIP_PROFILE_API) { \ + MARKER_END(); \ + } \ + localHipStatus; \ }) @@ -936,11 +957,9 @@ extern hsa_agent_t* g_allAgents; // CPU agents + all the visible GPU agents. // Extern functions: extern void ihipInit(); extern const char* ihipErrorString(hipError_t); -extern ihipCtx_t* ihipGetTlsDefaultCtx(); -extern void ihipSetTlsDefaultCtx(ihipCtx_t* ctx); -extern hipError_t ihipSynchronize(void); +extern hipError_t ihipSynchronize(TlsData *tls); extern void ihipCtxStackUpdate(); -extern hipError_t ihipDeviceSetState(); +extern hipError_t ihipDeviceSetState(TlsData *tls); extern ihipDevice_t* ihipGetDevice(int); ihipCtx_t* ihipGetPrimaryCtx(unsigned deviceIndex); @@ -949,7 +968,7 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0); -hipError_t ihipStreamSynchronize(hipStream_t stream); +hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream); void ihipStreamCallbackHandler(ihipStreamCallback_t* cb); // Stream printf functions: @@ -1002,5 +1021,18 @@ hipError_t memcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKi hipStream_t stream); }; +//--- +// TODO - review the context creation strategy here. Really should be: +// - first "non-device" runtime call creates the context for this thread. Allowed to call +// setDevice first. +// - hipDeviceReset destroys the primary context for device? +// - Then context is created again for next usage. +static inline ihipCtx_t* iihipGetTlsDefaultCtx(TlsData* tls) { + // Per-thread initialization of the TLS: + if ((tls->defaultCtx == nullptr) && (g_deviceCnt > 0)) { + tls->defaultCtx = ihipGetPrimaryCtx(0); + } + return tls->defaultCtx; +} #endif diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 99ea4b5b24..61f34db244 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -300,7 +300,7 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag return ihipLogStatus(hip_status); } -hipError_t ihipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { +hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hip_status = hipSuccess; if (HIP_SYNC_HOST_ALLOC) { @@ -372,7 +372,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags); HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; - hip_status = ihipHostMalloc(ptr, sizeBytes, flags); + hip_status = ihipHostMalloc(tls, ptr, sizeBytes, flags); return ihipLogStatus(hip_status); } @@ -383,7 +383,7 @@ hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) { if(flags != hipMemAttachGlobal) hip_status = hipErrorInvalidValue; else - hip_status = ihipHostMalloc(devPtr, size, hipHostMallocDefault); + hip_status = ihipHostMalloc(tls, devPtr, size, hipHostMallocDefault); return ihipLogStatus(hip_status); } @@ -397,7 +397,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { }; // width in bytes -hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) { +hipError_t ihipMallocPitch(TlsData* tls, void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) { hipError_t hip_status = hipSuccess; if(ptr==NULL || pitch == NULL) { @@ -460,7 +460,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height if (width == 0 || height == 0) return ihipLogStatus(hipErrorUnknown); - hip_status = ihipMallocPitch(ptr, pitch, width, height, 0); + hip_status = ihipMallocPitch(tls, ptr, pitch, width, height, 0); return ihipLogStatus(hip_status); } @@ -475,7 +475,7 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { size_t pitch; hip_status = - ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth); + ihipMallocPitch(tls, &pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth); if (hip_status == hipSuccess) { pitchedDevPtr->pitch = pitch; pitchedDevPtr->xsize = extent.width; diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index 496356ef6e..04cdedbb75 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -137,7 +137,7 @@ hipError_t hipModuleUnload(hipModule_t hmod) { // TODO - improve this synchronization so it is thread-safe. // Currently we want for all inflight activity to complete, but don't prevent another // thread from launching new kernels before we finish this operation. - ihipSynchronize(); + ihipSynchronize(tls); delete hmod; // The ihipModule_t dtor will clean everything up. hmod = nullptr; @@ -145,7 +145,7 @@ hipError_t hipModuleUnload(hipModule_t hmod) { return ihipLogStatus(hipSuccess); } -hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, +hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ, size_t sharedMemBytes, @@ -285,7 +285,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gr void** kernelParams, void** extra) { HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); - return ihipLogStatus(ihipModuleLaunchKernel( + return ihipLogStatus(ihipModuleLaunchKernel(tls, f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0)); } @@ -298,7 +298,7 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) { HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra); - return ihipLogStatus(ihipModuleLaunchKernel( + return ihipLogStatus(ihipModuleLaunchKernel(tls, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags)); } @@ -311,14 +311,14 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, hipEvent_t startEvent, hipEvent_t stopEvent) { HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra); - return ihipLogStatus(ihipModuleLaunchKernel( + return ihipLogStatus(ihipModuleLaunchKernel(tls, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0)); } hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags) { - HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList,numDevices,flags); + HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags); hipError_t result; if ((numDevices > g_deviceCnt) || (launchParamsList == nullptr)) { @@ -361,7 +361,7 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; - result = ihipModuleLaunchKernel(kds[i], + result = ihipModuleLaunchKernel(tls, kds[i], lp.gridDim.x * lp.blockDim.x, lp.gridDim.y * lp.blockDim.y, lp.gridDim.z * lp.blockDim.z, @@ -395,6 +395,7 @@ namespace hip_impl { } hsa_agent_t this_agent() { + GET_TLS(); auto ctx = ihipGetTlsDefaultCtx(); if (!ctx) throw runtime_error{"No active HIP context."}; @@ -574,6 +575,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, namespace { inline void track(const hip_impl::Agent_global& x, hsa_agent_t agent) { + GET_TLS(); tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name, x.address, x.byte_cnt); @@ -693,7 +695,7 @@ namespace hip_impl { } } // Namespace hip_impl. -hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const char* name, +hipError_t ihipModuleGetFunction(TlsData *tls, hipFunction_t* func, hipModule_t hmod, const char* name, hsa_agent_t *agent = nullptr) { using namespace hip_impl; @@ -728,14 +730,14 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch // Get kernel for the current hsa agent. hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) { HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); - return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name)); + return ihipLogStatus(ihipModuleGetFunction(tls, hfunc, hmod, name)); } // Get kernel for the given hsa agent. Internal use only. hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, const char* name, hsa_agent_t *agent) { HIP_INIT_API(hipModuleGetFunctionEx, hfunc, hmod, name, agent); - return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name, agent)); + return ihipLogStatus(ihipModuleGetFunction(tls, hfunc, hmod, name, agent)); } namespace { @@ -743,7 +745,7 @@ const amd_kernel_code_v3_t *header_v3(const ihipModuleSymbol_t& kd) { return reinterpret_cast(kd._header); } -hipFuncAttributes make_function_attributes(const ihipModuleSymbol_t& kd) { +hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_t& kd) { hipFuncAttributes r{}; hipDeviceProp_t prop{}; @@ -796,12 +798,12 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) if (!kd->_header) throw runtime_error{"Ill-formed Kernel_descriptor."}; - *attr = make_function_attributes(*kd); + *attr = make_function_attributes(tls, *kd); return ihipLogStatus(hipSuccess); } -hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { +hipError_t ihipModuleLoadData(TlsData *tls, hipModule_t* module, const void* image) { using namespace hip_impl; if (!module) return hipErrorInvalidValue; @@ -839,7 +841,7 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { hipError_t hipModuleLoadData(hipModule_t* module, const void* image) { HIP_INIT_API(hipModuleLoadData, module, image); - return ihipLogStatus(ihipModuleLoadData(module,image)); + return ihipLogStatus(ihipModuleLoadData(tls,module,image)); } hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { @@ -853,13 +855,13 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { vector tmp{istreambuf_iterator{file}, istreambuf_iterator{}}; - return ihipLogStatus(ihipModuleLoadData(module, tmp.data())); + return ihipLogStatus(ihipModuleLoadData(tls, module, tmp.data())); } hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions, hipJitOption* options, void** optionValues) { HIP_INIT_API(hipModuleLoadDataEx, module, image, numOptions, options, optionValues); - return ihipLogStatus(ihipModuleLoadData(module, image)); + return ihipLogStatus(ihipModuleLoadData(tls, module, image)); } hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) { @@ -903,7 +905,7 @@ void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size } } -hipError_t ihipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, +hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, uint32_t* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) { @@ -1021,12 +1023,12 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block { HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit); - return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize( + return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(tls, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit)); } hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) + TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) { using namespace hip_impl; @@ -1083,7 +1085,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - numBlocks, f, blockSize, dynSharedMemPerBlk)); + tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( @@ -1093,5 +1095,5 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - numBlocks, f, blockSize, dynSharedMemPerBlk)); + tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } diff --git a/hipamd/src/hip_peer.cpp b/hipamd/src/hip_peer.cpp index f2fca94404..7781af1dbe 100644 --- a/hipamd/src/hip_peer.cpp +++ b/hipamd/src/hip_peer.cpp @@ -84,7 +84,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t //--- // Disable visibility of this device into memory allocated on peer device. // Remove this device from peer device peerlist. -hipError_t ihipDisablePeerAccess(hipCtx_t peerCtx) { +hipError_t ihipDisablePeerAccess(TlsData* tls, hipCtx_t peerCtx) { hipError_t err = hipSuccess; auto thisCtx = ihipGetTlsDefaultCtx(); @@ -119,7 +119,7 @@ hipError_t ihipDisablePeerAccess(hipCtx_t peerCtx) { //--- // Allow the current device to see all memory allocated on peerCtx. // This should add this device to the peer-device peer list. -hipError_t ihipEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { +hipError_t ihipEnablePeerAccess(TlsData* tls, hipCtx_t peerCtx, unsigned int flags) { hipError_t err = hipSuccess; if (flags != 0) { err = hipErrorInvalidValue; @@ -186,14 +186,14 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDevi hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) { HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId); - return ihipLogStatus(ihipDisablePeerAccess(ihipGetPrimaryCtx(peerDeviceId))); + return ihipLogStatus(ihipDisablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId))); } hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) { HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags); - return ihipLogStatus(ihipEnablePeerAccess(ihipGetPrimaryCtx(peerDeviceId), flags)); + return ihipLogStatus(ihipEnablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId), flags)); } @@ -214,11 +214,11 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int src hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags); - return ihipLogStatus(ihipEnablePeerAccess(peerCtx, flags)); + return ihipLogStatus(ihipEnablePeerAccess(tls, peerCtx, flags)); } hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx); - return ihipLogStatus(ihipDisablePeerAccess(peerCtx)); + return ihipLogStatus(ihipDisablePeerAccess(tls, peerCtx)); } diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 7812530dda..e3e4975b7e 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -48,7 +48,7 @@ enum queue_priority #endif //--- -hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priority) { +hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flags, int priority) { ihipCtx_t* ctx = ihipGetTlsDefaultCtx(); hipError_t e = hipSuccess; @@ -97,7 +97,7 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) { HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); if(flags == hipStreamDefault || flags == hipStreamNonBlocking) - return ihipLogStatus(ihipStreamCreate(stream, flags, priority_normal)); + return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority_normal)); else return ihipLogStatus(hipErrorInvalidValue); } @@ -106,7 +106,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) { hipError_t hipStreamCreate(hipStream_t* stream) { HIP_INIT_API(hipStreamCreate, stream); - return ihipLogStatus(ihipStreamCreate(stream, hipStreamDefault, priority_normal)); + return ihipLogStatus(ihipStreamCreate(tls, stream, hipStreamDefault, priority_normal)); } //--- @@ -115,7 +115,7 @@ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, // clamp priority to range [priority_high:priority_low] priority = (priority < priority_high ? priority_high : (priority > priority_low ? priority_low : priority)); - return ihipLogStatus(ihipStreamCreate(stream, flags, priority)); + return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority)); } //--- @@ -183,7 +183,7 @@ hipError_t hipStreamQuery(hipStream_t stream) { hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_SPECIAL_API(hipStreamSynchronize, TRACE_SYNC, stream); - return ihipLogStatus(ihipStreamSynchronize(stream)); + return ihipLogStatus(ihipStreamSynchronize(tls, stream)); } diff --git a/hipamd/src/hip_texture.cpp b/hipamd/src/hip_texture.cpp index e6cd352008..fe97895381 100644 --- a/hipamd/src/hip_texture.cpp +++ b/hipamd/src/hip_texture.cpp @@ -381,9 +381,10 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, } // Texture Reference APIs -hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, +hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t size, textureReference* tex) { + TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_; hipError_t hip_status = hipSuccess; enum hipTextureAddressMode addressMode = tex->addressMode[0]; enum hipTextureFilterMode filterMode = tex->filterMode; @@ -447,12 +448,12 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev HIP_INIT_API(hipBindTexture, offset, tex, devPtr, desc, size); hipError_t hip_status = hipSuccess; // TODO: hipReadModeElementType is default. - hip_status = ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType, offset, devPtr, desc, + hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, desc, size, tex); return ihipLogStatus(hip_status); } -hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset, +hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset, const void* devPtr, const struct hipChannelFormatDesc* desc, size_t width, size_t height, textureReference* tex) { hipError_t hip_status = hipSuccess; @@ -519,15 +520,16 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d size_t pitch) { HIP_INIT_API(hipBindTexture2D, offset, tex, devPtr, desc, width, height, pitch); hipError_t hip_status = hipSuccess; - hip_status = ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType, offset, devPtr, + hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, offset, devPtr, desc, width, height, tex); return ihipLogStatus(hip_status); } -hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode, +hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode, hipArray_const_t array, const struct hipChannelFormatDesc& desc, textureReference* tex) { + TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_; hipError_t hip_status = hipSuccess; enum hipTextureAddressMode addressMode = tex->addressMode[0]; enum hipTextureFilterMode filterMode = tex->filterMode; @@ -617,7 +619,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array, hipError_t hip_status = hipSuccess; // TODO: hipReadModeElementType is default. hip_status = - ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array, *desc, tex); + ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array, *desc, tex); return ihipLogStatus(hip_status); } @@ -629,7 +631,7 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex, return ihipLogStatus(hip_status); } -hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) { +hipError_t ihipUnbindTextureImpl(TlsData *tls, const hipTextureObject_t& textureObject) { hipError_t hip_status = hipSuccess; auto ctx = ihipGetTlsDefaultCtx(); @@ -654,7 +656,7 @@ hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) { hipError_t hipUnbindTexture(const textureReference* tex) { HIP_INIT_API(hipUnbindTexture, tex); hipError_t hip_status = hipSuccess; - hip_status = ihipUnbindTextureImpl(tex->textureObject); + hip_status = ihipUnbindTextureImpl(tls, tex->textureObject); return ihipLogStatus(hip_status); } @@ -726,7 +728,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi HIP_INIT_API(hipTexRefSetArray, tex, array, flags); hipError_t hip_status = hipSuccess; - hip_status = ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array, + hip_status = ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array, array->desc, tex); return ihipLogStatus(hip_status); } @@ -737,7 +739,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep HIP_INIT_API(hipTexRefSetAddress, offset, tex, devPtr, size); hipError_t hip_status = hipSuccess; // TODO: hipReadModeElementType is default. - hip_status = ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType, offset, devPtr, NULL, + hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, NULL, size, tex); return ihipLogStatus(hip_status); } @@ -748,7 +750,7 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT size_t offset; hipError_t hip_status = hipSuccess; // TODO: hipReadModeElementType is default. - hip_status = ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType, &offset, devPtr, + hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, &offset, devPtr, NULL, desc->Width, desc->Height, tex); return ihipLogStatus(hip_status); } diff --git a/hipamd/src/macro_based_grid_launch.inl b/hipamd/src/macro_based_grid_launch.inl index 1e36903c56..2e804f090b 100644 --- a/hipamd/src/macro_based_grid_launch.inl +++ b/hipamd/src/macro_based_grid_launch.inl @@ -71,7 +71,8 @@ namespace hip_impl if (COMPILE_HIP_DB && HIP_TRACE_API) { std::string fullStr; - recordApiTrace(&fullStr, os.str()); + GET_TLS(); + recordApiTrace(tls, &fullStr, os.str()); } } }