consolidate thread local storage (#915)

* all thread local access now through single struct

* clean up old commented-out code, more use of GET_TLS()

* fewer calls to GET_TLS by passing tls as a funtion argument

* revert unnecessary change to printf

* fix failing tests due to TLS change

* fix merge conflicts in ihipOccupancyMaxActiveBlocksPerMultiprocessor
Этот коммит содержится в:
Jeff Daily
2019-08-05 02:51:02 -07:00
коммит произвёл Maneesh Gupta
родитель bb7cfaf91a
Коммит f337ae1edb
14 изменённых файлов: 172 добавлений и 158 удалений
+9 -7
Просмотреть файл
@@ -3007,10 +3007,12 @@ const char* hipKernelNameRef(const hipFunction_t f);
#ifdef __cplusplus
class TlsData;
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc* desc, size_t size = UINT_MAX);
hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t size, textureReference* tex);
@@ -3032,7 +3034,7 @@ hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
return ihipBindTextureImpl(dim, readMode, offset, devPtr, &desc, size, &tex);
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex);
}
/*
@@ -3052,7 +3054,7 @@ hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
size_t size = UINT_MAX) {
return ihipBindTextureImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex);
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex);
}
// C API
@@ -3082,27 +3084,27 @@ hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& te
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
const hipChannelFormatDesc* desc);
hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode,
hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode,
hipArray_const_t array,
const struct hipChannelFormatDesc& desc,
textureReference* tex);
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
return ihipBindTextureToArrayImpl(dim, readMode, array, tex.channelDesc, &tex);
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex);
}
template <class T, int dim, enum hipTextureReadMode readMode>
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array,
const struct hipChannelFormatDesc& desc) {
return ihipBindTextureToArrayImpl(dim, readMode, array, desc, &tex);
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex);
}
template <class T, int dim, enum hipTextureReadMode readMode>
inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
hipArray_const_t array,
const struct hipChannelFormatDesc* desc) {
return ihipBindTextureToArrayImpl(dim, readMode, array, *desc, tex);
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex);
}
// C API
+2 -2
Просмотреть файл
@@ -47,8 +47,8 @@ namespace hip_impl
return *static_cast<hsa_agent_t*>(
stream->locked_getAv()->get_hsa_agent());
}
else if (
ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
GET_TLS();
if (ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
return ihipGetDevice(
ihipGetTlsDefaultCtx()->getDevice()->_deviceId)->_hsaAgent;
}
+1
Просмотреть файл
@@ -177,6 +177,7 @@ hipError_t hipConfigureCall(
size_t sharedMem,
hipStream_t stream)
{
GET_TLS();
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
+18 -21
Просмотреть файл
@@ -29,13 +29,10 @@ THE SOFTWARE.
#include "hip_hcc_internal.h"
#include "trace_helper.h"
// Stack of contexts
thread_local std::stack<ihipCtx_t*> tls_ctxStack;
thread_local bool tls_getPrimaryCtx = true;
void ihipCtxStackUpdate() {
if (tls_ctxStack.empty()) {
tls_ctxStack.push(ihipGetTlsDefaultCtx());
GET_TLS();
if (tls->ctxStack.empty()) {
tls->ctxStack.push(ihipGetTlsDefaultCtx());
}
}
@@ -62,8 +59,8 @@ hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
auto ictx = new ihipCtx_t(deviceHandle, g_deviceCnt, flags);
*ctx = ictx;
ihipSetTlsDefaultCtx(*ctx);
tls_ctxStack.push(*ctx);
tls_getPrimaryCtx = false;
tls->ctxStack.push(*ctx);
tls->getPrimaryCtx = false;
deviceCrit->addContext(ictx);
}
@@ -119,7 +116,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) {
} else {
if (currentCtx == ctx) {
// need to destroy the ctx associated with calling thread
tls_ctxStack.pop();
tls->ctxStack.pop();
}
{
auto deviceHandle = ctx->getWriteableDevice();
@@ -140,12 +137,12 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
auto deviceHandle = currentCtx->getDevice();
*ctx = currentCtx;
if (!tls_ctxStack.empty()) {
tls_ctxStack.pop();
if (!tls->ctxStack.empty()) {
tls->ctxStack.pop();
}
if (!tls_ctxStack.empty()) {
currentCtx = tls_ctxStack.top();
if (!tls->ctxStack.empty()) {
currentCtx = tls->ctxStack.top();
} else {
currentCtx = deviceHandle->_primaryCtx;
}
@@ -159,8 +156,8 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
hipError_t e = hipSuccess;
if (ctx != NULL) { // TODO- is this check needed?
ihipSetTlsDefaultCtx(ctx);
tls_ctxStack.push(ctx);
tls_getPrimaryCtx = false;
tls->ctxStack.push(ctx);
tls->getPrimaryCtx = false;
} else {
e = hipErrorInvalidContext;
}
@@ -170,10 +167,10 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
HIP_INIT_API(hipCtxGetCurrent, ctx);
hipError_t e = hipSuccess;
if ((tls_getPrimaryCtx) || tls_ctxStack.empty()) {
if ((tls->getPrimaryCtx) || tls->ctxStack.empty()) {
*ctx = ihipGetTlsDefaultCtx();
} else {
*ctx = tls_ctxStack.top();
*ctx = tls->ctxStack.top();
}
return ihipLogStatus(e);
}
@@ -182,11 +179,11 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
HIP_INIT_API(hipCtxSetCurrent, ctx);
hipError_t e = hipSuccess;
if (ctx == NULL) {
tls_ctxStack.pop();
tls->ctxStack.pop();
} else {
ihipSetTlsDefaultCtx(ctx);
tls_ctxStack.push(ctx);
tls_getPrimaryCtx = false;
tls->ctxStack.push(ctx);
tls->getPrimaryCtx = false;
}
return ihipLogStatus(e);
}
@@ -251,7 +248,7 @@ hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
hipError_t hipCtxSynchronize(void) {
HIP_INIT_API(hipCtxSynchronize, 1);
return ihipLogStatus(ihipSynchronize()); // TODP Shall check validity of ctx?
return ihipLogStatus(ihipSynchronize(tls)); // TODO Shall check validity of ctx?
}
hipError_t hipCtxGetFlags(unsigned int* flags) {
+3 -3
Просмотреть файл
@@ -136,14 +136,14 @@ hipError_t hipSetDevice(int deviceId) {
return ihipLogStatus(hipErrorInvalidDevice);
} else {
ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(deviceId));
tls_getPrimaryCtx = true;
tls->getPrimaryCtx = true;
return ihipLogStatus(hipSuccess);
}
}
hipError_t hipDeviceSynchronize(void) {
HIP_INIT_SPECIAL_API(hipDeviceSynchronize, TRACE_SYNC);
return ihipLogStatus(ihipSynchronize());
return ihipLogStatus(ihipSynchronize(tls));
}
hipError_t hipDeviceReset(void) {
@@ -171,7 +171,7 @@ hipError_t hipDeviceReset(void) {
}
hipError_t ihipDeviceSetState(void) {
hipError_t ihipDeviceSetState(TlsData *tls) {
hipError_t e = hipErrorInvalidContext;
auto* ctx = ihipGetTlsDefaultCtx();
+3 -3
Просмотреть файл
@@ -33,8 +33,8 @@ hipError_t hipGetLastError() {
HIP_INIT_API(hipGetLastError);
// Return last error, but then reset the state:
hipError_t e = ihipLogStatus(tls_lastHipError);
tls_lastHipError = hipSuccess;
hipError_t e = ihipLogStatus(tls->lastHipError);
tls->lastHipError = hipSuccess;
return e;
}
@@ -42,7 +42,7 @@ hipError_t hipPeekAtLastError() {
HIP_INIT_API(hipPeekAtLastError);
// peek at last error, but don't reset it.
return ihipLogStatus(tls_lastHipError);
return ihipLogStatus(tls->lastHipError);
}
const char* hipGetErrorName(hipError_t hip_error) {
+17 -40
Просмотреть файл
@@ -139,24 +139,12 @@ std::atomic<int> g_lastShortTid(1);
std::vector<ProfTrigger> g_dbStartTriggers;
std::vector<ProfTrigger> g_dbStopTriggers;
//=================================================================================================
// Thread-local storage:
//=================================================================================================
// This is the implicit context used by all HIP commands.
// It can be set by hipSetDevice or by the CTX manipulation commands:
thread_local hipError_t tls_lastHipError = hipSuccess;
thread_local TidInfo tls_tidInfo;
//=================================================================================================
// Top-level "free" functions:
//=================================================================================================
uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr) {
auto apiSeqNum = tls_tidInfo.apiSeqNum();
auto tid = tls_tidInfo.tid();
uint64_t recordApiTrace(TlsData *tls, std::string* fullStr, const std::string& apiStr) {
auto apiSeqNum = tls->tidInfo.apiSeqNum();
auto tid = tls->tidInfo.tid();
if ((tid < g_dbStartTriggers.size()) && (apiSeqNum >= g_dbStartTriggers[tid].nextTrigger())) {
printf("info: resume profiling at %lu\n", apiSeqNum);
@@ -179,7 +167,7 @@ uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr) {
if (COMPILE_HIP_DB && HIP_TRACE_API) {
fprintf(stderr, "%s<<hip-api pid:%d tid:%s @%lu%s\n", API_COLOR, tls_tidInfo.pid(), fullStr->c_str(), apiStartTick,
fprintf(stderr, "%s<<hip-api pid:%d tid:%s @%lu%s\n", API_COLOR, tls->tidInfo.pid(), fullStr->c_str(), apiStartTick,
API_COLOR_END);
}
@@ -206,32 +194,18 @@ ihipCtx_t* ihipGetPrimaryCtx(unsigned deviceIndex) {
return device ? device->getPrimaryCtx() : NULL;
};
static thread_local ihipCtx_t* tls_defaultCtx = nullptr;
void ihipSetTlsDefaultCtx(ihipCtx_t* ctx) { tls_defaultCtx = ctx; }
//---
// TODO - review the context creation strategy here. Really should be:
// - first "non-device" runtime call creates the context for this thread. Allowed to call
// setDevice first.
// - hipDeviceReset destroys the primary context for device?
// - Then context is created again for next usage.
ihipCtx_t* ihipGetTlsDefaultCtx() {
// Per-thread initialization of the TLS:
if ((tls_defaultCtx == nullptr) && (g_deviceCnt > 0)) {
ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(0));
}
return tls_defaultCtx;
}
hipError_t ihipSynchronize(void) {
hipError_t ihipSynchronize(TlsData *tls) {
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
// for all activity to finish.
return (hipSuccess);
}
TlsData* tls_get_ptr() {
static thread_local TlsData data;
return &data;
}
//=================================================================================================
// ihipStream_t:
//=================================================================================================
@@ -1482,7 +1456,7 @@ hipError_t hip_init() {
}
}
hipError_t ihipStreamSynchronize(hipStream_t stream) {
hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream) {
hipError_t e = hipSuccess;
if (stream == hipStreamNull) {
@@ -1503,7 +1477,8 @@ void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) {
// Synchronize stream
tprintf(DB_SYNC, "ihipStreamCallbackHandler wait on stream %s\n",
ToString(cb->_stream).c_str());
e = ihipStreamSynchronize(cb->_stream);
GET_TLS();
e = ihipStreamSynchronize(tls, cb->_stream);
// Call registered callback function
cb->_callback(cb->_stream, e, cb->_userData);
@@ -1518,6 +1493,7 @@ void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) {
hipStream_t ihipSyncAndResolveStream(hipStream_t stream, bool lockAcquired) {
if (stream == hipStreamNull) {
// Submitting to NULL stream, call locked_syncDefaultStream to wait for all other streams:
GET_TLS();
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
tprintf(DB_SYNC, "ihipSyncAndResolveStream %s wait on default stream\n",
ToString(stream).c_str());
@@ -1581,15 +1557,16 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp,
const hipStream_t stream) {
if ((HIP_TRACE_API & (1 << TRACE_KCMD)) || HIP_PROFILE_API ||
(COMPILE_HIP_DB & HIP_TRACE_API)) {
GET_TLS();
std::stringstream os;
os << tls_tidInfo.pid() << " " << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum() << " hipLaunchKernel '"
os << tls->tidInfo.pid() << " " << tls->tidInfo.tid() << "." << tls->tidInfo.apiSeqNum() << " hipLaunchKernel '"
<< kernelName << "'"
<< " gridDim:" << lp->grid_dim << " groupDim:" << lp->group_dim << " sharedMem:+"
<< lp->dynamic_group_mem_bytes << " " << *stream;
if (COMPILE_HIP_DB && HIP_TRACE_API) {
std::string fullStr;
recordApiTrace(&fullStr, os.str());
recordApiTrace(tls, &fullStr, os.str());
}
if (HIP_PROFILE_API == 0x1) {
+64 -32
Просмотреть файл
@@ -130,10 +130,26 @@ struct ProfTrigger {
//---
// Extern tls
extern thread_local hipError_t tls_lastHipError;
extern thread_local TidInfo tls_tidInfo;
extern thread_local bool tls_getPrimaryCtx;
// Extern TLS
// Use a single struct to hold all TLS data. Attempt to reduce TLS accesses.
struct TlsData {
explicit TlsData() {
lastHipError = hipSuccess;
getPrimaryCtx = true;
defaultCtx = nullptr;
}
hipError_t lastHipError;
TidInfo tidInfo;
// This is the implicit context used by all HIP commands.
// It can be set by hipSetDevice or by the CTX manipulation commands:
ihipCtx_t* defaultCtx;
// Stack of contexts
std::stack<ihipCtx_t*> ctxStack;
bool getPrimaryCtx;
};
TlsData* tls_get_ptr();
#define GET_TLS() TlsData *tls = tls_get_ptr()
extern std::vector<ProfTrigger> g_dbStartTriggers;
extern std::vector<ProfTrigger> g_dbStopTriggers;
@@ -194,7 +210,7 @@ extern const char* API_COLOR_END;
// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function
// boundary.
// TODO - currently we print the trace message at the beginning. if we waited, we could also
// tls_tidInfo return codes, and any values returned through ptr-to-args (ie the pointers allocated
// tls->tidInfo return codes, and any values returned through ptr-to-args (ie the pointers allocated
// by hipMalloc).
#if COMPILE_HIP_ATP_MARKER
#include "CXLActivityLogger.h"
@@ -251,10 +267,11 @@ static const DbName dbName[] = {
#define tprintf(trace_level, ...) \
{ \
if (HIP_DB & (1 << (trace_level))) { \
GET_TLS(); \
char msgStr[1000]; \
snprintf(msgStr, sizeof(msgStr), __VA_ARGS__); \
fprintf(stderr, " %ship-%s pid:%d tid:%d:%s%s", dbName[trace_level]._color, \
dbName[trace_level]._shortName, tls_tidInfo.pid(), tls_tidInfo.tid(), msgStr, KNRM); \
dbName[trace_level]._shortName, tls->tidInfo.pid(), tls->tidInfo.tid(), msgStr, KNRM); \
} \
}
#else
@@ -266,18 +283,19 @@ static const DbName dbName[] = {
static inline uint64_t getTicks() { return hc::get_system_ticks(); }
//---
extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
extern uint64_t recordApiTrace(TlsData *tls, std::string* fullStr, const std::string& apiStr);
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
#define API_TRACE(forceTrace, ...) \
GET_TLS(); \
uint64_t hipApiStartTick = 0; \
{ \
tls_tidInfo.incApiSeqNum(); \
tls->tidInfo.incApiSeqNum(); \
if (forceTrace || \
(HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1 << TRACE_ALL))))) { \
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')'; \
std::string fullStr; \
hipApiStartTick = recordApiTrace(&fullStr, apiStr); \
hipApiStartTick = recordApiTrace(tls, &fullStr, apiStr); \
if (HIP_PROFILE_API == 0x1) { \
MARKER_BEGIN(__func__, "HIP") \
} else if (HIP_PROFILE_API == 0x2) { \
@@ -288,10 +306,13 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
#else
// Swallow API_TRACE
#define API_TRACE(IS_CMD, ...) tls_tidInfo.incApiSeqNum();
#define API_TRACE(IS_CMD, ...) GET_TLS(); tls->tidInfo.incApiSeqNum();
#endif
#define HIP_SET_DEVICE() ihipDeviceSetState();
#define ihipGetTlsDefaultCtx() iihipGetTlsDefaultCtx(tls)
#define ihipSetTlsDefaultCtx(ctx) tls->defaultCtx = ctx
#define HIP_SET_DEVICE() ihipDeviceSetState(tls);
// This macro should be called at the beginning of every HIP API.
// It initializes the hip runtime (exactly once), and
@@ -314,22 +335,22 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
// This macro should be called at the end of every HIP API, and only at the end of top-level hip
// APIS (not internal hip) It has dual function: logs the last error returned for use by
// hipGetLastError, and also prints the closing message when the debug trace is enabled.
#define ihipLogStatus(hipStatus) \
({ \
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
tls_lastHipError = localHipStatus; \
\
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1 << TRACE_ALL)) { \
auto ticks = getTicks() - hipApiStartTick; \
fprintf(stderr, " %ship-api pid:%d tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \
(localHipStatus == 0) ? API_COLOR : KRED, tls_tidInfo.pid(), tls_tidInfo.tid(), \
tls_tidInfo.apiSeqNum(), __func__, localHipStatus, \
ihipErrorString(localHipStatus), ticks, API_COLOR_END); \
} \
if (HIP_PROFILE_API) { \
MARKER_END(); \
} \
localHipStatus; \
#define ihipLogStatus(hipStatus) \
({ \
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
tls->lastHipError = localHipStatus; \
\
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1 << TRACE_ALL)) { \
auto ticks = getTicks() - hipApiStartTick; \
fprintf(stderr, " %ship-api pid:%d tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \
(localHipStatus == 0) ? API_COLOR : KRED, tls->tidInfo.pid(), tls->tidInfo.tid(), \
tls->tidInfo.apiSeqNum(), __func__, localHipStatus, \
ihipErrorString(localHipStatus), ticks, API_COLOR_END); \
} \
if (HIP_PROFILE_API) { \
MARKER_END(); \
} \
localHipStatus; \
})
@@ -936,11 +957,9 @@ extern hsa_agent_t* g_allAgents; // CPU agents + all the visible GPU agents.
// Extern functions:
extern void ihipInit();
extern const char* ihipErrorString(hipError_t);
extern ihipCtx_t* ihipGetTlsDefaultCtx();
extern void ihipSetTlsDefaultCtx(ihipCtx_t* ctx);
extern hipError_t ihipSynchronize(void);
extern hipError_t ihipSynchronize(TlsData *tls);
extern void ihipCtxStackUpdate();
extern hipError_t ihipDeviceSetState();
extern hipError_t ihipDeviceSetState(TlsData *tls);
extern ihipDevice_t* ihipGetDevice(int);
ihipCtx_t* ihipGetPrimaryCtx(unsigned deviceIndex);
@@ -949,7 +968,7 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod,
hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0);
hipError_t ihipStreamSynchronize(hipStream_t stream);
hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream);
void ihipStreamCallbackHandler(ihipStreamCallback_t* cb);
// Stream printf functions:
@@ -1002,5 +1021,18 @@ hipError_t memcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKi
hipStream_t stream);
};
//---
// TODO - review the context creation strategy here. Really should be:
// - first "non-device" runtime call creates the context for this thread. Allowed to call
// setDevice first.
// - hipDeviceReset destroys the primary context for device?
// - Then context is created again for next usage.
static inline ihipCtx_t* iihipGetTlsDefaultCtx(TlsData* tls) {
// Per-thread initialization of the TLS:
if ((tls->defaultCtx == nullptr) && (g_deviceCnt > 0)) {
tls->defaultCtx = ihipGetPrimaryCtx(0);
}
return tls->defaultCtx;
}
#endif
+6 -6
Просмотреть файл
@@ -300,7 +300,7 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
return ihipLogStatus(hip_status);
}
hipError_t ihipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) {
hipError_t hip_status = hipSuccess;
if (HIP_SYNC_HOST_ALLOC) {
@@ -372,7 +372,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
hip_status = ihipHostMalloc(ptr, sizeBytes, flags);
hip_status = ihipHostMalloc(tls, ptr, sizeBytes, flags);
return ihipLogStatus(hip_status);
}
@@ -383,7 +383,7 @@ hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) {
if(flags != hipMemAttachGlobal)
hip_status = hipErrorInvalidValue;
else
hip_status = ihipHostMalloc(devPtr, size, hipHostMallocDefault);
hip_status = ihipHostMalloc(tls, devPtr, size, hipHostMallocDefault);
return ihipLogStatus(hip_status);
}
@@ -397,7 +397,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
};
// width in bytes
hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) {
hipError_t ihipMallocPitch(TlsData* tls, void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) {
hipError_t hip_status = hipSuccess;
if(ptr==NULL || pitch == NULL)
{
@@ -460,7 +460,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
if (width == 0 || height == 0) return ihipLogStatus(hipErrorUnknown);
hip_status = ihipMallocPitch(ptr, pitch, width, height, 0);
hip_status = ihipMallocPitch(tls, ptr, pitch, width, height, 0);
return ihipLogStatus(hip_status);
}
@@ -475,7 +475,7 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
size_t pitch;
hip_status =
ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth);
ihipMallocPitch(tls, &pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth);
if (hip_status == hipSuccess) {
pitchedDevPtr->pitch = pitch;
pitchedDevPtr->xsize = extent.width;
+23 -21
Просмотреть файл
@@ -137,7 +137,7 @@ hipError_t hipModuleUnload(hipModule_t hmod) {
// TODO - improve this synchronization so it is thread-safe.
// Currently we want for all inflight activity to complete, but don't prevent another
// thread from launching new kernels before we finish this operation.
ihipSynchronize();
ihipSynchronize(tls);
delete hmod; // The ihipModule_t dtor will clean everything up.
hmod = nullptr;
@@ -145,7 +145,7 @@ hipError_t hipModuleUnload(hipModule_t hmod) {
return ihipLogStatus(hipSuccess);
}
hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
@@ -285,7 +285,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gr
void** kernelParams, void** extra) {
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes,
hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
return ihipLogStatus(ihipModuleLaunchKernel(tls,
f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY,
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0));
}
@@ -298,7 +298,7 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
return ihipLogStatus(ihipModuleLaunchKernel(tls,
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
}
@@ -311,14 +311,14 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
hipEvent_t startEvent, hipEvent_t stopEvent) {
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
return ihipLogStatus(ihipModuleLaunchKernel(tls,
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0));
}
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
int numDevices, unsigned int flags) {
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList,numDevices,flags);
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags);
hipError_t result;
if ((numDevices > g_deviceCnt) || (launchParamsList == nullptr)) {
@@ -361,7 +361,7 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
for (int i = 0; i < numDevices; ++i) {
const hipLaunchParams& lp = launchParamsList[i];
result = ihipModuleLaunchKernel(kds[i],
result = ihipModuleLaunchKernel(tls, kds[i],
lp.gridDim.x * lp.blockDim.x,
lp.gridDim.y * lp.blockDim.y,
lp.gridDim.z * lp.blockDim.z,
@@ -395,6 +395,7 @@ namespace hip_impl {
}
hsa_agent_t this_agent() {
GET_TLS();
auto ctx = ihipGetTlsDefaultCtx();
if (!ctx) throw runtime_error{"No active HIP context."};
@@ -574,6 +575,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
namespace {
inline void track(const hip_impl::Agent_global& x, hsa_agent_t agent) {
GET_TLS();
tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name,
x.address, x.byte_cnt);
@@ -693,7 +695,7 @@ namespace hip_impl {
}
} // Namespace hip_impl.
hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const char* name,
hipError_t ihipModuleGetFunction(TlsData *tls, hipFunction_t* func, hipModule_t hmod, const char* name,
hsa_agent_t *agent = nullptr) {
using namespace hip_impl;
@@ -728,14 +730,14 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch
// Get kernel for the current hsa agent.
hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name));
return ihipLogStatus(ihipModuleGetFunction(tls, hfunc, hmod, name));
}
// Get kernel for the given hsa agent. Internal use only.
hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod,
const char* name, hsa_agent_t *agent) {
HIP_INIT_API(hipModuleGetFunctionEx, hfunc, hmod, name, agent);
return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name, agent));
return ihipLogStatus(ihipModuleGetFunction(tls, hfunc, hmod, name, agent));
}
namespace {
@@ -743,7 +745,7 @@ const amd_kernel_code_v3_t *header_v3(const ihipModuleSymbol_t& kd) {
return reinterpret_cast<const amd_kernel_code_v3_t*>(kd._header);
}
hipFuncAttributes make_function_attributes(const ihipModuleSymbol_t& kd) {
hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_t& kd) {
hipFuncAttributes r{};
hipDeviceProp_t prop{};
@@ -796,12 +798,12 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
if (!kd->_header) throw runtime_error{"Ill-formed Kernel_descriptor."};
*attr = make_function_attributes(*kd);
*attr = make_function_attributes(tls, *kd);
return ihipLogStatus(hipSuccess);
}
hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
hipError_t ihipModuleLoadData(TlsData *tls, hipModule_t* module, const void* image) {
using namespace hip_impl;
if (!module) return hipErrorInvalidValue;
@@ -839,7 +841,7 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
HIP_INIT_API(hipModuleLoadData, module, image);
return ihipLogStatus(ihipModuleLoadData(module,image));
return ihipLogStatus(ihipModuleLoadData(tls,module,image));
}
hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
@@ -853,13 +855,13 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
vector<char> tmp{istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
return ihipLogStatus(ihipModuleLoadData(module, tmp.data()));
return ihipLogStatus(ihipModuleLoadData(tls, module, tmp.data()));
}
hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
hipJitOption* options, void** optionValues) {
HIP_INIT_API(hipModuleLoadDataEx, module, image, numOptions, options, optionValues);
return ihipLogStatus(ihipModuleLoadData(module, image));
return ihipLogStatus(ihipModuleLoadData(tls, module, image));
}
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) {
@@ -903,7 +905,7 @@ void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size
}
}
hipError_t ihipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize,
hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, uint32_t* blockSize,
hipFunction_t f, size_t dynSharedMemPerBlk,
uint32_t blockSizeLimit)
{
@@ -1021,12 +1023,12 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block
{
HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit);
return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(
return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(tls,
gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit));
}
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk)
TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk)
{
using namespace hip_impl;
@@ -1083,7 +1085,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk);
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
numBlocks, f, blockSize, dynSharedMemPerBlk));
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
}
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
@@ -1093,5 +1095,5 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags);
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
numBlocks, f, blockSize, dynSharedMemPerBlk));
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
}
+6 -6
Просмотреть файл
@@ -84,7 +84,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t
//---
// Disable visibility of this device into memory allocated on peer device.
// Remove this device from peer device peerlist.
hipError_t ihipDisablePeerAccess(hipCtx_t peerCtx) {
hipError_t ihipDisablePeerAccess(TlsData* tls, hipCtx_t peerCtx) {
hipError_t err = hipSuccess;
auto thisCtx = ihipGetTlsDefaultCtx();
@@ -119,7 +119,7 @@ hipError_t ihipDisablePeerAccess(hipCtx_t peerCtx) {
//---
// Allow the current device to see all memory allocated on peerCtx.
// This should add this device to the peer-device peer list.
hipError_t ihipEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
hipError_t ihipEnablePeerAccess(TlsData* tls, hipCtx_t peerCtx, unsigned int flags) {
hipError_t err = hipSuccess;
if (flags != 0) {
err = hipErrorInvalidValue;
@@ -186,14 +186,14 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDevi
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) {
HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId);
return ihipLogStatus(ihipDisablePeerAccess(ihipGetPrimaryCtx(peerDeviceId)));
return ihipLogStatus(ihipDisablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId)));
}
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags);
return ihipLogStatus(ihipEnablePeerAccess(ihipGetPrimaryCtx(peerDeviceId), flags));
return ihipLogStatus(ihipEnablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId), flags));
}
@@ -214,11 +214,11 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int src
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
return ihipLogStatus(ihipEnablePeerAccess(peerCtx, flags));
return ihipLogStatus(ihipEnablePeerAccess(tls, peerCtx, flags));
}
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx);
return ihipLogStatus(ihipDisablePeerAccess(peerCtx));
return ihipLogStatus(ihipDisablePeerAccess(tls, peerCtx));
}
+5 -5
Просмотреть файл
@@ -48,7 +48,7 @@ enum queue_priority
#endif
//---
hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priority) {
hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flags, int priority) {
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
hipError_t e = hipSuccess;
@@ -97,7 +97,7 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit
hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
HIP_INIT_API(hipStreamCreateWithFlags, stream, flags);
if(flags == hipStreamDefault || flags == hipStreamNonBlocking)
return ihipLogStatus(ihipStreamCreate(stream, flags, priority_normal));
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority_normal));
else
return ihipLogStatus(hipErrorInvalidValue);
}
@@ -106,7 +106,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
hipError_t hipStreamCreate(hipStream_t* stream) {
HIP_INIT_API(hipStreamCreate, stream);
return ihipLogStatus(ihipStreamCreate(stream, hipStreamDefault, priority_normal));
return ihipLogStatus(ihipStreamCreate(tls, stream, hipStreamDefault, priority_normal));
}
//---
@@ -115,7 +115,7 @@ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags,
// clamp priority to range [priority_high:priority_low]
priority = (priority < priority_high ? priority_high : (priority > priority_low ? priority_low : priority));
return ihipLogStatus(ihipStreamCreate(stream, flags, priority));
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority));
}
//---
@@ -183,7 +183,7 @@ hipError_t hipStreamQuery(hipStream_t stream) {
hipError_t hipStreamSynchronize(hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipStreamSynchronize, TRACE_SYNC, stream);
return ihipLogStatus(ihipStreamSynchronize(stream));
return ihipLogStatus(ihipStreamSynchronize(tls, stream));
}
+13 -11
Просмотреть файл
@@ -381,9 +381,10 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
}
// Texture Reference APIs
hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t size, textureReference* tex) {
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
hipError_t hip_status = hipSuccess;
enum hipTextureAddressMode addressMode = tex->addressMode[0];
enum hipTextureFilterMode filterMode = tex->filterMode;
@@ -447,12 +448,12 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev
HIP_INIT_API(hipBindTexture, offset, tex, devPtr, desc, size);
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status = ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType, offset, devPtr, desc,
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, desc,
size, tex);
return ihipLogStatus(hip_status);
}
hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t width, size_t height, textureReference* tex) {
hipError_t hip_status = hipSuccess;
@@ -519,15 +520,16 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d
size_t pitch) {
HIP_INIT_API(hipBindTexture2D, offset, tex, devPtr, desc, width, height, pitch);
hipError_t hip_status = hipSuccess;
hip_status = ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType, offset, devPtr,
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, offset, devPtr,
desc, width, height, tex);
return ihipLogStatus(hip_status);
}
hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode,
hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode,
hipArray_const_t array,
const struct hipChannelFormatDesc& desc,
textureReference* tex) {
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
hipError_t hip_status = hipSuccess;
enum hipTextureAddressMode addressMode = tex->addressMode[0];
enum hipTextureFilterMode filterMode = tex->filterMode;
@@ -617,7 +619,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status =
ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array, *desc, tex);
ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array, *desc, tex);
return ihipLogStatus(hip_status);
}
@@ -629,7 +631,7 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex,
return ihipLogStatus(hip_status);
}
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
hipError_t ihipUnbindTextureImpl(TlsData *tls, const hipTextureObject_t& textureObject) {
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
@@ -654,7 +656,7 @@ hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
hipError_t hipUnbindTexture(const textureReference* tex) {
HIP_INIT_API(hipUnbindTexture, tex);
hipError_t hip_status = hipSuccess;
hip_status = ihipUnbindTextureImpl(tex->textureObject);
hip_status = ihipUnbindTextureImpl(tls, tex->textureObject);
return ihipLogStatus(hip_status);
}
@@ -726,7 +728,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi
HIP_INIT_API(hipTexRefSetArray, tex, array, flags);
hipError_t hip_status = hipSuccess;
hip_status = ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array,
hip_status = ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array,
array->desc, tex);
return ihipLogStatus(hip_status);
}
@@ -737,7 +739,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep
HIP_INIT_API(hipTexRefSetAddress, offset, tex, devPtr, size);
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status = ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType, offset, devPtr, NULL,
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, NULL,
size, tex);
return ihipLogStatus(hip_status);
}
@@ -748,7 +750,7 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT
size_t offset;
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status = ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType, &offset, devPtr,
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, &offset, devPtr,
NULL, desc->Width, desc->Height, tex);
return ihipLogStatus(hip_status);
}
+2 -1
Просмотреть файл
@@ -71,7 +71,8 @@ namespace hip_impl
if (COMPILE_HIP_DB && HIP_TRACE_API) {
std::string fullStr;
recordApiTrace(&fullStr, os.str());
GET_TLS();
recordApiTrace(tls, &fullStr, os.str());
}
}
}