consolidate thread local storage (#915)
* all thread local access now through single struct
* clean up old commented-out code, more use of GET_TLS()
* fewer calls to GET_TLS by passing tls as a funtion argument
* revert unnecessary change to printf
* fix failing tests due to TLS change
* fix merge conflicts in ihipOccupancyMaxActiveBlocksPerMultiprocessor
[ROCm/hip commit: 1eb3dbf065]
Этот коммит содержится в:
коммит произвёл
Maneesh Gupta
родитель
3fe8568377
Коммит
9b44993343
@@ -3007,10 +3007,12 @@ const char* hipKernelNameRef(const hipFunction_t f);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
class TlsData;
|
||||
|
||||
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc* desc, size_t size = UINT_MAX);
|
||||
|
||||
hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
hipError_t ihipBindTextureImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t size, textureReference* tex);
|
||||
|
||||
@@ -3032,7 +3034,7 @@ hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
|
||||
const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) {
|
||||
return ihipBindTextureImpl(dim, readMode, offset, devPtr, &desc, size, &tex);
|
||||
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &desc, size, &tex);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3052,7 +3054,7 @@ hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex, const void* devPtr,
|
||||
size_t size = UINT_MAX) {
|
||||
return ihipBindTextureImpl(dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex);
|
||||
return ihipBindTextureImpl(nullptr, dim, readMode, offset, devPtr, &(tex.channelDesc), size, &tex);
|
||||
}
|
||||
|
||||
// C API
|
||||
@@ -3082,27 +3084,27 @@ hipError_t hipBindTexture2D(size_t* offset, struct texture<T, dim, readMode>& te
|
||||
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
|
||||
const hipChannelFormatDesc* desc);
|
||||
|
||||
hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode,
|
||||
hipError_t ihipBindTextureToArrayImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
textureReference* tex);
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array) {
|
||||
return ihipBindTextureToArrayImpl(dim, readMode, array, tex.channelDesc, &tex);
|
||||
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, tex.channelDesc, &tex);
|
||||
}
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex, hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc) {
|
||||
return ihipBindTextureToArrayImpl(dim, readMode, array, desc, &tex);
|
||||
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, desc, &tex);
|
||||
}
|
||||
|
||||
template <class T, int dim, enum hipTextureReadMode readMode>
|
||||
inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode> *tex,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc* desc) {
|
||||
return ihipBindTextureToArrayImpl(dim, readMode, array, *desc, tex);
|
||||
return ihipBindTextureToArrayImpl(nullptr, dim, readMode, array, *desc, tex);
|
||||
}
|
||||
|
||||
// C API
|
||||
|
||||
@@ -47,8 +47,8 @@ namespace hip_impl
|
||||
return *static_cast<hsa_agent_t*>(
|
||||
stream->locked_getAv()->get_hsa_agent());
|
||||
}
|
||||
else if (
|
||||
ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
|
||||
GET_TLS();
|
||||
if (ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
|
||||
return ihipGetDevice(
|
||||
ihipGetTlsDefaultCtx()->getDevice()->_deviceId)->_hsaAgent;
|
||||
}
|
||||
|
||||
@@ -177,6 +177,7 @@ hipError_t hipConfigureCall(
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
GET_TLS();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
|
||||
|
||||
@@ -29,13 +29,10 @@ THE SOFTWARE.
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
// Stack of contexts
|
||||
thread_local std::stack<ihipCtx_t*> tls_ctxStack;
|
||||
thread_local bool tls_getPrimaryCtx = true;
|
||||
|
||||
void ihipCtxStackUpdate() {
|
||||
if (tls_ctxStack.empty()) {
|
||||
tls_ctxStack.push(ihipGetTlsDefaultCtx());
|
||||
GET_TLS();
|
||||
if (tls->ctxStack.empty()) {
|
||||
tls->ctxStack.push(ihipGetTlsDefaultCtx());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,8 +59,8 @@ hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
|
||||
auto ictx = new ihipCtx_t(deviceHandle, g_deviceCnt, flags);
|
||||
*ctx = ictx;
|
||||
ihipSetTlsDefaultCtx(*ctx);
|
||||
tls_ctxStack.push(*ctx);
|
||||
tls_getPrimaryCtx = false;
|
||||
tls->ctxStack.push(*ctx);
|
||||
tls->getPrimaryCtx = false;
|
||||
deviceCrit->addContext(ictx);
|
||||
}
|
||||
|
||||
@@ -119,7 +116,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) {
|
||||
} else {
|
||||
if (currentCtx == ctx) {
|
||||
// need to destroy the ctx associated with calling thread
|
||||
tls_ctxStack.pop();
|
||||
tls->ctxStack.pop();
|
||||
}
|
||||
{
|
||||
auto deviceHandle = ctx->getWriteableDevice();
|
||||
@@ -140,12 +137,12 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
|
||||
auto deviceHandle = currentCtx->getDevice();
|
||||
*ctx = currentCtx;
|
||||
|
||||
if (!tls_ctxStack.empty()) {
|
||||
tls_ctxStack.pop();
|
||||
if (!tls->ctxStack.empty()) {
|
||||
tls->ctxStack.pop();
|
||||
}
|
||||
|
||||
if (!tls_ctxStack.empty()) {
|
||||
currentCtx = tls_ctxStack.top();
|
||||
if (!tls->ctxStack.empty()) {
|
||||
currentCtx = tls->ctxStack.top();
|
||||
} else {
|
||||
currentCtx = deviceHandle->_primaryCtx;
|
||||
}
|
||||
@@ -159,8 +156,8 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
|
||||
hipError_t e = hipSuccess;
|
||||
if (ctx != NULL) { // TODO- is this check needed?
|
||||
ihipSetTlsDefaultCtx(ctx);
|
||||
tls_ctxStack.push(ctx);
|
||||
tls_getPrimaryCtx = false;
|
||||
tls->ctxStack.push(ctx);
|
||||
tls->getPrimaryCtx = false;
|
||||
} else {
|
||||
e = hipErrorInvalidContext;
|
||||
}
|
||||
@@ -170,10 +167,10 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
|
||||
hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
|
||||
HIP_INIT_API(hipCtxGetCurrent, ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
if ((tls_getPrimaryCtx) || tls_ctxStack.empty()) {
|
||||
if ((tls->getPrimaryCtx) || tls->ctxStack.empty()) {
|
||||
*ctx = ihipGetTlsDefaultCtx();
|
||||
} else {
|
||||
*ctx = tls_ctxStack.top();
|
||||
*ctx = tls->ctxStack.top();
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
@@ -182,11 +179,11 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
|
||||
HIP_INIT_API(hipCtxSetCurrent, ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
if (ctx == NULL) {
|
||||
tls_ctxStack.pop();
|
||||
tls->ctxStack.pop();
|
||||
} else {
|
||||
ihipSetTlsDefaultCtx(ctx);
|
||||
tls_ctxStack.push(ctx);
|
||||
tls_getPrimaryCtx = false;
|
||||
tls->ctxStack.push(ctx);
|
||||
tls->getPrimaryCtx = false;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
@@ -251,7 +248,7 @@ hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
|
||||
|
||||
hipError_t hipCtxSynchronize(void) {
|
||||
HIP_INIT_API(hipCtxSynchronize, 1);
|
||||
return ihipLogStatus(ihipSynchronize()); // TODP Shall check validity of ctx?
|
||||
return ihipLogStatus(ihipSynchronize(tls)); // TODO Shall check validity of ctx?
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetFlags(unsigned int* flags) {
|
||||
|
||||
@@ -136,14 +136,14 @@ hipError_t hipSetDevice(int deviceId) {
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
} else {
|
||||
ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(deviceId));
|
||||
tls_getPrimaryCtx = true;
|
||||
tls->getPrimaryCtx = true;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSynchronize(void) {
|
||||
HIP_INIT_SPECIAL_API(hipDeviceSynchronize, TRACE_SYNC);
|
||||
return ihipLogStatus(ihipSynchronize());
|
||||
return ihipLogStatus(ihipSynchronize(tls));
|
||||
}
|
||||
|
||||
hipError_t hipDeviceReset(void) {
|
||||
@@ -171,7 +171,7 @@ hipError_t hipDeviceReset(void) {
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipDeviceSetState(void) {
|
||||
hipError_t ihipDeviceSetState(TlsData *tls) {
|
||||
hipError_t e = hipErrorInvalidContext;
|
||||
auto* ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
|
||||
@@ -33,8 +33,8 @@ hipError_t hipGetLastError() {
|
||||
HIP_INIT_API(hipGetLastError);
|
||||
|
||||
// Return last error, but then reset the state:
|
||||
hipError_t e = ihipLogStatus(tls_lastHipError);
|
||||
tls_lastHipError = hipSuccess;
|
||||
hipError_t e = ihipLogStatus(tls->lastHipError);
|
||||
tls->lastHipError = hipSuccess;
|
||||
return e;
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ hipError_t hipPeekAtLastError() {
|
||||
HIP_INIT_API(hipPeekAtLastError);
|
||||
|
||||
// peek at last error, but don't reset it.
|
||||
return ihipLogStatus(tls_lastHipError);
|
||||
return ihipLogStatus(tls->lastHipError);
|
||||
}
|
||||
|
||||
const char* hipGetErrorName(hipError_t hip_error) {
|
||||
|
||||
@@ -139,24 +139,12 @@ std::atomic<int> g_lastShortTid(1);
|
||||
std::vector<ProfTrigger> g_dbStartTriggers;
|
||||
std::vector<ProfTrigger> g_dbStopTriggers;
|
||||
|
||||
//=================================================================================================
|
||||
// Thread-local storage:
|
||||
//=================================================================================================
|
||||
|
||||
// This is the implicit context used by all HIP commands.
|
||||
// It can be set by hipSetDevice or by the CTX manipulation commands:
|
||||
thread_local hipError_t tls_lastHipError = hipSuccess;
|
||||
|
||||
|
||||
thread_local TidInfo tls_tidInfo;
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
// Top-level "free" functions:
|
||||
//=================================================================================================
|
||||
uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr) {
|
||||
auto apiSeqNum = tls_tidInfo.apiSeqNum();
|
||||
auto tid = tls_tidInfo.tid();
|
||||
uint64_t recordApiTrace(TlsData *tls, std::string* fullStr, const std::string& apiStr) {
|
||||
auto apiSeqNum = tls->tidInfo.apiSeqNum();
|
||||
auto tid = tls->tidInfo.tid();
|
||||
|
||||
if ((tid < g_dbStartTriggers.size()) && (apiSeqNum >= g_dbStartTriggers[tid].nextTrigger())) {
|
||||
printf("info: resume profiling at %lu\n", apiSeqNum);
|
||||
@@ -179,7 +167,7 @@ uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr) {
|
||||
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
fprintf(stderr, "%s<<hip-api pid:%d tid:%s @%lu%s\n", API_COLOR, tls_tidInfo.pid(), fullStr->c_str(), apiStartTick,
|
||||
fprintf(stderr, "%s<<hip-api pid:%d tid:%s @%lu%s\n", API_COLOR, tls->tidInfo.pid(), fullStr->c_str(), apiStartTick,
|
||||
API_COLOR_END);
|
||||
}
|
||||
|
||||
@@ -206,32 +194,18 @@ ihipCtx_t* ihipGetPrimaryCtx(unsigned deviceIndex) {
|
||||
return device ? device->getPrimaryCtx() : NULL;
|
||||
};
|
||||
|
||||
|
||||
static thread_local ihipCtx_t* tls_defaultCtx = nullptr;
|
||||
void ihipSetTlsDefaultCtx(ihipCtx_t* ctx) { tls_defaultCtx = ctx; }
|
||||
|
||||
|
||||
//---
|
||||
// TODO - review the context creation strategy here. Really should be:
|
||||
// - first "non-device" runtime call creates the context for this thread. Allowed to call
|
||||
// setDevice first.
|
||||
// - hipDeviceReset destroys the primary context for device?
|
||||
// - Then context is created again for next usage.
|
||||
ihipCtx_t* ihipGetTlsDefaultCtx() {
|
||||
// Per-thread initialization of the TLS:
|
||||
if ((tls_defaultCtx == nullptr) && (g_deviceCnt > 0)) {
|
||||
ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(0));
|
||||
}
|
||||
return tls_defaultCtx;
|
||||
}
|
||||
|
||||
hipError_t ihipSynchronize(void) {
|
||||
hipError_t ihipSynchronize(TlsData *tls) {
|
||||
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits
|
||||
// for all activity to finish.
|
||||
|
||||
return (hipSuccess);
|
||||
}
|
||||
|
||||
TlsData* tls_get_ptr() {
|
||||
static thread_local TlsData data;
|
||||
return &data;
|
||||
}
|
||||
|
||||
//=================================================================================================
|
||||
// ihipStream_t:
|
||||
//=================================================================================================
|
||||
@@ -1482,7 +1456,7 @@ hipError_t hip_init() {
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t ihipStreamSynchronize(hipStream_t stream) {
|
||||
hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream) {
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if (stream == hipStreamNull) {
|
||||
@@ -1503,7 +1477,8 @@ void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) {
|
||||
// Synchronize stream
|
||||
tprintf(DB_SYNC, "ihipStreamCallbackHandler wait on stream %s\n",
|
||||
ToString(cb->_stream).c_str());
|
||||
e = ihipStreamSynchronize(cb->_stream);
|
||||
GET_TLS();
|
||||
e = ihipStreamSynchronize(tls, cb->_stream);
|
||||
|
||||
// Call registered callback function
|
||||
cb->_callback(cb->_stream, e, cb->_userData);
|
||||
@@ -1518,6 +1493,7 @@ void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) {
|
||||
hipStream_t ihipSyncAndResolveStream(hipStream_t stream, bool lockAcquired) {
|
||||
if (stream == hipStreamNull) {
|
||||
// Submitting to NULL stream, call locked_syncDefaultStream to wait for all other streams:
|
||||
GET_TLS();
|
||||
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
|
||||
tprintf(DB_SYNC, "ihipSyncAndResolveStream %s wait on default stream\n",
|
||||
ToString(stream).c_str());
|
||||
@@ -1581,15 +1557,16 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp,
|
||||
const hipStream_t stream) {
|
||||
if ((HIP_TRACE_API & (1 << TRACE_KCMD)) || HIP_PROFILE_API ||
|
||||
(COMPILE_HIP_DB & HIP_TRACE_API)) {
|
||||
GET_TLS();
|
||||
std::stringstream os;
|
||||
os << tls_tidInfo.pid() << " " << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum() << " hipLaunchKernel '"
|
||||
os << tls->tidInfo.pid() << " " << tls->tidInfo.tid() << "." << tls->tidInfo.apiSeqNum() << " hipLaunchKernel '"
|
||||
<< kernelName << "'"
|
||||
<< " gridDim:" << lp->grid_dim << " groupDim:" << lp->group_dim << " sharedMem:+"
|
||||
<< lp->dynamic_group_mem_bytes << " " << *stream;
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
std::string fullStr;
|
||||
recordApiTrace(&fullStr, os.str());
|
||||
recordApiTrace(tls, &fullStr, os.str());
|
||||
}
|
||||
|
||||
if (HIP_PROFILE_API == 0x1) {
|
||||
|
||||
@@ -130,10 +130,26 @@ struct ProfTrigger {
|
||||
|
||||
|
||||
//---
|
||||
// Extern tls
|
||||
extern thread_local hipError_t tls_lastHipError;
|
||||
extern thread_local TidInfo tls_tidInfo;
|
||||
extern thread_local bool tls_getPrimaryCtx;
|
||||
// Extern TLS
|
||||
// Use a single struct to hold all TLS data. Attempt to reduce TLS accesses.
|
||||
struct TlsData {
|
||||
explicit TlsData() {
|
||||
lastHipError = hipSuccess;
|
||||
getPrimaryCtx = true;
|
||||
defaultCtx = nullptr;
|
||||
}
|
||||
|
||||
hipError_t lastHipError;
|
||||
TidInfo tidInfo;
|
||||
// This is the implicit context used by all HIP commands.
|
||||
// It can be set by hipSetDevice or by the CTX manipulation commands:
|
||||
ihipCtx_t* defaultCtx;
|
||||
// Stack of contexts
|
||||
std::stack<ihipCtx_t*> ctxStack;
|
||||
bool getPrimaryCtx;
|
||||
};
|
||||
TlsData* tls_get_ptr();
|
||||
#define GET_TLS() TlsData *tls = tls_get_ptr()
|
||||
|
||||
extern std::vector<ProfTrigger> g_dbStartTriggers;
|
||||
extern std::vector<ProfTrigger> g_dbStopTriggers;
|
||||
@@ -194,7 +210,7 @@ extern const char* API_COLOR_END;
|
||||
// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function
|
||||
// boundary.
|
||||
// TODO - currently we print the trace message at the beginning. if we waited, we could also
|
||||
// tls_tidInfo return codes, and any values returned through ptr-to-args (ie the pointers allocated
|
||||
// tls->tidInfo return codes, and any values returned through ptr-to-args (ie the pointers allocated
|
||||
// by hipMalloc).
|
||||
#if COMPILE_HIP_ATP_MARKER
|
||||
#include "CXLActivityLogger.h"
|
||||
@@ -251,10 +267,11 @@ static const DbName dbName[] = {
|
||||
#define tprintf(trace_level, ...) \
|
||||
{ \
|
||||
if (HIP_DB & (1 << (trace_level))) { \
|
||||
GET_TLS(); \
|
||||
char msgStr[1000]; \
|
||||
snprintf(msgStr, sizeof(msgStr), __VA_ARGS__); \
|
||||
fprintf(stderr, " %ship-%s pid:%d tid:%d:%s%s", dbName[trace_level]._color, \
|
||||
dbName[trace_level]._shortName, tls_tidInfo.pid(), tls_tidInfo.tid(), msgStr, KNRM); \
|
||||
dbName[trace_level]._shortName, tls->tidInfo.pid(), tls->tidInfo.tid(), msgStr, KNRM); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
@@ -266,18 +283,19 @@ static const DbName dbName[] = {
|
||||
static inline uint64_t getTicks() { return hc::get_system_ticks(); }
|
||||
|
||||
//---
|
||||
extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
|
||||
extern uint64_t recordApiTrace(TlsData *tls, std::string* fullStr, const std::string& apiStr);
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
|
||||
#define API_TRACE(forceTrace, ...) \
|
||||
GET_TLS(); \
|
||||
uint64_t hipApiStartTick = 0; \
|
||||
{ \
|
||||
tls_tidInfo.incApiSeqNum(); \
|
||||
tls->tidInfo.incApiSeqNum(); \
|
||||
if (forceTrace || \
|
||||
(HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1 << TRACE_ALL))))) { \
|
||||
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')'; \
|
||||
std::string fullStr; \
|
||||
hipApiStartTick = recordApiTrace(&fullStr, apiStr); \
|
||||
hipApiStartTick = recordApiTrace(tls, &fullStr, apiStr); \
|
||||
if (HIP_PROFILE_API == 0x1) { \
|
||||
MARKER_BEGIN(__func__, "HIP") \
|
||||
} else if (HIP_PROFILE_API == 0x2) { \
|
||||
@@ -288,10 +306,13 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
|
||||
|
||||
#else
|
||||
// Swallow API_TRACE
|
||||
#define API_TRACE(IS_CMD, ...) tls_tidInfo.incApiSeqNum();
|
||||
#define API_TRACE(IS_CMD, ...) GET_TLS(); tls->tidInfo.incApiSeqNum();
|
||||
#endif
|
||||
|
||||
#define HIP_SET_DEVICE() ihipDeviceSetState();
|
||||
#define ihipGetTlsDefaultCtx() iihipGetTlsDefaultCtx(tls)
|
||||
#define ihipSetTlsDefaultCtx(ctx) tls->defaultCtx = ctx
|
||||
|
||||
#define HIP_SET_DEVICE() ihipDeviceSetState(tls);
|
||||
|
||||
// This macro should be called at the beginning of every HIP API.
|
||||
// It initializes the hip runtime (exactly once), and
|
||||
@@ -314,22 +335,22 @@ extern uint64_t recordApiTrace(std::string* fullStr, const std::string& apiStr);
|
||||
// This macro should be called at the end of every HIP API, and only at the end of top-level hip
|
||||
// APIS (not internal hip) It has dual function: logs the last error returned for use by
|
||||
// hipGetLastError, and also prints the closing message when the debug trace is enabled.
|
||||
#define ihipLogStatus(hipStatus) \
|
||||
({ \
|
||||
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
|
||||
tls_lastHipError = localHipStatus; \
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1 << TRACE_ALL)) { \
|
||||
auto ticks = getTicks() - hipApiStartTick; \
|
||||
fprintf(stderr, " %ship-api pid:%d tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \
|
||||
(localHipStatus == 0) ? API_COLOR : KRED, tls_tidInfo.pid(), tls_tidInfo.tid(), \
|
||||
tls_tidInfo.apiSeqNum(), __func__, localHipStatus, \
|
||||
ihipErrorString(localHipStatus), ticks, API_COLOR_END); \
|
||||
} \
|
||||
if (HIP_PROFILE_API) { \
|
||||
MARKER_END(); \
|
||||
} \
|
||||
localHipStatus; \
|
||||
#define ihipLogStatus(hipStatus) \
|
||||
({ \
|
||||
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
|
||||
tls->lastHipError = localHipStatus; \
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1 << TRACE_ALL)) { \
|
||||
auto ticks = getTicks() - hipApiStartTick; \
|
||||
fprintf(stderr, " %ship-api pid:%d tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \
|
||||
(localHipStatus == 0) ? API_COLOR : KRED, tls->tidInfo.pid(), tls->tidInfo.tid(), \
|
||||
tls->tidInfo.apiSeqNum(), __func__, localHipStatus, \
|
||||
ihipErrorString(localHipStatus), ticks, API_COLOR_END); \
|
||||
} \
|
||||
if (HIP_PROFILE_API) { \
|
||||
MARKER_END(); \
|
||||
} \
|
||||
localHipStatus; \
|
||||
})
|
||||
|
||||
|
||||
@@ -936,11 +957,9 @@ extern hsa_agent_t* g_allAgents; // CPU agents + all the visible GPU agents.
|
||||
// Extern functions:
|
||||
extern void ihipInit();
|
||||
extern const char* ihipErrorString(hipError_t);
|
||||
extern ihipCtx_t* ihipGetTlsDefaultCtx();
|
||||
extern void ihipSetTlsDefaultCtx(ihipCtx_t* ctx);
|
||||
extern hipError_t ihipSynchronize(void);
|
||||
extern hipError_t ihipSynchronize(TlsData *tls);
|
||||
extern void ihipCtxStackUpdate();
|
||||
extern hipError_t ihipDeviceSetState();
|
||||
extern hipError_t ihipDeviceSetState(TlsData *tls);
|
||||
|
||||
extern ihipDevice_t* ihipGetDevice(int);
|
||||
ihipCtx_t* ihipGetPrimaryCtx(unsigned deviceIndex);
|
||||
@@ -949,7 +968,7 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod,
|
||||
|
||||
|
||||
hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0);
|
||||
hipError_t ihipStreamSynchronize(hipStream_t stream);
|
||||
hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream);
|
||||
void ihipStreamCallbackHandler(ihipStreamCallback_t* cb);
|
||||
|
||||
// Stream printf functions:
|
||||
@@ -1002,5 +1021,18 @@ hipError_t memcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKi
|
||||
hipStream_t stream);
|
||||
};
|
||||
|
||||
//---
|
||||
// TODO - review the context creation strategy here. Really should be:
|
||||
// - first "non-device" runtime call creates the context for this thread. Allowed to call
|
||||
// setDevice first.
|
||||
// - hipDeviceReset destroys the primary context for device?
|
||||
// - Then context is created again for next usage.
|
||||
static inline ihipCtx_t* iihipGetTlsDefaultCtx(TlsData* tls) {
|
||||
// Per-thread initialization of the TLS:
|
||||
if ((tls->defaultCtx == nullptr) && (g_deviceCnt > 0)) {
|
||||
tls->defaultCtx = ihipGetPrimaryCtx(0);
|
||||
}
|
||||
return tls->defaultCtx;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -300,7 +300,7 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
if (HIP_SYNC_HOST_ALLOC) {
|
||||
@@ -372,7 +372,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
HIP_INIT_SPECIAL_API(hipHostMalloc, (TRACE_MEM), ptr, sizeBytes, flags);
|
||||
HIP_SET_DEVICE();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hip_status = ihipHostMalloc(ptr, sizeBytes, flags);
|
||||
hip_status = ihipHostMalloc(tls, ptr, sizeBytes, flags);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -383,7 +383,7 @@ hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) {
|
||||
if(flags != hipMemAttachGlobal)
|
||||
hip_status = hipErrorInvalidValue;
|
||||
else
|
||||
hip_status = ihipHostMalloc(devPtr, size, hipHostMallocDefault);
|
||||
hip_status = ihipHostMalloc(tls, devPtr, size, hipHostMallocDefault);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -397,7 +397,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) {
|
||||
};
|
||||
|
||||
// width in bytes
|
||||
hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) {
|
||||
hipError_t ihipMallocPitch(TlsData* tls, void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
if(ptr==NULL || pitch == NULL)
|
||||
{
|
||||
@@ -460,7 +460,7 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
|
||||
|
||||
if (width == 0 || height == 0) return ihipLogStatus(hipErrorUnknown);
|
||||
|
||||
hip_status = ihipMallocPitch(ptr, pitch, width, height, 0);
|
||||
hip_status = ihipMallocPitch(tls, ptr, pitch, width, height, 0);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -475,7 +475,7 @@ hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
|
||||
size_t pitch;
|
||||
|
||||
hip_status =
|
||||
ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth);
|
||||
ihipMallocPitch(tls, &pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth);
|
||||
if (hip_status == hipSuccess) {
|
||||
pitchedDevPtr->pitch = pitch;
|
||||
pitchedDevPtr->xsize = extent.width;
|
||||
|
||||
@@ -137,7 +137,7 @@ hipError_t hipModuleUnload(hipModule_t hmod) {
|
||||
// TODO - improve this synchronization so it is thread-safe.
|
||||
// Currently we want for all inflight activity to complete, but don't prevent another
|
||||
// thread from launching new kernels before we finish this operation.
|
||||
ihipSynchronize();
|
||||
ihipSynchronize(tls);
|
||||
|
||||
delete hmod; // The ihipModule_t dtor will clean everything up.
|
||||
hmod = nullptr;
|
||||
@@ -145,7 +145,7 @@ hipError_t hipModuleUnload(hipModule_t hmod) {
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
|
||||
uint32_t localWorkSizeZ, size_t sharedMemBytes,
|
||||
@@ -285,7 +285,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gr
|
||||
void** kernelParams, void** extra) {
|
||||
HIP_INIT_API(hipModuleLaunchKernel, f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes,
|
||||
hStream, kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(tls,
|
||||
f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY,
|
||||
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0));
|
||||
}
|
||||
@@ -298,7 +298,7 @@ hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
|
||||
HIP_INIT_API(hipExtModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
|
||||
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(tls,
|
||||
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
||||
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
|
||||
}
|
||||
@@ -311,14 +311,14 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent) {
|
||||
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
|
||||
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(tls,
|
||||
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
|
||||
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0));
|
||||
}
|
||||
|
||||
hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags) {
|
||||
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList,numDevices,flags);
|
||||
HIP_INIT_API(hipExtLaunchMultiKernelMultiDevice, launchParamsList, numDevices, flags);
|
||||
hipError_t result;
|
||||
|
||||
if ((numDevices > g_deviceCnt) || (launchParamsList == nullptr)) {
|
||||
@@ -361,7 +361,7 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
const hipLaunchParams& lp = launchParamsList[i];
|
||||
|
||||
result = ihipModuleLaunchKernel(kds[i],
|
||||
result = ihipModuleLaunchKernel(tls, kds[i],
|
||||
lp.gridDim.x * lp.blockDim.x,
|
||||
lp.gridDim.y * lp.blockDim.y,
|
||||
lp.gridDim.z * lp.blockDim.z,
|
||||
@@ -395,6 +395,7 @@ namespace hip_impl {
|
||||
}
|
||||
|
||||
hsa_agent_t this_agent() {
|
||||
GET_TLS();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (!ctx) throw runtime_error{"No active HIP context."};
|
||||
@@ -574,6 +575,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes,
|
||||
|
||||
namespace {
|
||||
inline void track(const hip_impl::Agent_global& x, hsa_agent_t agent) {
|
||||
GET_TLS();
|
||||
tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name,
|
||||
x.address, x.byte_cnt);
|
||||
|
||||
@@ -693,7 +695,7 @@ namespace hip_impl {
|
||||
}
|
||||
} // Namespace hip_impl.
|
||||
|
||||
hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const char* name,
|
||||
hipError_t ihipModuleGetFunction(TlsData *tls, hipFunction_t* func, hipModule_t hmod, const char* name,
|
||||
hsa_agent_t *agent = nullptr) {
|
||||
using namespace hip_impl;
|
||||
|
||||
@@ -728,14 +730,14 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch
|
||||
// Get kernel for the current hsa agent.
|
||||
hipError_t hipModuleGetFunction(hipFunction_t* hfunc, hipModule_t hmod, const char* name) {
|
||||
HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name);
|
||||
return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name));
|
||||
return ihipLogStatus(ihipModuleGetFunction(tls, hfunc, hmod, name));
|
||||
}
|
||||
|
||||
// Get kernel for the given hsa agent. Internal use only.
|
||||
hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod,
|
||||
const char* name, hsa_agent_t *agent) {
|
||||
HIP_INIT_API(hipModuleGetFunctionEx, hfunc, hmod, name, agent);
|
||||
return ihipLogStatus(ihipModuleGetFunction(hfunc, hmod, name, agent));
|
||||
return ihipLogStatus(ihipModuleGetFunction(tls, hfunc, hmod, name, agent));
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -743,7 +745,7 @@ const amd_kernel_code_v3_t *header_v3(const ihipModuleSymbol_t& kd) {
|
||||
return reinterpret_cast<const amd_kernel_code_v3_t*>(kd._header);
|
||||
}
|
||||
|
||||
hipFuncAttributes make_function_attributes(const ihipModuleSymbol_t& kd) {
|
||||
hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_t& kd) {
|
||||
hipFuncAttributes r{};
|
||||
|
||||
hipDeviceProp_t prop{};
|
||||
@@ -796,12 +798,12 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
|
||||
|
||||
if (!kd->_header) throw runtime_error{"Ill-formed Kernel_descriptor."};
|
||||
|
||||
*attr = make_function_attributes(*kd);
|
||||
*attr = make_function_attributes(tls, *kd);
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
|
||||
hipError_t ihipModuleLoadData(TlsData *tls, hipModule_t* module, const void* image) {
|
||||
using namespace hip_impl;
|
||||
|
||||
if (!module) return hipErrorInvalidValue;
|
||||
@@ -839,7 +841,7 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t* module, const void* image) {
|
||||
HIP_INIT_API(hipModuleLoadData, module, image);
|
||||
return ihipLogStatus(ihipModuleLoadData(module,image));
|
||||
return ihipLogStatus(ihipModuleLoadData(tls,module,image));
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
|
||||
@@ -853,13 +855,13 @@ hipError_t hipModuleLoad(hipModule_t* module, const char* fname) {
|
||||
|
||||
vector<char> tmp{istreambuf_iterator<char>{file}, istreambuf_iterator<char>{}};
|
||||
|
||||
return ihipLogStatus(ihipModuleLoadData(module, tmp.data()));
|
||||
return ihipLogStatus(ihipModuleLoadData(tls, module, tmp.data()));
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, unsigned int numOptions,
|
||||
hipJitOption* options, void** optionValues) {
|
||||
HIP_INIT_API(hipModuleLoadDataEx, module, image, numOptions, options, optionValues);
|
||||
return ihipLogStatus(ihipModuleLoadData(module, image));
|
||||
return ihipLogStatus(ihipModuleLoadData(tls, module, image));
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name) {
|
||||
@@ -903,7 +905,7 @@ void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t ihipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize,
|
||||
hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, uint32_t* blockSize,
|
||||
hipFunction_t f, size_t dynSharedMemPerBlk,
|
||||
uint32_t blockSizeLimit)
|
||||
{
|
||||
@@ -1021,12 +1023,12 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block
|
||||
{
|
||||
HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit);
|
||||
|
||||
return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(
|
||||
return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(tls,
|
||||
gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit));
|
||||
}
|
||||
|
||||
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk)
|
||||
TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk)
|
||||
{
|
||||
using namespace hip_impl;
|
||||
|
||||
@@ -1083,7 +1085,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk);
|
||||
|
||||
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
@@ -1093,5 +1095,5 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags);
|
||||
|
||||
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
@@ -84,7 +84,7 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t
|
||||
//---
|
||||
// Disable visibility of this device into memory allocated on peer device.
|
||||
// Remove this device from peer device peerlist.
|
||||
hipError_t ihipDisablePeerAccess(hipCtx_t peerCtx) {
|
||||
hipError_t ihipDisablePeerAccess(TlsData* tls, hipCtx_t peerCtx) {
|
||||
hipError_t err = hipSuccess;
|
||||
|
||||
auto thisCtx = ihipGetTlsDefaultCtx();
|
||||
@@ -119,7 +119,7 @@ hipError_t ihipDisablePeerAccess(hipCtx_t peerCtx) {
|
||||
//---
|
||||
// Allow the current device to see all memory allocated on peerCtx.
|
||||
// This should add this device to the peer-device peer list.
|
||||
hipError_t ihipEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
|
||||
hipError_t ihipEnablePeerAccess(TlsData* tls, hipCtx_t peerCtx, unsigned int flags) {
|
||||
hipError_t err = hipSuccess;
|
||||
if (flags != 0) {
|
||||
err = hipErrorInvalidValue;
|
||||
@@ -186,14 +186,14 @@ hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDevi
|
||||
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) {
|
||||
HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId);
|
||||
|
||||
return ihipLogStatus(ihipDisablePeerAccess(ihipGetPrimaryCtx(peerDeviceId)));
|
||||
return ihipLogStatus(ihipDisablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId)));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
|
||||
HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags);
|
||||
|
||||
return ihipLogStatus(ihipEnablePeerAccess(ihipGetPrimaryCtx(peerDeviceId), flags));
|
||||
return ihipLogStatus(ihipEnablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId), flags));
|
||||
}
|
||||
|
||||
|
||||
@@ -214,11 +214,11 @@ hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int src
|
||||
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
|
||||
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
|
||||
|
||||
return ihipLogStatus(ihipEnablePeerAccess(peerCtx, flags));
|
||||
return ihipLogStatus(ihipEnablePeerAccess(tls, peerCtx, flags));
|
||||
}
|
||||
|
||||
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
|
||||
HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx);
|
||||
|
||||
return ihipLogStatus(ihipDisablePeerAccess(peerCtx));
|
||||
return ihipLogStatus(ihipDisablePeerAccess(tls, peerCtx));
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ enum queue_priority
|
||||
#endif
|
||||
|
||||
//---
|
||||
hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priority) {
|
||||
hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flags, int priority) {
|
||||
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
@@ -97,7 +97,7 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit
|
||||
hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
|
||||
HIP_INIT_API(hipStreamCreateWithFlags, stream, flags);
|
||||
if(flags == hipStreamDefault || flags == hipStreamNonBlocking)
|
||||
return ihipLogStatus(ihipStreamCreate(stream, flags, priority_normal));
|
||||
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority_normal));
|
||||
else
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -106,7 +106,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
|
||||
hipError_t hipStreamCreate(hipStream_t* stream) {
|
||||
HIP_INIT_API(hipStreamCreate, stream);
|
||||
|
||||
return ihipLogStatus(ihipStreamCreate(stream, hipStreamDefault, priority_normal));
|
||||
return ihipLogStatus(ihipStreamCreate(tls, stream, hipStreamDefault, priority_normal));
|
||||
}
|
||||
|
||||
//---
|
||||
@@ -115,7 +115,7 @@ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags,
|
||||
|
||||
// clamp priority to range [priority_high:priority_low]
|
||||
priority = (priority < priority_high ? priority_high : (priority > priority_low ? priority_low : priority));
|
||||
return ihipLogStatus(ihipStreamCreate(stream, flags, priority));
|
||||
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority));
|
||||
}
|
||||
|
||||
//---
|
||||
@@ -183,7 +183,7 @@ hipError_t hipStreamQuery(hipStream_t stream) {
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API(hipStreamSynchronize, TRACE_SYNC, stream);
|
||||
|
||||
return ihipLogStatus(ihipStreamSynchronize(stream));
|
||||
return ihipLogStatus(ihipStreamSynchronize(tls, stream));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -381,9 +381,10 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
|
||||
}
|
||||
|
||||
// Texture Reference APIs
|
||||
hipError_t ihipBindTextureImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t size, textureReference* tex) {
|
||||
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
enum hipTextureAddressMode addressMode = tex->addressMode[0];
|
||||
enum hipTextureFilterMode filterMode = tex->filterMode;
|
||||
@@ -447,12 +448,12 @@ hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* dev
|
||||
HIP_INIT_API(hipBindTexture, offset, tex, devPtr, desc, size);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
hip_status = ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType, offset, devPtr, desc,
|
||||
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, desc,
|
||||
size, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipBindTexture2DImpl(int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height, textureReference* tex) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
@@ -519,15 +520,16 @@ hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* d
|
||||
size_t pitch) {
|
||||
HIP_INIT_API(hipBindTexture2D, offset, tex, devPtr, desc, width, height, pitch);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hip_status = ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType, offset, devPtr,
|
||||
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, offset, devPtr,
|
||||
desc, width, height, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipBindTextureToArrayImpl(int dim, enum hipTextureReadMode readMode,
|
||||
hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
textureReference* tex) {
|
||||
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
enum hipTextureAddressMode addressMode = tex->addressMode[0];
|
||||
enum hipTextureFilterMode filterMode = tex->filterMode;
|
||||
@@ -617,7 +619,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
hip_status =
|
||||
ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array, *desc, tex);
|
||||
ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array, *desc, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -629,7 +631,7 @@ hipError_t hipBindTextureToMipmappedArray(textureReference* tex,
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
|
||||
hipError_t ihipUnbindTextureImpl(TlsData *tls, const hipTextureObject_t& textureObject) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
@@ -654,7 +656,7 @@ hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
|
||||
hipError_t hipUnbindTexture(const textureReference* tex) {
|
||||
HIP_INIT_API(hipUnbindTexture, tex);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hip_status = ihipUnbindTextureImpl(tex->textureObject);
|
||||
hip_status = ihipUnbindTextureImpl(tls, tex->textureObject);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -726,7 +728,7 @@ hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsi
|
||||
HIP_INIT_API(hipTexRefSetArray, tex, array, flags);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
hip_status = ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array,
|
||||
hip_status = ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array,
|
||||
array->desc, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
@@ -737,7 +739,7 @@ hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDevicep
|
||||
HIP_INIT_API(hipTexRefSetAddress, offset, tex, devPtr, size);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
hip_status = ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType, offset, devPtr, NULL,
|
||||
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, NULL,
|
||||
size, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
@@ -748,7 +750,7 @@ hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPT
|
||||
size_t offset;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
hip_status = ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType, &offset, devPtr,
|
||||
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, &offset, devPtr,
|
||||
NULL, desc->Width, desc->Height, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -71,7 +71,8 @@ namespace hip_impl
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
std::string fullStr;
|
||||
recordApiTrace(&fullStr, os.str());
|
||||
GET_TLS();
|
||||
recordApiTrace(tls, &fullStr, os.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user